summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/core-api/packing.rst118
-rw-r--r--Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml105
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml1
-rw-r--r--Documentation/netlink/genetlink-c.yaml3
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml3
-rw-r--r--Documentation/netlink/specs/ethtool.yaml358
-rw-r--r--Documentation/networking/ip-sysctl.rst14
-rw-r--r--Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst1
-rw-r--r--Documentation/networking/netconsole.rst5
-rw-r--r--Documentation/userspace-api/netlink/c-code-gen.rst4
-rw-r--r--MAINTAINERS11
-rw-r--r--Makefile4
-rw-r--r--drivers/isdn/mISDN/core.c14
-rw-r--r--drivers/isdn/mISDN/core.h1
-rw-r--r--drivers/net/bareudp.c16
-rw-r--r--drivers/net/can/sja1000/sja1000_platform.c15
-rw-r--r--drivers/net/dsa/b53/b53_common.c13
-rw-r--r--drivers/net/dsa/b53/b53_priv.h1
-rw-r--r--drivers/net/dsa/bcm_sf2.c1
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c26
-rw-r--r--drivers/net/dsa/mt7530.c1
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c50
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h1
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c1
-rw-r--r--drivers/net/dsa/sja1105/sja1105_static_config.c8
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_hw.c16
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c14
-rw-r--r--drivers/net/ethernet/freescale/Kconfig3
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c3
-rw-r--r--drivers/net/ethernet/freescale/fec.h2
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c16
-rw-r--r--drivers/net/ethernet/freescale/fman/fman.c35
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c602
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.h22
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth_ethtool.c74
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c14
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h1
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_port.c2
-rw-r--r--drivers/net/ethernet/intel/Kconfig1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c293
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h49
-rw-r--r--drivers/net/ethernet/lantiq_etop.c25
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c27
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c25
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c39
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.h1
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c23
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c68
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/Makefile1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c1056
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h265
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c113
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h26
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c19
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c66
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c10
-rw-r--r--drivers/net/ethernet/mscc/ocelot_net.c3
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic.h2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_ethtool.c40
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h22
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c3
-rw-r--r--drivers/net/ethernet/realtek/r8169.h2
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c26
-rw-r--r--drivers/net/ethernet/realtek/r8169_phy_config.c10
-rw-r--r--drivers/net/ethernet/realtek/rtase/rtase.h1
-rw-r--r--drivers/net/ethernet/realtek/rtase/rtase_main.c14
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c35
-rw-r--r--drivers/net/ethernet/renesas/rswitch.h15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c30
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c202
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c19
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c18
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_est.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h3
-rw-r--r--drivers/net/geneve.c12
-rw-r--r--drivers/net/mctp/mctp-i2c.c3
-rw-r--r--drivers/net/mdio/mdio-octeon.c25
-rw-r--r--drivers/net/netconsole.c60
-rw-r--r--drivers/net/pcs/pcs-lynx.c22
-rw-r--r--drivers/net/pcs/pcs-mtk-lynxi.c16
-rw-r--r--drivers/net/pcs/pcs-xpcs.c28
-rw-r--r--drivers/net/phy/bcm84881.c10
-rw-r--r--drivers/net/phy/dp83822.c58
-rw-r--r--drivers/net/phy/marvell.c52
-rw-r--r--drivers/net/phy/phy-c45.c14
-rw-r--r--drivers/net/phy/phy.c62
-rw-r--r--drivers/net/phy/phylink.c352
-rw-r--r--drivers/net/tun.c14
-rw-r--r--drivers/net/usb/lan78xx.c871
-rw-r--r--drivers/net/vrf.c49
-rw-r--r--drivers/net/vxlan/vxlan_core.c183
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pci.c23
-rw-r--r--include/linux/bpf.h12
-rw-r--r--include/linux/etherdevice.h18
-rw-r--r--include/linux/ethtool.h6
-rw-r--r--include/linux/filter.h9
-rw-r--r--include/linux/if_vlan.h28
-rw-r--r--include/linux/ktime.h5
-rw-r--r--include/linux/netdevice.h61
-rw-r--r--include/linux/netfilter/x_tables.h2
-rw-r--r--include/linux/netfilter_netdev.h3
-rw-r--r--include/linux/netpoll.h2
-rw-r--r--include/linux/packing.h425
-rw-r--r--include/linux/phy.h59
-rw-r--r--include/linux/phylink.h17
-rw-r--r--include/linux/ptp_clock_kernel.h4
-rw-r--r--include/linux/rfkill.h2
-rw-r--r--include/linux/rtnetlink.h8
-rw-r--r--include/linux/skbuff.h18
-rw-r--r--include/linux/stmmac.h10
-rw-r--r--include/linux/wwan.h2
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/dropreason-core.h18
-rw-r--r--include/net/dsa.h2
-rw-r--r--include/net/dst.h11
-rw-r--r--include/net/genetlink.h6
-rw-r--r--include/net/inet_timewait_sock.h4
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/iucv/iucv.h30
-rw-r--r--include/net/netfilter/nf_tproxy.h4
-rw-r--r--include/net/netlink.h44
-rw-r--r--include/net/netmem.h78
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/page_pool/helpers.h9
-rw-r--r--include/net/page_pool/types.h8
-rw-r--r--include/net/pkt_cls.h4
-rw-r--r--include/net/sch_generic.h8
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/vxlan.h1
-rw-r--r--include/net/xdp.h63
-rw-r--r--include/net/xdp_sock_drv.h11
-rw-r--r--include/net/xsk_buff_pool.h4
-rw-r--r--include/trace/events/rxrpc.h878
-rw-r--r--include/uapi/linux/ethtool_netlink.h893
-rw-r--r--include/uapi/linux/ethtool_netlink_generated.h792
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--kernel/bpf/devmap.c8
-rw-r--r--lib/packing.c293
-rw-r--r--lib/packing_test.c61
-rw-r--r--lib/win_minmax.c1
-rw-r--r--net/bridge/br_fdb.c3
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/filter.c29
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/page_pool.c30
-rw-r--r--net/core/pktgen.c7
-rw-r--r--net/core/rtnetlink.c108
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/xdp.c65
-rw-r--r--net/dsa/port.c16
-rw-r--r--net/dsa/user.c15
-rw-r--r--net/ipv4/ip_output.c13
-rw-r--r--net/ipv4/sysctl_net_ipv4.c10
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv6/ioam6_iptunnel.c73
-rw-r--r--net/ipv6/ip6_output.c13
-rw-r--r--net/ipv6/mcast.c59
-rw-r--r--net/ipv6/rpl_iptunnel.c46
-rw-r--r--net/ipv6/seg6_iptunnel.c85
-rw-r--r--net/l2tp/l2tp_eth.c9
-rw-r--r--net/mctp/device.c50
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c4
-rw-r--r--net/rxrpc/ar-internal.h341
-rw-r--r--net/rxrpc/call_accept.c22
-rw-r--r--net/rxrpc/call_event.c385
-rw-r--r--net/rxrpc/call_object.c66
-rw-r--r--net/rxrpc/conn_client.c26
-rw-r--r--net/rxrpc/conn_event.c40
-rw-r--r--net/rxrpc/conn_object.c14
-rw-r--r--net/rxrpc/input.c706
-rw-r--r--net/rxrpc/input_rack.c418
-rw-r--r--net/rxrpc/insecure.c5
-rw-r--r--net/rxrpc/io_thread.c113
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/misc.c4
-rw-r--r--net/rxrpc/output.c568
-rw-r--r--net/rxrpc/peer_event.c114
-rw-r--r--net/rxrpc/peer_object.c30
-rw-r--r--net/rxrpc/proc.c61
-rw-r--r--net/rxrpc/protocol.h13
-rw-r--r--net/rxrpc/recvmsg.c18
-rw-r--r--net/rxrpc/rtt.c103
-rw-r--r--net/rxrpc/rxkad.c59
-rw-r--r--net/rxrpc/rxperf.c2
-rw-r--r--net/rxrpc/security.c4
-rw-r--r--net/rxrpc/sendmsg.c92
-rw-r--r--net/rxrpc/sysctl.c6
-rw-r--r--net/rxrpc/txbuf.c127
-rw-r--r--net/sched/sch_fq.c14
-rw-r--r--net/sched/sch_sfq.c4
-rw-r--r--net/smc/af_smc.c5
-rw-r--r--net/smc/smc_core.c5
-rw-r--r--net/smc/smc_core.h11
-rw-r--r--net/smc/smc_ib.c3
-rw-r--r--net/smc/smc_llc.c21
-rw-r--r--net/smc/smc_wr.c42
-rw-r--r--net/tipc/name_table.c4
-rw-r--r--net/tipc/name_table.h2
-rw-r--r--scripts/.gitignore1
-rw-r--r--scripts/Makefile2
-rw-r--r--scripts/gen_packed_field_checks.c37
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py147
-rw-r--r--tools/testing/selftests/net/busy_poller.c88
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh25
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh352
-rw-r--r--tools/testing/selftests/net/lib.sh41
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh3
-rwxr-xr-xtools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py21
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json20
239 files changed, 10381 insertions, 4590 deletions
diff --git a/Documentation/core-api/packing.rst b/Documentation/core-api/packing.rst
index 821691f23c54..0ce2078c8e13 100644
--- a/Documentation/core-api/packing.rst
+++ b/Documentation/core-api/packing.rst
@@ -227,11 +227,119 @@ Intended use
Drivers that opt to use this API first need to identify which of the above 3
quirk combinations (for a total of 8) match what the hardware documentation
-describes. Then they should wrap the packing() function, creating a new
-xxx_packing() that calls it using the proper QUIRK_* one-hot bits set.
+describes.
+
+There are 3 supported usage patterns, detailed below.
+
+packing()
+^^^^^^^^^
+
+This API function is deprecated.
The packing() function returns an int-encoded error code, which protects the
programmer against incorrect API use. The errors are not expected to occur
-during runtime, therefore it is reasonable for xxx_packing() to return void
-and simply swallow those errors. Optionally it can dump stack or print the
-error description.
+during runtime, therefore it is reasonable to wrap packing() into a custom
+function which returns void and swallows those errors. Optionally it can
+dump stack or print the error description.
+
+.. code-block:: c
+
+ void my_packing(void *buf, u64 *val, int startbit, int endbit,
+ size_t len, enum packing_op op)
+ {
+ int err;
+
+ /* Adjust quirks accordingly */
+ err = packing(buf, val, startbit, endbit, len, op, QUIRK_LSW32_IS_FIRST);
+ if (likely(!err))
+ return;
+
+ if (err == -EINVAL) {
+ pr_err("Start bit (%d) expected to be larger than end (%d)\n",
+ startbit, endbit);
+ } else if (err == -ERANGE) {
+ if ((startbit - endbit + 1) > 64)
+ pr_err("Field %d-%d too large for 64 bits!\n",
+ startbit, endbit);
+ else
+ pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n",
+ *val, startbit, endbit);
+ }
+ dump_stack();
+ }
+
+pack() and unpack()
+^^^^^^^^^^^^^^^^^^^
+
+These are const-correct variants of packing(), and eliminate the "enum
+packing_op op" argument.
+
+Calling pack(...) is equivalent, and preferred, to calling packing(..., PACK).
+
+Calling unpack(...) is equivalent, and preferred, to calling packing(..., UNPACK).
+
+pack_fields() and unpack_fields()
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The library exposes optimized functions for the scenario where there are many
+fields represented in a buffer, and it encourages consumer drivers to avoid
+repetitive calls to pack() and unpack() for each field, but instead use
+pack_fields() and unpack_fields(), which reduces the code footprint.
+
+These APIs use field definitions in arrays of ``struct packed_field_u8`` or
+``struct packed_field_u16``, allowing consumer drivers to minimize the size
+of these arrays according to their custom requirements.
+
+The pack_fields() and unpack_fields() API functions are actually macros which
+automatically select the appropriate function at compile time, based on the
+type of the fields array passed in.
+
+An additional benefit over pack() and unpack() is that sanity checks on the
+field definitions are handled at compile time with ``BUILD_BUG_ON`` rather
+than only when the offending code is executed. These functions return void and
+wrapping them to handle unexpected errors is not necessary.
+
+It is recommended, but not required, that you wrap your packed buffer into a
+structured type with a fixed size. This generally makes it easier for the
+compiler to enforce that the correct size buffer is used.
+
+Here is an example of how to use the fields APIs:
+
+.. code-block:: c
+
+ /* Ordering inside the unpacked structure is flexible and can be different
+ * from the packed buffer. Here, it is optimized to reduce padding.
+ */
+ struct data {
+ u64 field3;
+ u32 field4;
+ u16 field1;
+ u8 field2;
+ };
+
+ #define SIZE 13
+
+ typedef struct __packed { u8 buf[SIZE]; } packed_buf_t;
+
+ static const struct packed_field_u8 fields[] = {
+ PACKED_FIELD(100, 90, struct data, field1),
+ PACKED_FIELD(89, 87, struct data, field2),
+ PACKED_FIELD(86, 30, struct data, field3),
+ PACKED_FIELD(29, 0, struct data, field4),
+ };
+
+ void unpack_your_data(const packed_buf_t *buf, struct data *unpacked)
+ {
+ BUILD_BUG_ON(sizeof(*buf) != SIZE);
+
+ unpack_fields(buf, sizeof(*buf), unpacked, fields,
+ QUIRK_LITTLE_ENDIAN);
+ }
+
+ void pack_your_data(const struct data *unpacked, packed_buf_t *buf)
+ {
+ BUILD_BUG_ON(sizeof(*buf) != SIZE);
+
+ pack_fields(buf, sizeof(*buf), unpacked, fields,
+ QUIRK_LITTLE_ENDIAN);
+ }
diff --git a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml
new file mode 100644
index 000000000000..2b8b74c5feec
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2021-2024 NXP
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/nxp,s32-dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP S32G2xx/S32G3xx/S32R45 GMAC ethernet controller
+
+maintainers:
+ - Jan Petrous (OSS) <jan.petrous@oss.nxp.com>
+
+description:
+ This device is a Synopsys DWC IP, integrated on NXP S32G/R SoCs.
+ The SoC series S32G2xx and S32G3xx feature one DWMAC instance,
+ the SoC S32R45 has two instances. The devices can use RGMII/RMII/MII
+ interface over Pinctrl device or the output can be routed
+ to the embedded SerDes for SGMII connectivity.
+
+properties:
+ compatible:
+ oneOf:
+ - const: nxp,s32g2-dwmac
+ - items:
+ - enum:
+ - nxp,s32g3-dwmac
+ - nxp,s32r45-dwmac
+ - const: nxp,s32g2-dwmac
+
+ reg:
+ items:
+ - description: Main GMAC registers
+ - description: GMAC PHY mode control register
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ const: macirq
+
+ clocks:
+ items:
+ - description: Main GMAC clock
+ - description: Transmit clock
+ - description: Receive clock
+ - description: PTP reference clock
+
+ clock-names:
+ items:
+ - const: stmmaceth
+ - const: tx
+ - const: rx
+ - const: ptp_ref
+
+required:
+ - clocks
+ - clock-names
+
+allOf:
+ - $ref: snps,dwmac.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/phy/phy.h>
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ethernet@4033c000 {
+ compatible = "nxp,s32g2-dwmac";
+ reg = <0x0 0x4033c000 0x0 0x2000>, /* gmac IP */
+ <0x0 0x4007c004 0x0 0x4>; /* GMAC_0_CTRL_STS */
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ snps,mtl-rx-config = <&mtl_rx_setup>;
+ snps,mtl-tx-config = <&mtl_tx_setup>;
+ clocks = <&clks 24>, <&clks 17>, <&clks 16>, <&clks 15>;
+ clock-names = "stmmaceth", "tx", "rx", "ptp_ref";
+ phy-mode = "rgmii-id";
+ phy-handle = <&phy0>;
+
+ mtl_rx_setup: rx-queues-config {
+ snps,rx-queues-to-use = <5>;
+ };
+
+ mtl_tx_setup: tx-queues-config {
+ snps,tx-queues-to-use = <5>;
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index eb1f3ae41ab9..91e75eb3f329 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -67,6 +67,7 @@ properties:
- ingenic,x2000-mac
- loongson,ls2k-dwmac
- loongson,ls7a-dwmac
+ - nxp,s32g2-dwmac
- qcom,qcs404-ethqos
- qcom,sa8775p-ethqos
- qcom,sc8280xp-ethqos
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 4f803eaac6d8..9660ffb1ed6a 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -106,6 +106,9 @@ properties:
name-prefix:
description: For enum the prefix of the values, optional.
type: string
+ enum-cnt-name:
+ description: Name of the render-max counter enum entry.
+ type: string
# End genetlink-c
attribute-sets:
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 8db0e22fa72c..16380e12cabe 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -117,6 +117,9 @@ properties:
name-prefix:
description: For enum the prefix of the values, optional.
type: string
+ enum-cnt-name:
+ description: Name of the render-max counter enum entry.
+ type: string
# End genetlink-c
# Start genetlink-legacy
members:
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 93369f0eb816..c7634e957d9c 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -5,6 +5,7 @@ name: ethtool
protocol: genetlink-legacy
doc: Partial family for Ethtool Netlink.
+uapi-header: linux/ethtool_netlink_generated.h
definitions:
-
@@ -12,44 +13,100 @@ definitions:
enum-name:
type: enum
entries: [ vxlan, geneve, vxlan-gpe ]
+ enum-cnt-name: __ethtool-udp-tunnel-type-cnt
+ render-max: true
-
name: stringset
type: enum
entries: []
+ header: linux/ethtool.h # skip rendering, no actual definition
-
name: header-flags
type: flags
- entries: [ compact-bitsets, omit-reply, stats ]
+ name-prefix: ethtool-flag-
+ doc: common ethtool header flags
+ entries:
+ -
+ name: compact-bitsets
+ doc: use compact bitsets in reply
+ -
+ name: omit-reply
+ doc: provide optional reply for SET or ACT requests
+ -
+ name: stats
+ doc: request statistics, if supported by the driver
-
name: module-fw-flash-status
type: enum
- entries: [ started, in_progress, completed, error ]
+ doc: plug-in module firmware flashing status
+ header: linux/ethtool.h
+ entries:
+ -
+ name: started
+ doc: The firmware flashing process has started.
+ -
+ name: in_progress
+ doc: The firmware flashing process is in progress.
+ -
+ name: completed
+ doc: The firmware flashing process was completed successfully.
+ -
+ name: error
+ doc: The firmware flashing process was stopped due to an error.
-
name: c33-pse-ext-state
- enum-name:
+ doc: "groups of PSE extended states functions. IEEE 802.3-2022 33.2.4.4 Variables"
type: enum
name-prefix: ethtool-c33-pse-ext-state-
+ header: linux/ethtool.h
entries:
- - none
- - error-condition
- - mr-mps-valid
- - mr-pse-enable
- - option-detect-ted
- - option-vport-lim
- - ovld-detected
- - power-not-available
- - short-detected
+ -
+ name: none
+ doc: none
+ -
+ name: error-condition
+ doc: Group of error_condition states
+ -
+ name: mr-mps-valid
+ doc: Group of mr_mps_valid states
+ -
+ name: mr-pse-enable
+ doc: Group of mr_pse_enable states
+ -
+ name: option-detect-ted
+ doc: Group of option_detect_ted states
+ -
+ name: option-vport-lim
+ doc: Group of option_vport_lim states
+ -
+ name: ovld-detected
+ doc: Group of ovld_detected states
+ -
+ name: power-not-available
+ doc: Group of power_not_available states
+ -
+ name: short-detected
+ doc: Group of short_detected states
-
name: phy-upstream-type
enum-name:
type: enum
entries: [ mac, phy ]
+ -
+ name: tcp-data-split
+ type: enum
+ entries: [ unknown, disabled, enabled ]
attribute-sets:
-
name: header
+ attr-cnt-name: __ethtool-a-header-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: dev-index
type: u32
-
@@ -65,8 +122,13 @@ attribute-sets:
-
name: bitset-bit
+ attr-cnt-name: __ethtool-a-bitset-bit-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: index
type: u32
-
@@ -77,16 +139,26 @@ attribute-sets:
type: flag
-
name: bitset-bits
+ attr-cnt-name: __ethtool-a-bitset-bits-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: bit
type: nest
multi-attr: true
nested-attributes: bitset-bit
-
name: bitset
+ attr-cnt-name: __ethtool-a-bitset-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: nomask
type: flag
-
@@ -104,8 +176,13 @@ attribute-sets:
type: binary
-
name: string
+ attr-cnt-name: __ethtool-a-string-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: index
type: u32
-
@@ -113,16 +190,30 @@ attribute-sets:
type: string
-
name: strings
+ attr-cnt-name: __ethtool-a-strings-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
+ name: unspec
+ type: unused
+ value: 0
+ -
name: string
type: nest
multi-attr: true
nested-attributes: string
-
name: stringset
+ attr-cnt-name: __ethtool-a-stringset-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: id
type: u32
-
@@ -135,16 +226,26 @@ attribute-sets:
nested-attributes: strings
-
name: stringsets
+ attr-cnt-name: __ethtool-a-stringsets-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: stringset
type: nest
multi-attr: true
nested-attributes: stringset
-
name: strset
+ attr-cnt-name: __ethtool-a-strset-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -158,8 +259,13 @@ attribute-sets:
-
name: privflags
+ attr-cnt-name: __ethtool-a-privflags-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -170,8 +276,13 @@ attribute-sets:
-
name: rings
+ attr-cnt-name: __ethtool-a-rings-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -205,6 +316,7 @@ attribute-sets:
-
name: tcp-data-split
type: u8
+ enum: tcp-data-split
-
name: cqe-size
type: u32
@@ -223,32 +335,49 @@ attribute-sets:
-
name: mm-stat
+ attr-cnt-name: __ethtool-a-mm-stat-cnt
+ doc: MAC Merge (802.3)
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: pad
type: pad
-
name: reassembly-errors
+ doc: aMACMergeFrameAssErrorCount
type: u64
-
name: smd-errors
+ doc: aMACMergeFrameSmdErrorCount
type: u64
-
name: reassembly-ok
+ doc: aMACMergeFrameAssOkCount
type: u64
-
name: rx-frag-count
+ doc: aMACMergeFragCountRx
type: u64
-
name: tx-frag-count
+ doc: aMACMergeFragCountTx
type: u64
-
name: hold-count
+ doc: aMACMergeHoldCount
type: u64
-
name: mm
+ attr-cnt-name: __ethtool-a-mm-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -285,8 +414,13 @@ attribute-sets:
nested-attributes: mm-stat
-
name: linkinfo
+ attr-cnt-name: __ethtool-a-linkinfo-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -307,8 +441,13 @@ attribute-sets:
type: u8
-
name: linkmodes
+ attr-cnt-name: __ethtool-a-linkmodes-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -343,8 +482,13 @@ attribute-sets:
type: u8
-
name: linkstate
+ attr-cnt-name: __ethtool-a-linkstate-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -368,8 +512,13 @@ attribute-sets:
type: u32
-
name: debug
+ attr-cnt-name: __ethtool-a-debug-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -379,8 +528,13 @@ attribute-sets:
nested-attributes: bitset
-
name: wol
+ attr-cnt-name: __ethtool-a-wol-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -393,8 +547,13 @@ attribute-sets:
type: binary
-
name: features
+ attr-cnt-name: __ethtool-a-features-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -416,8 +575,13 @@ attribute-sets:
nested-attributes: bitset
-
name: channels
+ attr-cnt-name: __ethtool-a-channels-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -448,8 +612,13 @@ attribute-sets:
-
name: irq-moderation
+ attr-cnt-name: __ethtool-a-irq-moderation-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: usec
type: u32
-
@@ -460,16 +629,26 @@ attribute-sets:
type: u32
-
name: profile
+ attr-cnt-name: __ethtool-a-profile-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: irq-moderation
type: nest
multi-attr: true
nested-attributes: irq-moderation
-
name: coalesce
+ attr-cnt-name: __ethtool-a-coalesce-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -565,8 +744,13 @@ attribute-sets:
-
name: pause-stat
+ attr-cnt-name: __ethtool-a-pause-stat-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: pad
type: pad
-
@@ -577,8 +761,13 @@ attribute-sets:
type: u64
-
name: pause
+ attr-cnt-name: __ethtool-a-pause-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -600,8 +789,13 @@ attribute-sets:
type: u32
-
name: eee
+ attr-cnt-name: __ethtool-a-eee-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -627,8 +821,13 @@ attribute-sets:
type: u32
-
name: ts-stat
+ attr-cnt-name: __ethtool-a-ts-stat-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: tx-pkts
type: uint
-
@@ -639,8 +838,13 @@ attribute-sets:
type: uint
-
name: tsinfo
+ attr-cnt-name: __ethtool-a-tsinfo-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -665,20 +869,33 @@ attribute-sets:
nested-attributes: ts-stat
-
name: cable-result
+ attr-cnt-name: __ethtool-a-cable-result-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: pair
+ doc: ETHTOOL_A_CABLE_PAIR
type: u8
-
name: code
+ doc: ETHTOOL_A_CABLE_RESULT_CODE
type: u8
-
name: src
+ doc: ETHTOOL_A_CABLE_INF_SRC
type: u32
-
name: cable-fault-length
+ attr-cnt-name: __ethtool-a-cable-fault-length-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: pair
type: u8
-
@@ -689,8 +906,13 @@ attribute-sets:
type: u32
-
name: cable-nest
+ attr-cnt-name: __ethtool-a-cable-nest-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: result
type: nest
nested-attributes: cable-result
@@ -700,20 +922,31 @@ attribute-sets:
nested-attributes: cable-fault-length
-
name: cable-test
+ attr-cnt-name: __ethtool-a-cable-test-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
-
name: cable-test-ntf
+ attr-cnt-name: __ethtool-a-cable-test-ntf-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
-
name: status
+ doc: _STARTED/_COMPLETE
type: u8
-
name: nest
@@ -721,8 +954,13 @@ attribute-sets:
nested-attributes: cable-nest
-
name: cable-test-tdr-cfg
+ attr-cnt-name: __ethtool-a-cable-test-tdr-cfg-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: first
type: u32
-
@@ -736,8 +974,13 @@ attribute-sets:
type: u8
-
name: cable-test-tdr-ntf
+ attr-cnt-name: __ethtool-a-cable-test-tdr-ntf-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -750,8 +993,13 @@ attribute-sets:
nested-attributes: cable-nest
-
name: cable-test-tdr
+ attr-cnt-name: __ethtool-a-cable-test-tdr-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -761,8 +1009,13 @@ attribute-sets:
nested-attributes: cable-test-tdr-cfg
-
name: tunnel-udp-entry
+ attr-cnt-name: __ethtool-a-tunnel-udp-entry-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: port
type: u16
byte-order: big-endian
@@ -772,8 +1025,13 @@ attribute-sets:
enum: udp-tunnel-type
-
name: tunnel-udp-table
+ attr-cnt-name: __ethtool-a-tunnel-udp-table-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: size
type: u32
-
@@ -787,15 +1045,25 @@ attribute-sets:
nested-attributes: tunnel-udp-entry
-
name: tunnel-udp
+ attr-cnt-name: __ethtool-a-tunnel-udp-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: table
type: nest
nested-attributes: tunnel-udp-table
-
name: tunnel-info
+ attr-cnt-name: __ethtool-a-tunnel-info-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -805,8 +1073,13 @@ attribute-sets:
nested-attributes: tunnel-udp
-
name: fec-stat
+ attr-cnt-name: __ethtool-a-fec-stat-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: pad
type: pad
-
@@ -823,8 +1096,13 @@ attribute-sets:
sub-type: u64
-
name: fec
+ attr-cnt-name: __ethtool-a-fec-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -844,8 +1122,13 @@ attribute-sets:
nested-attributes: fec-stat
-
name: module-eeprom
+ attr-cnt-name: __ethtool-a-module-eeprom-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -869,8 +1152,13 @@ attribute-sets:
type: binary
-
name: stats-grp
+ attr-cnt-name: __ethtool-a-stats-grp-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: pad
type: pad
-
@@ -912,8 +1200,13 @@ attribute-sets:
name: hist-val
-
name: stats
+ attr-cnt-name: __ethtool-a-stats-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: pad
type: pad
-
@@ -933,8 +1226,13 @@ attribute-sets:
type: u32
-
name: phc-vclocks
+ attr-cnt-name: __ethtool-a-phc-vclocks-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -947,8 +1245,13 @@ attribute-sets:
sub-type: s32
-
name: module
+ attr-cnt-name: __ethtool-a-module-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -960,8 +1263,14 @@ attribute-sets:
type: u8
-
name: c33-pse-pw-limit
+ attr-cnt-name: __ethtool-a-c33-pse-pw-limit-cnt
+ attr-max-name: __ethtool-a-c33-pse-pw-limit-max
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: min
type: u32
-
@@ -969,8 +1278,13 @@ attribute-sets:
type: u32
-
name: pse
+ attr-cnt-name: __ethtool-a-pse-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -1027,8 +1341,13 @@ attribute-sets:
nested-attributes: c33-pse-pw-limit
-
name: rss
+ attr-cnt-name: __ethtool-a-rss-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -1053,8 +1372,13 @@ attribute-sets:
type: u32
-
name: plca
+ attr-cnt-name: __ethtool-a-plca-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -1084,8 +1408,13 @@ attribute-sets:
type: u32
-
name: module-fw-flash
+ attr-cnt-name: __ethtool-a-module-fw-flash-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
@@ -1110,8 +1439,13 @@ attribute-sets:
type: uint
-
name: phy
+ attr-cnt-name: __ethtool-a-phy-cnt
attributes:
-
+ name: unspec
+ type: unused
+ value: 0
+ -
name: header
type: nest
nested-attributes: header
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index dcbb6f6caf6d..363b4950d542 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1000,6 +1000,20 @@ tcp_tw_reuse - INTEGER
Default: 2
+tcp_tw_reuse_delay - UNSIGNED INTEGER
+ The delay in milliseconds before a TIME-WAIT socket can be reused by a
+ new connection, if TIME-WAIT socket reuse is enabled. The actual reuse
+ threshold is within the [N, N+1] range, where N is the requested delay in
+ milliseconds, to ensure the delay interval is never shorter than the
+ configured value.
+
+ This setting contains an assumption about the peer's TCP timestamp clock
+ tick interval. It should not be set to a value lower than the peer's
+ clock tick for the PAWS (Protection Against Wrapped Sequence numbers)
+ mechanism to work correctly for the reused connection.
+
+ Default: 1000 (milliseconds)
+
tcp_window_scaling - BOOLEAN
Enable window scaling as defined in RFC1323.
diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
index 629da6dc6d74..de0263302f16 100644
--- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
+++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
@@ -79,6 +79,7 @@ u8 sysctl_tcp_retries1
u8 sysctl_tcp_retries2
u8 sysctl_tcp_orphan_retries
u8 sysctl_tcp_tw_reuse timewait_sock_ops
+unsigned_int sysctl_tcp_tw_reuse_delay timewait_sock_ops
int sysctl_tcp_fin_timeout TCP_LAST_ACK/tcp_rcv_state_process
unsigned_int sysctl_tcp_notsent_lowat read_mostly tcp_notsent_lowat/tcp_stream_memory_free
u8 sysctl_tcp_sack tcp_syn_options
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
index d55c2a22ec7a..94c4680fdf3e 100644
--- a/Documentation/networking/netconsole.rst
+++ b/Documentation/networking/netconsole.rst
@@ -124,7 +124,7 @@ To remove a target::
The interface exposes these parameters of a netconsole target to userspace:
- ============== ================================= ============
+ =============== ================================= ============
enabled Is this target currently enabled? (read-write)
extended Extended mode enabled (read-write)
release Prepend kernel release to message (read-write)
@@ -135,7 +135,8 @@ The interface exposes these parameters of a netconsole target to userspace:
remote_ip Remote agent's IP address (read-write)
local_mac Local interface's MAC address (read-only)
remote_mac Remote agent's MAC address (read-write)
- ============== ================================= ============
+ transmit_errors Number of packet send errors (read-only)
+ =============== ================================= ============
The "enabled" attribute is also used to control whether the parameters of
a target can be updated or not -- you can modify the parameters of only
diff --git a/Documentation/userspace-api/netlink/c-code-gen.rst b/Documentation/userspace-api/netlink/c-code-gen.rst
index 89de42c13350..46415e6d646d 100644
--- a/Documentation/userspace-api/netlink/c-code-gen.rst
+++ b/Documentation/userspace-api/netlink/c-code-gen.rst
@@ -56,7 +56,9 @@ If ``name-prefix`` is specified it replaces the ``$family-$enum``
portion of the entry name.
Boolean ``render-max`` controls creation of the max values
-(which are enabled by default for attribute enums).
+(which are enabled by default for attribute enums). These max
+values are named ``__$pfx-MAX`` and ``$pfx-MAX``. The name
+of the first value can be overridden via the ``enum-cnt-name`` property.
Attributes
==========
diff --git a/MAINTAINERS b/MAINTAINERS
index e6e71b05710b..6cced90772fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2836,6 +2836,13 @@ S: Maintained
F: arch/arm64/boot/dts/freescale/s32g*.dts*
F: drivers/pinctrl/nxp/
+ARM/NXP S32G/S32R DWMAC ETHERNET DRIVER
+M: Jan Petrous <jan.petrous@oss.nxp.com>
+L: NXP S32 Linux Team <s32@nxp.com>
+S: Maintained
+F: Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml
+F: drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
+
ARM/Orion SoC/Technologic Systems TS-78xx platform support
M: Alexander Clouter <alex@digriz.org.uk>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -13944,6 +13951,7 @@ M: Sunil Goutham <sgoutham@marvell.com>
M: Geetha sowjanya <gakula@marvell.com>
M: Subbaraya Sundeep <sbhatta@marvell.com>
M: hariprasad <hkelam@marvell.com>
+M: Bharat Bhushan <bbhushan2@marvell.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/marvell/octeontx2/nic/
@@ -16282,7 +16290,7 @@ F: include/linux/inetdevice.h
F: include/linux/netdev*
F: include/linux/platform_data/wiznet.h
F: include/uapi/linux/cn_proc.h
-F: include/uapi/linux/ethtool_netlink.h
+F: include/uapi/linux/ethtool_netlink*
F: include/uapi/linux/if_*
F: include/uapi/linux/net_shaper.h
F: include/uapi/linux/netdev*
@@ -17640,6 +17648,7 @@ F: Documentation/core-api/packing.rst
F: include/linux/packing.h
F: lib/packing.c
F: lib/packing_test.c
+F: scripts/gen_packed_field_checks.c
PADATA PARALLEL EXECUTION MECHANISM
M: Steffen Klassert <steffen.klassert@secunet.com>
diff --git a/Makefile b/Makefile
index 64c594bd7ad0..72214d35853a 100644
--- a/Makefile
+++ b/Makefile
@@ -1367,6 +1367,10 @@ PHONY += scripts_unifdef
scripts_unifdef: scripts_basic
$(Q)$(MAKE) $(build)=scripts scripts/unifdef
+PHONY += scripts_gen_packed_field_checks
+scripts_gen_packed_field_checks: scripts_basic
+ $(Q)$(MAKE) $(build)=scripts scripts/gen_packed_field_checks
+
# ---------------------------------------------------------------------------
# Install
diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c
index e34a7a46754e..8ec2d4d4f135 100644
--- a/drivers/isdn/mISDN/core.c
+++ b/drivers/isdn/mISDN/core.c
@@ -294,20 +294,6 @@ get_Bprotocol4mask(u_int m)
return NULL;
}
-struct Bprotocol *
-get_Bprotocol4id(u_int id)
-{
- u_int m;
-
- if (id < ISDN_P_B_START || id > 63) {
- printk(KERN_WARNING "%s id not in range %d\n",
- __func__, id);
- return NULL;
- }
- m = 1 << (id & ISDN_P_B_MASK);
- return get_Bprotocol4mask(m);
-}
-
int
mISDN_register_Bprotocol(struct Bprotocol *bp)
{
diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h
index 42599f49c189..5617c06de8e4 100644
--- a/drivers/isdn/mISDN/core.h
+++ b/drivers/isdn/mISDN/core.h
@@ -55,7 +55,6 @@ extern void __add_layer2(struct mISDNchannel *, struct mISDNstack *);
extern u_int get_all_Bprotocols(void);
struct Bprotocol *get_Bprotocol4mask(u_int);
-struct Bprotocol *get_Bprotocol4id(u_int);
extern int mISDN_inittimer(u_int *);
extern void mISDN_timer_cleanup(void);
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index a2abfade82dd..70814303aab8 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -84,7 +84,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion,
sizeof(ipversion))) {
- dev_core_stats_rx_dropped_inc(bareudp->dev);
+ dev_dstats_rx_dropped(bareudp->dev);
goto drop;
}
ipversion >>= 4;
@@ -94,7 +94,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
} else if (ipversion == 6 && bareudp->multi_proto_mode) {
proto = htons(ETH_P_IPV6);
} else {
- dev_core_stats_rx_dropped_inc(bareudp->dev);
+ dev_dstats_rx_dropped(bareudp->dev);
goto drop;
}
} else if (bareudp->ethertype == htons(ETH_P_MPLS_UC)) {
@@ -108,7 +108,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
ipv4_is_multicast(tunnel_hdr->daddr)) {
proto = htons(ETH_P_MPLS_MC);
} else {
- dev_core_stats_rx_dropped_inc(bareudp->dev);
+ dev_dstats_rx_dropped(bareudp->dev);
goto drop;
}
} else {
@@ -124,7 +124,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
(addr_type & IPV6_ADDR_MULTICAST)) {
proto = htons(ETH_P_MPLS_MC);
} else {
- dev_core_stats_rx_dropped_inc(bareudp->dev);
+ dev_dstats_rx_dropped(bareudp->dev);
goto drop;
}
}
@@ -136,7 +136,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
proto,
!net_eq(bareudp->net,
dev_net(bareudp->dev)))) {
- dev_core_stats_rx_dropped_inc(bareudp->dev);
+ dev_dstats_rx_dropped(bareudp->dev);
goto drop;
}
@@ -144,7 +144,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
tun_dst = udp_tun_rx_dst(skb, family, key, 0, 0);
if (!tun_dst) {
- dev_core_stats_rx_dropped_inc(bareudp->dev);
+ dev_dstats_rx_dropped(bareudp->dev);
goto drop;
}
skb_dst_set(skb, &tun_dst->dst);
@@ -194,7 +194,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
len = skb->len;
err = gro_cells_receive(&bareudp->gro_cells, skb);
if (likely(err == NET_RX_SUCCESS))
- dev_sw_netstats_rx_add(bareudp->dev, len);
+ dev_dstats_rx_add(bareudp->dev, len);
return 0;
drop:
@@ -589,7 +589,7 @@ static void bareudp_setup(struct net_device *dev)
dev->priv_flags |= IFF_NO_QUEUE;
dev->lltx = true;
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
- dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
}
static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c
index c42ebe9da55a..2d555f854008 100644
--- a/drivers/net/can/sja1000/sja1000_platform.c
+++ b/drivers/net/can/sja1000/sja1000_platform.c
@@ -230,18 +230,9 @@ static int sp_probe(struct platform_device *pdev)
return -ENODEV;
}
- res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res_mem)
- return -ENODEV;
-
- if (!devm_request_mem_region(&pdev->dev, res_mem->start,
- resource_size(res_mem), DRV_NAME))
- return -EBUSY;
-
- addr = devm_ioremap(&pdev->dev, res_mem->start,
- resource_size(res_mem));
- if (!addr)
- return -ENOMEM;
+ addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res_mem);
+ if (IS_ERR(addr))
+ return PTR_ERR(addr);
if (of) {
irq = platform_get_irq(pdev, 0);
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 285785c942b0..0561b60f668f 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2224,13 +2224,16 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
}
EXPORT_SYMBOL(b53_eee_init);
-int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e)
+bool b53_support_eee(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
- if (is5325(dev) || is5365(dev))
- return -EOPNOTSUPP;
+ return !is5325(dev) && !is5365(dev);
+}
+EXPORT_SYMBOL(b53_support_eee);
+int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e)
+{
return 0;
}
EXPORT_SYMBOL(b53_get_mac_eee);
@@ -2240,9 +2243,6 @@ int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e)
struct b53_device *dev = ds->priv;
struct ethtool_keee *p = &dev->ports[port].eee;
- if (is5325(dev) || is5365(dev))
- return -EOPNOTSUPP;
-
p->eee_enabled = e->eee_enabled;
b53_eee_enable_set(ds, port, e->eee_enabled);
@@ -2298,6 +2298,7 @@ static const struct dsa_switch_ops b53_switch_ops = {
.phylink_get_caps = b53_phylink_get_caps,
.port_enable = b53_enable_port,
.port_disable = b53_disable_port,
+ .support_eee = b53_support_eee,
.get_mac_eee = b53_get_mac_eee,
.set_mac_eee = b53_set_mac_eee,
.port_bridge_join = b53_br_join,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 05141176daf5..99e5cfc98ae8 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -384,6 +384,7 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
void b53_disable_port(struct dsa_switch *ds, int port);
void b53_brcm_hdr_setup(struct dsa_switch *ds, int port);
int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy);
+bool b53_support_eee(struct dsa_switch *ds, int port);
int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 43bde1f583ff..a53fb6191e6b 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1232,6 +1232,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.set_wol = bcm_sf2_sw_set_wol,
.port_enable = bcm_sf2_port_setup,
.port_disable = bcm_sf2_port_disable,
+ .support_eee = b53_support_eee,
.get_mac_eee = b53_get_mac_eee,
.set_mac_eee = b53_set_mac_eee,
.port_bridge_join = b53_br_join,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 8a03baa6aecc..df314724e6a7 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -2544,7 +2544,11 @@ static int ksz_mdio_register(struct ksz_device *dev)
bus->read = ksz_sw_mdio_read;
bus->write = ksz_sw_mdio_write;
bus->name = "ksz user smi";
- snprintf(bus->id, MII_BUS_ID_SIZE, "SMI-%d", ds->index);
+ if (ds->dst->index != 0) {
+ snprintf(bus->id, MII_BUS_ID_SIZE, "SMI-%d-%d", ds->dst->index, ds->index);
+ } else {
+ snprintf(bus->id, MII_BUS_ID_SIZE, "SMI-%d", ds->index);
+ }
}
ret = ksz_parse_dt_phy_config(dev, bus, mdio_np);
@@ -3444,12 +3448,12 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port)
return -EOPNOTSUPP;
}
-static int ksz_validate_eee(struct dsa_switch *ds, int port)
+static bool ksz_support_eee(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
if (!dev->info->internal_phy[port])
- return -EOPNOTSUPP;
+ return false;
switch (dev->chip_id) {
case KSZ8563_CHIP_ID:
@@ -3461,21 +3465,15 @@ static int ksz_validate_eee(struct dsa_switch *ds, int port)
case KSZ9896_CHIP_ID:
case KSZ9897_CHIP_ID:
case LAN9646_CHIP_ID:
- return 0;
+ return true;
}
- return -EOPNOTSUPP;
+ return false;
}
static int ksz_get_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_keee *e)
{
- int ret;
-
- ret = ksz_validate_eee(ds, port);
- if (ret)
- return ret;
-
/* There is no documented control of Tx LPI configuration. */
e->tx_lpi_enabled = true;
@@ -3491,11 +3489,6 @@ static int ksz_set_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_keee *e)
{
struct ksz_device *dev = ds->priv;
- int ret;
-
- ret = ksz_validate_eee(ds, port);
- if (ret)
- return ret;
if (!e->tx_lpi_enabled) {
dev_err(dev->dev, "Disabling EEE Tx LPI is not supported\n");
@@ -4641,6 +4634,7 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.cls_flower_add = ksz_cls_flower_add,
.cls_flower_del = ksz_cls_flower_del,
.port_setup_tc = ksz_setup_tc,
+ .support_eee = ksz_support_eee,
.get_mac_eee = ksz_get_mac_eee,
.set_mac_eee = ksz_set_mac_eee,
.port_get_default_prio = ksz_port_get_default_prio,
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 086b8b3d5b40..9605febd3573 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -3238,6 +3238,7 @@ const struct dsa_switch_ops mt7530_switch_ops = {
.port_mirror_add = mt753x_port_mirror_add,
.port_mirror_del = mt753x_port_mirror_del,
.phylink_get_caps = mt753x_phylink_get_caps,
+ .support_eee = dsa_supports_eee,
.get_mac_eee = mt753x_get_mac_eee,
.set_mac_eee = mt753x_set_mac_eee,
.conduit_state_change = mt753x_conduit_state_change,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 3a792f79270d..570c8642d387 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1289,9 +1289,6 @@ static size_t mv88e6095_stats_get_stat(struct mv88e6xxx_chip *chip, int port,
const struct mv88e6xxx_hw_stat *stat,
uint64_t *data)
{
- if (!(stat->type & (STATS_TYPE_BANK0 | STATS_TYPE_PORT)))
- return 0;
-
*data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, 0,
MV88E6XXX_G1_STATS_OP_HIST_RX);
return 1;
@@ -1301,9 +1298,6 @@ static size_t mv88e6250_stats_get_stat(struct mv88e6xxx_chip *chip, int port,
const struct mv88e6xxx_hw_stat *stat,
uint64_t *data)
{
- if (!(stat->type & STATS_TYPE_BANK0))
- return 0;
-
*data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, 0,
MV88E6XXX_G1_STATS_OP_HIST_RX);
return 1;
@@ -1313,9 +1307,6 @@ static size_t mv88e6320_stats_get_stat(struct mv88e6xxx_chip *chip, int port,
const struct mv88e6xxx_hw_stat *stat,
uint64_t *data)
{
- if (!(stat->type & (STATS_TYPE_BANK0 | STATS_TYPE_BANK1)))
- return 0;
-
*data = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
MV88E6XXX_G1_STATS_OP_BANK_1_BIT_9,
MV88E6XXX_G1_STATS_OP_HIST_RX);
@@ -1326,9 +1317,6 @@ static size_t mv88e6390_stats_get_stat(struct mv88e6xxx_chip *chip, int port,
const struct mv88e6xxx_hw_stat *stat,
uint64_t *data)
{
- if (!(stat->type & (STATS_TYPE_BANK0 | STATS_TYPE_BANK1)))
- return 0;
-
*data = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
MV88E6XXX_G1_STATS_OP_BANK_1_BIT_10,
0);
@@ -1341,6 +1329,9 @@ static size_t mv88e6xxx_stats_get_stat(struct mv88e6xxx_chip *chip, int port,
{
int ret = 0;
+ if (!(stat->type & chip->info->stats_type))
+ return 0;
+
if (chip->info->ops->stats_get_stat) {
mv88e6xxx_reg_lock(chip);
ret = chip->info->ops->stats_get_stat(chip, port, stat, data);
@@ -5645,6 +5636,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 5,
+ .stats_type = STATS_TYPE_BANK0,
.atu_move_port_mask = 0xf,
.dual_chip = true,
.ops = &mv88e6250_ops,
@@ -5665,6 +5657,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 5,
+ .stats_type = STATS_TYPE_BANK0,
.atu_move_port_mask = 0xf,
.dual_chip = true,
.ops = &mv88e6250_ops,
@@ -5687,6 +5680,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 8,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5708,6 +5702,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.multi_chip = true,
.ops = &mv88e6095_ops,
@@ -5730,6 +5725,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 8,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5754,6 +5750,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5776,6 +5773,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.multi_chip = true,
.ops = &mv88e6131_ops,
@@ -5800,6 +5798,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.atu_move_port_mask = 0x1f,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.pvt = true,
.multi_chip = true,
.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
@@ -5823,6 +5822,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5848,6 +5848,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5872,6 +5873,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5897,6 +5899,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5921,6 +5924,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5946,6 +5950,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -5968,6 +5973,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.multi_chip = true,
.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
@@ -5992,6 +5998,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 9,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.pvt = true,
.multi_chip = true,
.atu_move_port_mask = 0x1f,
@@ -6016,6 +6023,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 9,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6039,6 +6047,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 9,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6063,6 +6072,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 10,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6087,6 +6097,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 10,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6114,6 +6125,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0,
.atu_move_port_mask = 0xf,
.dual_chip = true,
.ptp_support = true,
@@ -6138,6 +6150,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -6161,6 +6174,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0,
.atu_move_port_mask = 0xf,
.dual_chip = true,
.ptp_support = true,
@@ -6184,6 +6198,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 9,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6208,6 +6223,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 8,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -6233,6 +6249,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 8,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0xf,
.multi_chip = true,
.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
@@ -6259,6 +6276,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.atu_move_port_mask = 0x1f,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.pvt = true,
.multi_chip = true,
.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
@@ -6283,6 +6301,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -6307,6 +6326,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -6332,6 +6352,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 15000,
.g1_irqs = 9,
.g2_irqs = 10,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT,
.atu_move_port_mask = 0xf,
.pvt = true,
.multi_chip = true,
@@ -6359,6 +6380,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 10,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6383,6 +6405,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 9,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6408,6 +6431,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 9,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -6433,6 +6457,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.age_time_coeff = 3750,
.g1_irqs = 10,
.g2_irqs = 14,
+ .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
.atu_move_port_mask = 0x1f,
.pvt = true,
.multi_chip = true,
@@ -7074,6 +7099,7 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.get_sset_count = mv88e6xxx_get_sset_count,
.port_max_mtu = mv88e6xxx_get_max_mtu,
.port_change_mtu = mv88e6xxx_change_mtu,
+ .support_eee = dsa_supports_eee,
.get_mac_eee = mv88e6xxx_get_mac_eee,
.set_mac_eee = mv88e6xxx_set_mac_eee,
.get_eeprom_len = mv88e6xxx_get_eeprom_len,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 9fe8e8a7856b..86bf113c9bfa 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -144,6 +144,7 @@ struct mv88e6xxx_info {
unsigned int age_time_coeff;
unsigned int g1_irqs;
unsigned int g2_irqs;
+ int stats_type;
bool pvt;
/* Mark certain ports as invalid. This is required for example for the
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index 59b4a7240b58..ec74e3c2b0e9 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -2016,6 +2016,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
.get_ethtool_stats = qca8k_get_ethtool_stats,
.get_sset_count = qca8k_get_sset_count,
.set_ageing_time = qca8k_set_ageing_time,
+ .support_eee = dsa_supports_eee,
.get_mac_eee = qca8k_get_mac_eee,
.set_mac_eee = qca8k_set_mac_eee,
.port_enable = qca8k_port_enable,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index baba204ad62f..3d790f8c6f4d 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -26,12 +26,8 @@ void sja1105_pack(void *buf, const u64 *val, int start, int end, size_t len)
pr_err("Start bit (%d) expected to be larger than end (%d)\n",
start, end);
} else if (rc == -ERANGE) {
- if ((start - end + 1) > 64)
- pr_err("Field %d-%d too large for 64 bits!\n",
- start, end);
- else
- pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n",
- *val, start, end);
+ pr_err("Field %d-%d too large for 64 bits!\n",
+ start, end);
}
dump_stack();
}
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index e641dbbea1e2..b854b6b42d77 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -421,18 +421,12 @@ static void xgene_enet_configure_clock(struct xgene_enet_pdata *pdata)
if (dev->of_node) {
struct clk *parent = clk_get_parent(pdata->clk);
+ long rate = rgmii_clock(pdata->phy_speed);
- switch (pdata->phy_speed) {
- case SPEED_10:
- clk_set_rate(parent, 2500000);
- break;
- case SPEED_100:
- clk_set_rate(parent, 25000000);
- break;
- default:
- clk_set_rate(parent, 125000000);
- break;
- }
+ if (rate < 0)
+ rate = 125000000;
+
+ clk_set_rate(parent, rate);
}
#ifdef CONFIG_ACPI
else {
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index daa416fb1724..640f500f989d 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -530,19 +530,9 @@ static void macb_set_tx_clk(struct macb *bp, int speed)
if (bp->phy_interface == PHY_INTERFACE_MODE_MII)
return;
- switch (speed) {
- case SPEED_10:
- rate = 2500000;
- break;
- case SPEED_100:
- rate = 25000000;
- break;
- case SPEED_1000:
- rate = 125000000;
- break;
- default:
+ rate = rgmii_clock(speed);
+ if (rate < 0)
return;
- }
rate_rounded = clk_round_rate(bp->tx_clk, rate);
if (rate_rounded < 0)
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 75401d2a5fb4..a2d7300925a8 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -81,8 +81,7 @@ config UCC_GETH
tristate "Freescale QE Gigabit Ethernet"
depends on QUICC_ENGINE && PPC32
select FSL_PQ_MDIO
- select PHYLIB
- select FIXED_PHY
+ select PHYLINK
help
This driver supports the Gigabit Ethernet mode of the QUICC Engine,
which is available on some Freescale SOCs.
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index a293b08f36d4..147a93bf9fa9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -780,13 +780,14 @@ struct ethsw_dump_ctx {
static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry,
struct ethsw_dump_ctx *dump)
{
+ struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx;
int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC;
u32 portid = NETLINK_CB(dump->cb->skb).portid;
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
- if (dump->idx < dump->cb->args[2])
+ if (dump->idx < ctx->fdb_idx)
goto skip;
nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 1cca0425d493..c81f2ea588f2 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -671,8 +671,6 @@ struct fec_enet_private {
unsigned int tx_time_itr;
unsigned int itr_clk_rate;
- /* tx lpi eee mode */
- struct ethtool_keee eee;
unsigned int clk_ref_rate;
/* ptp clock period in ns*/
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1b55047c0237..b2daed55bf6c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2045,14 +2045,14 @@ static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us)
return us * (fep->clk_ref_rate / 1000) / 1000;
}
-static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
+static int fec_enet_eee_mode_set(struct net_device *ndev, u32 lpi_timer,
+ bool enable)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- struct ethtool_keee *p = &fep->eee;
unsigned int sleep_cycle, wake_cycle;
if (enable) {
- sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer);
+ sleep_cycle = fec_enet_us_to_tx_cycle(ndev, lpi_timer);
wake_cycle = sleep_cycle;
} else {
sleep_cycle = 0;
@@ -2105,7 +2105,9 @@ static void fec_enet_adjust_link(struct net_device *ndev)
napi_enable(&fep->napi);
}
if (fep->quirks & FEC_QUIRK_HAS_EEE)
- fec_enet_eee_mode_set(ndev, phy_dev->enable_tx_lpi);
+ fec_enet_eee_mode_set(ndev,
+ phy_dev->eee_cfg.tx_lpi_timer,
+ phy_dev->enable_tx_lpi);
} else {
if (fep->link) {
netif_stop_queue(ndev);
@@ -3181,7 +3183,6 @@ static int
fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- struct ethtool_keee *p = &fep->eee;
if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
return -EOPNOTSUPP;
@@ -3189,8 +3190,6 @@ fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata)
if (!netif_running(ndev))
return -ENETDOWN;
- edata->tx_lpi_timer = p->tx_lpi_timer;
-
return phy_ethtool_get_eee(ndev->phydev, edata);
}
@@ -3198,7 +3197,6 @@ static int
fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- struct ethtool_keee *p = &fep->eee;
if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
return -EOPNOTSUPP;
@@ -3206,8 +3204,6 @@ fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata)
if (!netif_running(ndev))
return -ENETDOWN;
- p->tx_lpi_timer = edata->tx_lpi_timer;
-
return phy_ethtool_set_eee(ndev->phydev, edata);
}
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index fb416d60dcd7..11887458f050 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -2690,13 +2690,12 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
{
struct fman *fman;
struct device_node *fm_node, *muram_node;
+ void __iomem *base_addr;
struct resource *res;
u32 val, range[2];
int err, irq;
struct clk *clk;
u32 clk_rate;
- phys_addr_t phys_base_addr;
- resource_size_t mem_size;
fman = kzalloc(sizeof(*fman), GFP_KERNEL);
if (!fman)
@@ -2724,18 +2723,6 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
goto fman_node_put;
fman->dts_params.err_irq = err;
- /* Get the FM address */
- res = platform_get_resource(of_dev, IORESOURCE_MEM, 0);
- if (!res) {
- err = -EINVAL;
- dev_err(&of_dev->dev, "%s: Can't get FMan memory resource\n",
- __func__);
- goto fman_node_put;
- }
-
- phys_base_addr = res->start;
- mem_size = resource_size(res);
-
clk = of_clk_get(fm_node, 0);
if (IS_ERR(clk)) {
err = PTR_ERR(clk);
@@ -2803,24 +2790,16 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
}
}
- fman->dts_params.res =
- devm_request_mem_region(&of_dev->dev, phys_base_addr,
- mem_size, "fman");
- if (!fman->dts_params.res) {
- err = -EBUSY;
- dev_err(&of_dev->dev, "%s: request_mem_region() failed\n",
- __func__);
- goto fman_free;
- }
-
- fman->dts_params.base_addr =
- devm_ioremap(&of_dev->dev, phys_base_addr, mem_size);
- if (!fman->dts_params.base_addr) {
- err = -ENOMEM;
+ base_addr = devm_platform_get_and_ioremap_resource(of_dev, 0, &res);
+ if (IS_ERR(base_addr)) {
+ err = PTR_ERR(base_addr);
dev_err(&of_dev->dev, "%s: devm_ioremap() failed\n", __func__);
goto fman_free;
}
+ fman->dts_params.base_addr = base_addr;
+ fman->dts_params.res = res;
+
fman->dev = &of_dev->dev;
err = of_platform_populate(fm_node, NULL, NULL, &of_dev->dev);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 6663c1768089..f47f8177a93b 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -26,7 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/mii.h>
#include <linux/phy.h>
-#include <linux/phy_fixed.h>
+#include <linux/phylink.h>
#include <linux/workqueue.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -34,6 +34,7 @@
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
#include <linux/uaccess.h>
#include <asm/irq.h>
@@ -132,7 +133,6 @@ static const struct ucc_geth_info ugeth_primary_info = {
.transmitFlowControl = 1,
.maxGroupAddrInHash = 4,
.maxIndAddrInHash = 4,
- .prel = 7,
.maxFrameLength = 1518+16, /* Add extra bytes for VLANs etc. */
.minFrameLength = 64,
.maxD1Length = 1520+16, /* Add extra bytes for VLANs etc. */
@@ -1205,34 +1205,6 @@ static int init_mac_station_addr_regs(u8 address_byte_0,
return 0;
}
-static int init_check_frame_length_mode(int length_check,
- u32 __iomem *maccfg2_register)
-{
- u32 value = 0;
-
- value = in_be32(maccfg2_register);
-
- if (length_check)
- value |= MACCFG2_LC;
- else
- value &= ~MACCFG2_LC;
-
- out_be32(maccfg2_register, value);
- return 0;
-}
-
-static int init_preamble_length(u8 preamble_length,
- u32 __iomem *maccfg2_register)
-{
- if ((preamble_length < 3) || (preamble_length > 7))
- return -EINVAL;
-
- clrsetbits_be32(maccfg2_register, MACCFG2_PREL_MASK,
- preamble_length << MACCFG2_PREL_SHIFT);
-
- return 0;
-}
-
static int init_rx_parameters(int reject_broadcast,
int receive_short_frames,
int promiscuous, u32 __iomem *upsmr_register)
@@ -1287,94 +1259,11 @@ static int init_min_frame_len(u16 min_frame_length,
return 0;
}
-static int adjust_enet_interface(struct ucc_geth_private *ugeth)
+static bool phy_interface_mode_is_reduced(phy_interface_t interface)
{
- struct ucc_geth_info *ug_info;
- struct ucc_geth __iomem *ug_regs;
- struct ucc_fast __iomem *uf_regs;
- int ret_val;
- u32 upsmr, maccfg2;
- u16 value;
-
- ugeth_vdbg("%s: IN", __func__);
-
- ug_info = ugeth->ug_info;
- ug_regs = ugeth->ug_regs;
- uf_regs = ugeth->uccf->uf_regs;
-
- /* Set MACCFG2 */
- maccfg2 = in_be32(&ug_regs->maccfg2);
- maccfg2 &= ~MACCFG2_INTERFACE_MODE_MASK;
- if ((ugeth->max_speed == SPEED_10) ||
- (ugeth->max_speed == SPEED_100))
- maccfg2 |= MACCFG2_INTERFACE_MODE_NIBBLE;
- else if (ugeth->max_speed == SPEED_1000)
- maccfg2 |= MACCFG2_INTERFACE_MODE_BYTE;
- maccfg2 |= ug_info->padAndCrc;
- out_be32(&ug_regs->maccfg2, maccfg2);
-
- /* Set UPSMR */
- upsmr = in_be32(&uf_regs->upsmr);
- upsmr &= ~(UCC_GETH_UPSMR_RPM | UCC_GETH_UPSMR_R10M |
- UCC_GETH_UPSMR_TBIM | UCC_GETH_UPSMR_RMM);
- if ((ugeth->phy_interface == PHY_INTERFACE_MODE_RMII) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_ID) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) {
- if (ugeth->phy_interface != PHY_INTERFACE_MODE_RMII)
- upsmr |= UCC_GETH_UPSMR_RPM;
- switch (ugeth->max_speed) {
- case SPEED_10:
- upsmr |= UCC_GETH_UPSMR_R10M;
- fallthrough;
- case SPEED_100:
- if (ugeth->phy_interface != PHY_INTERFACE_MODE_RTBI)
- upsmr |= UCC_GETH_UPSMR_RMM;
- }
- }
- if ((ugeth->phy_interface == PHY_INTERFACE_MODE_TBI) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) {
- upsmr |= UCC_GETH_UPSMR_TBIM;
- }
- if (ugeth->phy_interface == PHY_INTERFACE_MODE_SGMII)
- upsmr |= UCC_GETH_UPSMR_SGMM;
-
- out_be32(&uf_regs->upsmr, upsmr);
-
- /* Disable autonegotiation in tbi mode, because by default it
- comes up in autonegotiation mode. */
- /* Note that this depends on proper setting in utbipar register. */
- if ((ugeth->phy_interface == PHY_INTERFACE_MODE_TBI) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) {
- struct ucc_geth_info *ug_info = ugeth->ug_info;
- struct phy_device *tbiphy;
-
- if (!ug_info->tbi_node)
- pr_warn("TBI mode requires that the device tree specify a tbi-handle\n");
-
- tbiphy = of_phy_find_device(ug_info->tbi_node);
- if (!tbiphy)
- pr_warn("Could not get TBI device\n");
-
- value = phy_read(tbiphy, ENET_TBI_MII_CR);
- value &= ~0x1000; /* Turn off autonegotiation */
- phy_write(tbiphy, ENET_TBI_MII_CR, value);
-
- put_device(&tbiphy->mdio.dev);
- }
-
- init_check_frame_length_mode(ug_info->lengthCheckRx, &ug_regs->maccfg2);
-
- ret_val = init_preamble_length(ug_info->prel, &ug_regs->maccfg2);
- if (ret_val != 0) {
- if (netif_msg_probe(ugeth))
- pr_err("Preamble length must be between 3 and 7 inclusive\n");
- return ret_val;
- }
-
- return 0;
+ return phy_interface_mode_is_rgmii(interface) ||
+ interface == PHY_INTERFACE_MODE_RMII ||
+ interface == PHY_INTERFACE_MODE_RTBI;
}
static int ugeth_graceful_stop_tx(struct ucc_geth_private *ugeth)
@@ -1548,107 +1437,6 @@ static void ugeth_activate(struct ucc_geth_private *ugeth)
__netdev_watchdog_up(ugeth->ndev);
}
-/* Called every time the controller might need to be made
- * aware of new link state. The PHY code conveys this
- * information through variables in the ugeth structure, and this
- * function converts those variables into the appropriate
- * register values, and can bring down the device if needed.
- */
-
-static void adjust_link(struct net_device *dev)
-{
- struct ucc_geth_private *ugeth = netdev_priv(dev);
- struct ucc_geth __iomem *ug_regs;
- struct ucc_fast __iomem *uf_regs;
- struct phy_device *phydev = ugeth->phydev;
- int new_state = 0;
-
- ug_regs = ugeth->ug_regs;
- uf_regs = ugeth->uccf->uf_regs;
-
- if (phydev->link) {
- u32 tempval = in_be32(&ug_regs->maccfg2);
- u32 upsmr = in_be32(&uf_regs->upsmr);
- /* Now we make sure that we can be in full duplex mode.
- * If not, we operate in half-duplex mode. */
- if (phydev->duplex != ugeth->oldduplex) {
- new_state = 1;
- if (!(phydev->duplex))
- tempval &= ~(MACCFG2_FDX);
- else
- tempval |= MACCFG2_FDX;
- ugeth->oldduplex = phydev->duplex;
- }
-
- if (phydev->speed != ugeth->oldspeed) {
- new_state = 1;
- switch (phydev->speed) {
- case SPEED_1000:
- tempval = ((tempval &
- ~(MACCFG2_INTERFACE_MODE_MASK)) |
- MACCFG2_INTERFACE_MODE_BYTE);
- break;
- case SPEED_100:
- case SPEED_10:
- tempval = ((tempval &
- ~(MACCFG2_INTERFACE_MODE_MASK)) |
- MACCFG2_INTERFACE_MODE_NIBBLE);
- /* if reduced mode, re-set UPSMR.R10M */
- if ((ugeth->phy_interface == PHY_INTERFACE_MODE_RMII) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_ID) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) ||
- (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) {
- if (phydev->speed == SPEED_10)
- upsmr |= UCC_GETH_UPSMR_R10M;
- else
- upsmr &= ~UCC_GETH_UPSMR_R10M;
- }
- break;
- default:
- if (netif_msg_link(ugeth))
- pr_warn(
- "%s: Ack! Speed (%d) is not 10/100/1000!",
- dev->name, phydev->speed);
- break;
- }
- ugeth->oldspeed = phydev->speed;
- }
-
- if (!ugeth->oldlink) {
- new_state = 1;
- ugeth->oldlink = 1;
- }
-
- if (new_state) {
- /*
- * To change the MAC configuration we need to disable
- * the controller. To do so, we have to either grab
- * ugeth->lock, which is a bad idea since 'graceful
- * stop' commands might take quite a while, or we can
- * quiesce driver's activity.
- */
- ugeth_quiesce(ugeth);
- ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
-
- out_be32(&ug_regs->maccfg2, tempval);
- out_be32(&uf_regs->upsmr, upsmr);
-
- ugeth_enable(ugeth, COMM_DIR_RX_AND_TX);
- ugeth_activate(ugeth);
- }
- } else if (ugeth->oldlink) {
- new_state = 1;
- ugeth->oldlink = 0;
- ugeth->oldspeed = 0;
- ugeth->oldduplex = -1;
- }
-
- if (new_state && netif_msg_link(ugeth))
- phy_print_status(phydev);
-}
-
/* Initialize TBI PHY interface for communicating with the
* SERDES lynx PHY on the chip. We communicate with this PHY
* through the MDIO bus on each controller, treating it as a
@@ -1664,8 +1452,7 @@ static void uec_configure_serdes(struct net_device *dev)
struct phy_device *tbiphy;
if (!ug_info->tbi_node) {
- dev_warn(&dev->dev, "SGMII mode requires that the device "
- "tree specify a tbi-handle\n");
+ dev_warn(&dev->dev, "SGMII mode requires that the device tree specify a tbi-handle\n");
return;
}
@@ -1696,34 +1483,145 @@ static void uec_configure_serdes(struct net_device *dev)
put_device(&tbiphy->mdio.dev);
}
-/* Configure the PHY for dev.
- * returns 0 if success. -1 if failure
- */
-static int init_phy(struct net_device *dev)
+static void ugeth_mac_link_up(struct phylink_config *config, struct phy_device *phy,
+ unsigned int mode, phy_interface_t interface,
+ int speed, int duplex, bool tx_pause, bool rx_pause)
{
- struct ucc_geth_private *priv = netdev_priv(dev);
- struct ucc_geth_info *ug_info = priv->ug_info;
- struct phy_device *phydev;
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct ucc_geth_private *ugeth = netdev_priv(ndev);
+ struct ucc_geth_info *ug_info = ugeth->ug_info;
+ struct ucc_geth __iomem *ug_regs = ugeth->ug_regs;
+ struct ucc_fast __iomem *uf_regs = ugeth->uccf->uf_regs;
+ u32 old_maccfg2, maccfg2 = in_be32(&ug_regs->maccfg2);
+ u32 old_upsmr, upsmr = in_be32(&uf_regs->upsmr);
- priv->oldlink = 0;
- priv->oldspeed = 0;
- priv->oldduplex = -1;
+ old_maccfg2 = maccfg2;
+ old_upsmr = upsmr;
- phydev = of_phy_connect(dev, ug_info->phy_node, &adjust_link, 0,
- priv->phy_interface);
- if (!phydev) {
- dev_err(&dev->dev, "Could not attach to PHY\n");
- return -ENODEV;
+ /* No length check */
+ maccfg2 &= ~MACCFG2_LC;
+ maccfg2 &= ~MACCFG2_INTERFACE_MODE_MASK;
+ upsmr &= ~(UCC_GETH_UPSMR_RPM | UCC_GETH_UPSMR_R10M |
+ UCC_GETH_UPSMR_TBIM | UCC_GETH_UPSMR_RMM);
+
+ if (speed == SPEED_10 || speed == SPEED_100)
+ maccfg2 |= MACCFG2_INTERFACE_MODE_NIBBLE;
+ else if (speed == SPEED_1000)
+ maccfg2 |= MACCFG2_INTERFACE_MODE_BYTE;
+
+ maccfg2 |= ug_info->padAndCrc;
+
+ if (phy_interface_mode_is_reduced(interface)) {
+
+ if (interface != PHY_INTERFACE_MODE_RMII)
+ upsmr |= UCC_GETH_UPSMR_RPM;
+
+ switch (speed) {
+ case SPEED_10:
+ upsmr |= UCC_GETH_UPSMR_R10M;
+ fallthrough;
+ case SPEED_100:
+ if (interface != PHY_INTERFACE_MODE_RTBI)
+ upsmr |= UCC_GETH_UPSMR_RMM;
+ }
}
- if (priv->phy_interface == PHY_INTERFACE_MODE_SGMII)
- uec_configure_serdes(dev);
+ if (interface == PHY_INTERFACE_MODE_TBI ||
+ interface == PHY_INTERFACE_MODE_RTBI)
+ upsmr |= UCC_GETH_UPSMR_TBIM;
- phy_set_max_speed(phydev, priv->max_speed);
+ if (interface == PHY_INTERFACE_MODE_SGMII)
+ upsmr |= UCC_GETH_UPSMR_SGMM;
- priv->phydev = phydev;
+ if (duplex == DUPLEX_HALF)
+ maccfg2 &= ~(MACCFG2_FDX);
+ else
+ maccfg2 |= MACCFG2_FDX;
- return 0;
+ if (maccfg2 != old_maccfg2 || upsmr != old_upsmr) {
+ /*
+ * To change the MAC configuration we need to disable
+ * the controller. To do so, we have to either grab
+ * ugeth->lock, which is a bad idea since 'graceful
+ * stop' commands might take quite a while, or we can
+ * quiesce driver's activity.
+ */
+ ugeth_quiesce(ugeth);
+ ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
+
+ out_be32(&ug_regs->maccfg2, maccfg2);
+ out_be32(&uf_regs->upsmr, upsmr);
+
+ ugeth_enable(ugeth, COMM_DIR_RX_AND_TX);
+ ugeth_activate(ugeth);
+ }
+
+ if (interface == PHY_INTERFACE_MODE_SGMII)
+ uec_configure_serdes(ndev);
+
+ if (!phylink_autoneg_inband(mode)) {
+ ug_info->aufc = 0;
+ ug_info->receiveFlowControl = rx_pause;
+ ug_info->transmitFlowControl = tx_pause;
+
+ init_flow_control_params(ug_info->aufc,
+ ug_info->receiveFlowControl,
+ ug_info->transmitFlowControl,
+ ug_info->pausePeriod,
+ ug_info->extensionField,
+ &ugeth->uccf->uf_regs->upsmr,
+ &ugeth->ug_regs->uempr,
+ &ugeth->ug_regs->maccfg1);
+ }
+
+ ugeth_enable(ugeth, COMM_DIR_RX_AND_TX);
+}
+
+static void ugeth_mac_link_down(struct phylink_config *config,
+ unsigned int mode, phy_interface_t interface)
+{
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct ucc_geth_private *ugeth = netdev_priv(ndev);
+
+ ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
+}
+
+static void ugeth_mac_config(struct phylink_config *config, unsigned int mode,
+ const struct phylink_link_state *state)
+{
+ struct net_device *ndev = to_net_dev(config->dev);
+ struct ucc_geth_private *ugeth = netdev_priv(ndev);
+ struct ucc_geth_info *ug_info = ugeth->ug_info;
+ u16 value;
+
+ if (state->interface == PHY_INTERFACE_MODE_TBI ||
+ state->interface == PHY_INTERFACE_MODE_RTBI) {
+ struct phy_device *tbiphy;
+
+ if (!ug_info->tbi_node)
+ pr_warn("TBI mode requires that the device tree specify a tbi-handle\n");
+
+ tbiphy = of_phy_find_device(ug_info->tbi_node);
+ if (!tbiphy)
+ pr_warn("Could not get TBI device\n");
+
+ value = phy_read(tbiphy, ENET_TBI_MII_CR);
+ value &= ~0x1000; /* Turn off autonegotiation */
+ phy_write(tbiphy, ENET_TBI_MII_CR, value);
+
+ put_device(&tbiphy->mdio.dev);
+ }
+
+ if (phylink_autoneg_inband(mode)) {
+ ug_info->aufc = 1;
+
+ init_flow_control_params(ug_info->aufc, 1, 1,
+ ug_info->pausePeriod,
+ ug_info->extensionField,
+ &ugeth->uccf->uf_regs->upsmr,
+ &ugeth->ug_regs->uempr,
+ &ugeth->ug_regs->maccfg1);
+ }
}
static void ugeth_dump_regs(struct ucc_geth_private *ugeth)
@@ -1995,7 +1893,6 @@ static void ucc_geth_set_multi(struct net_device *dev)
static void ucc_geth_stop(struct ucc_geth_private *ugeth)
{
struct ucc_geth __iomem *ug_regs = ugeth->ug_regs;
- struct phy_device *phydev = ugeth->phydev;
ugeth_vdbg("%s: IN", __func__);
@@ -2004,7 +1901,7 @@ static void ucc_geth_stop(struct ucc_geth_private *ugeth)
* Must be done before disabling the controller
* or deadlock may happen.
*/
- phy_stop(phydev);
+ phylink_stop(ugeth->phylink);
/* Disable the controller */
ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
@@ -3246,12 +3143,6 @@ static int ucc_geth_init_mac(struct ucc_geth_private *ugeth)
goto err;
}
- err = adjust_enet_interface(ugeth);
- if (err) {
- netif_err(ugeth, ifup, dev, "Cannot configure net device, aborting\n");
- goto err;
- }
-
/* Set MACSTNADDR1, MACSTNADDR2 */
/* For more details see the hardware spec. */
init_mac_station_addr_regs(dev->dev_addr[0],
@@ -3263,12 +3154,6 @@ static int ucc_geth_init_mac(struct ucc_geth_private *ugeth)
&ugeth->ug_regs->macstnaddr1,
&ugeth->ug_regs->macstnaddr2);
- err = ugeth_enable(ugeth, COMM_DIR_RX_AND_TX);
- if (err) {
- netif_err(ugeth, ifup, dev, "Cannot enable net device, aborting\n");
- goto err;
- }
-
return 0;
err:
ucc_geth_stop(ugeth);
@@ -3291,10 +3176,10 @@ static int ucc_geth_open(struct net_device *dev)
return -EINVAL;
}
- err = init_phy(dev);
+ err = phylink_of_phy_connect(ugeth->phylink, ugeth->dev->of_node, 0);
if (err) {
- netif_err(ugeth, ifup, dev, "Cannot initialize PHY, aborting\n");
- return err;
+ dev_err(&dev->dev, "Could not attach to PHY\n");
+ return -ENODEV;
}
err = ucc_geth_init_mac(ugeth);
@@ -3310,13 +3195,13 @@ static int ucc_geth_open(struct net_device *dev)
goto err;
}
- phy_start(ugeth->phydev);
+ phylink_start(ugeth->phylink);
napi_enable(&ugeth->napi);
netdev_reset_queue(dev);
netif_start_queue(dev);
device_set_wakeup_capable(&dev->dev,
- qe_alive_during_sleep() || ugeth->phydev->irq);
+ qe_alive_during_sleep() || dev->phydev->irq);
device_set_wakeup_enable(&dev->dev, ugeth->wol_en);
return err;
@@ -3337,8 +3222,7 @@ static int ucc_geth_close(struct net_device *dev)
cancel_work_sync(&ugeth->timeout_work);
ucc_geth_stop(ugeth);
- phy_disconnect(ugeth->phydev);
- ugeth->phydev = NULL;
+ phylink_disconnect_phy(ugeth->phylink);
free_irq(ugeth->ug_info->uf_info.irq, ugeth->ndev);
@@ -3372,7 +3256,7 @@ static void ucc_geth_timeout_work(struct work_struct *work)
ucc_geth_stop(ugeth);
ucc_geth_init_mac(ugeth);
/* Must start PHY here */
- phy_start(ugeth->phydev);
+ phylink_start(ugeth->phylink);
netif_tx_start_all_queues(dev);
}
@@ -3397,6 +3281,7 @@ static int ucc_geth_suspend(struct platform_device *ofdev, pm_message_t state)
{
struct net_device *ndev = platform_get_drvdata(ofdev);
struct ucc_geth_private *ugeth = netdev_priv(ndev);
+ bool mac_wol = false;
if (!netif_running(ndev))
return 0;
@@ -3410,14 +3295,17 @@ static int ucc_geth_suspend(struct platform_device *ofdev, pm_message_t state)
*/
ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
- if (ugeth->wol_en & WAKE_MAGIC) {
+ if (ugeth->wol_en & WAKE_MAGIC && !ugeth->phy_wol_en) {
setbits32(ugeth->uccf->p_uccm, UCC_GETH_UCCE_MPD);
setbits32(&ugeth->ug_regs->maccfg2, MACCFG2_MPE);
ucc_fast_enable(ugeth->uccf, COMM_DIR_RX_AND_TX);
- } else if (!(ugeth->wol_en & WAKE_PHY)) {
- phy_stop(ugeth->phydev);
+ mac_wol = true;
}
+ rtnl_lock();
+ phylink_suspend(ugeth->phylink, mac_wol);
+ rtnl_unlock();
+
return 0;
}
@@ -3451,12 +3339,9 @@ static int ucc_geth_resume(struct platform_device *ofdev)
}
}
- ugeth->oldlink = 0;
- ugeth->oldspeed = 0;
- ugeth->oldduplex = -1;
-
- phy_stop(ugeth->phydev);
- phy_start(ugeth->phydev);
+ rtnl_lock();
+ phylink_resume(ugeth->phylink);
+ rtnl_unlock();
napi_enable(&ugeth->napi);
netif_device_attach(ndev);
@@ -3469,32 +3354,6 @@ static int ucc_geth_resume(struct platform_device *ofdev)
#define ucc_geth_resume NULL
#endif
-static phy_interface_t to_phy_interface(const char *phy_connection_type)
-{
- if (strcasecmp(phy_connection_type, "mii") == 0)
- return PHY_INTERFACE_MODE_MII;
- if (strcasecmp(phy_connection_type, "gmii") == 0)
- return PHY_INTERFACE_MODE_GMII;
- if (strcasecmp(phy_connection_type, "tbi") == 0)
- return PHY_INTERFACE_MODE_TBI;
- if (strcasecmp(phy_connection_type, "rmii") == 0)
- return PHY_INTERFACE_MODE_RMII;
- if (strcasecmp(phy_connection_type, "rgmii") == 0)
- return PHY_INTERFACE_MODE_RGMII;
- if (strcasecmp(phy_connection_type, "rgmii-id") == 0)
- return PHY_INTERFACE_MODE_RGMII_ID;
- if (strcasecmp(phy_connection_type, "rgmii-txid") == 0)
- return PHY_INTERFACE_MODE_RGMII_TXID;
- if (strcasecmp(phy_connection_type, "rgmii-rxid") == 0)
- return PHY_INTERFACE_MODE_RGMII_RXID;
- if (strcasecmp(phy_connection_type, "rtbi") == 0)
- return PHY_INTERFACE_MODE_RTBI;
- if (strcasecmp(phy_connection_type, "sgmii") == 0)
- return PHY_INTERFACE_MODE_SGMII;
-
- return PHY_INTERFACE_MODE_MII;
-}
-
static int ucc_geth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
struct ucc_geth_private *ugeth = netdev_priv(dev);
@@ -3502,10 +3361,7 @@ static int ucc_geth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!netif_running(dev))
return -EINVAL;
- if (!ugeth->phydev)
- return -ENODEV;
-
- return phy_mii_ioctl(ugeth->phydev, rq, cmd);
+ return phylink_mii_ioctl(ugeth->phylink, rq, cmd);
}
static const struct net_device_ops ucc_geth_netdev_ops = {
@@ -3513,7 +3369,6 @@ static const struct net_device_ops ucc_geth_netdev_ops = {
.ndo_stop = ucc_geth_close,
.ndo_start_xmit = ucc_geth_start_xmit,
.ndo_validate_addr = eth_validate_addr,
- .ndo_change_carrier = fixed_phy_change_carrier,
.ndo_set_mac_address = ucc_geth_set_mac_addr,
.ndo_set_rx_mode = ucc_geth_set_multi,
.ndo_tx_timeout = ucc_geth_timeout,
@@ -3553,6 +3408,12 @@ static int ucc_geth_parse_clock(struct device_node *np, const char *which,
return 0;
}
+struct phylink_mac_ops ugeth_mac_ops = {
+ .mac_link_up = ugeth_mac_link_up,
+ .mac_link_down = ugeth_mac_link_down,
+ .mac_config = ugeth_mac_config,
+};
+
static int ucc_geth_probe(struct platform_device* ofdev)
{
struct device *device = &ofdev->dev;
@@ -3560,23 +3421,12 @@ static int ucc_geth_probe(struct platform_device* ofdev)
struct net_device *dev = NULL;
struct ucc_geth_private *ugeth = NULL;
struct ucc_geth_info *ug_info;
+ struct device_node *phy_node;
+ struct phylink *phylink;
struct resource res;
- int err, ucc_num, max_speed = 0;
+ int err, ucc_num;
const unsigned int *prop;
phy_interface_t phy_interface;
- static const int enet_to_speed[] = {
- SPEED_10, SPEED_10, SPEED_10,
- SPEED_100, SPEED_100, SPEED_100,
- SPEED_1000, SPEED_1000, SPEED_1000, SPEED_1000,
- };
- static const phy_interface_t enet_to_phy_interface[] = {
- PHY_INTERFACE_MODE_MII, PHY_INTERFACE_MODE_RMII,
- PHY_INTERFACE_MODE_RGMII, PHY_INTERFACE_MODE_MII,
- PHY_INTERFACE_MODE_RMII, PHY_INTERFACE_MODE_RGMII,
- PHY_INTERFACE_MODE_GMII, PHY_INTERFACE_MODE_RGMII,
- PHY_INTERFACE_MODE_TBI, PHY_INTERFACE_MODE_RTBI,
- PHY_INTERFACE_MODE_SGMII,
- };
ugeth_vdbg("%s: IN", __func__);
@@ -3612,57 +3462,35 @@ static int ucc_geth_probe(struct platform_device* ofdev)
ug_info->uf_info.regs = res.start;
ug_info->uf_info.irq = irq_of_parse_and_map(np, 0);
- ug_info->phy_node = of_parse_phandle(np, "phy-handle", 0);
- if (!ug_info->phy_node && of_phy_is_fixed_link(np)) {
- /*
- * In the case of a fixed PHY, the DT node associated
- * to the PHY is the Ethernet MAC DT node.
- */
- err = of_phy_register_fixed_link(np);
- if (err)
- return err;
- ug_info->phy_node = of_node_get(np);
- }
-
/* Find the TBI PHY node. If it's not there, we don't support SGMII */
ug_info->tbi_node = of_parse_phandle(np, "tbi-handle", 0);
- /* get the phy interface type, or default to MII */
- prop = of_get_property(np, "phy-connection-type", NULL);
- if (!prop) {
- /* handle interface property present in old trees */
- prop = of_get_property(ug_info->phy_node, "interface", NULL);
- if (prop != NULL) {
- phy_interface = enet_to_phy_interface[*prop];
- max_speed = enet_to_speed[*prop];
- } else
- phy_interface = PHY_INTERFACE_MODE_MII;
- } else {
- phy_interface = to_phy_interface((const char *)prop);
- }
-
- /* get speed, or derive from PHY interface */
- if (max_speed == 0)
- switch (phy_interface) {
- case PHY_INTERFACE_MODE_GMII:
- case PHY_INTERFACE_MODE_RGMII:
- case PHY_INTERFACE_MODE_RGMII_ID:
- case PHY_INTERFACE_MODE_RGMII_RXID:
- case PHY_INTERFACE_MODE_RGMII_TXID:
- case PHY_INTERFACE_MODE_TBI:
- case PHY_INTERFACE_MODE_RTBI:
- case PHY_INTERFACE_MODE_SGMII:
- max_speed = SPEED_1000;
- break;
- default:
- max_speed = SPEED_100;
- break;
+ phy_node = of_parse_phandle(np, "phy-handle", 0);
+ if (phy_node) {
+ prop = of_get_property(phy_node, "interface", NULL);
+ if (prop) {
+ dev_err(&ofdev->dev,
+ "Device-tree property 'interface' is no longer supported. Please use 'phy-connection-type' instead.");
+ of_node_put(phy_node);
+ err = -EINVAL;
+ goto err_put_tbi;
}
+ of_node_put(phy_node);
+ }
+
+ err = of_get_phy_mode(np, &phy_interface);
+ if (err) {
+ dev_err(&ofdev->dev, "Invalid phy-connection-type");
+ goto err_put_tbi;
+ }
- if (max_speed == SPEED_1000) {
+ if (phy_interface == PHY_INTERFACE_MODE_GMII ||
+ phy_interface_mode_is_rgmii(phy_interface) ||
+ phy_interface == PHY_INTERFACE_MODE_TBI ||
+ phy_interface == PHY_INTERFACE_MODE_RTBI ||
+ phy_interface == PHY_INTERFACE_MODE_SGMII) {
unsigned int snums = qe_get_num_of_snums();
- /* configure muram FIFOs for gigabit operation */
ug_info->uf_info.urfs = UCC_GETH_URFS_GIGA_INIT;
ug_info->uf_info.urfet = UCC_GETH_URFET_GIGA_INIT;
ug_info->uf_info.urfset = UCC_GETH_URFSET_GIGA_INIT;
@@ -3691,7 +3519,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
dev = devm_alloc_etherdev(&ofdev->dev, sizeof(*ugeth));
if (!dev) {
err = -ENOMEM;
- goto err_deregister_fixed_link;
+ goto err_put_tbi;
}
ugeth = netdev_priv(dev);
@@ -3718,23 +3546,50 @@ static int ucc_geth_probe(struct platform_device* ofdev)
dev->max_mtu = 1518;
ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT);
- ugeth->phy_interface = phy_interface;
- ugeth->max_speed = max_speed;
- /* Carrier starts down, phylib will bring it up */
- netif_carrier_off(dev);
+ ugeth->phylink_config.dev = &dev->dev;
+ ugeth->phylink_config.type = PHYLINK_NETDEV;
+
+ ugeth->phylink_config.mac_capabilities =
+ MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD;
+
+ __set_bit(PHY_INTERFACE_MODE_MII,
+ ugeth->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_RMII,
+ ugeth->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ ugeth->phylink_config.supported_interfaces);
+ phy_interface_set_rgmii(ugeth->phylink_config.supported_interfaces);
+
+ if (ug_info->tbi_node) {
+ __set_bit(PHY_INTERFACE_MODE_SGMII,
+ ugeth->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_TBI,
+ ugeth->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_RTBI,
+ ugeth->phylink_config.supported_interfaces);
+ }
+
+ phylink = phylink_create(&ugeth->phylink_config, dev_fwnode(&dev->dev),
+ phy_interface, &ugeth_mac_ops);
+ if (IS_ERR(phylink)) {
+ err = PTR_ERR(phylink);
+ goto err_put_tbi;
+ }
+
+ ugeth->phylink = phylink;
err = devm_register_netdev(&ofdev->dev, dev);
if (err) {
if (netif_msg_probe(ugeth))
pr_err("%s: Cannot register net device, aborting\n",
dev->name);
- goto err_deregister_fixed_link;
+ goto err_destroy_phylink;
}
err = of_get_ethdev_address(np, dev);
if (err == -EPROBE_DEFER)
- goto err_deregister_fixed_link;
+ goto err_destroy_phylink;
ugeth->ug_info = ug_info;
ugeth->dev = device;
@@ -3743,11 +3598,11 @@ static int ucc_geth_probe(struct platform_device* ofdev)
return 0;
-err_deregister_fixed_link:
- if (of_phy_is_fixed_link(np))
- of_phy_deregister_fixed_link(np);
+err_destroy_phylink:
+ phylink_destroy(phylink);
+err_put_tbi:
of_node_put(ug_info->tbi_node);
- of_node_put(ug_info->phy_node);
+
return err;
}
@@ -3755,13 +3610,10 @@ static void ucc_geth_remove(struct platform_device* ofdev)
{
struct net_device *dev = platform_get_drvdata(ofdev);
struct ucc_geth_private *ugeth = netdev_priv(dev);
- struct device_node *np = ofdev->dev.of_node;
ucc_geth_memclean(ugeth);
- if (of_phy_is_fixed_link(np))
- of_phy_deregister_fixed_link(np);
+ phylink_destroy(ugeth->phylink);
of_node_put(ugeth->ug_info->tbi_node);
- of_node_put(ugeth->ug_info->phy_node);
}
static const struct of_device_id ucc_geth_match[] = {
diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index 4294ed096ebb..38789faae706 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -16,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/phylink.h>
#include <linux/if_ether.h>
#include <soc/fsl/qe/immap_qe.h>
@@ -921,7 +922,8 @@ struct ucc_geth_hardware_statistics {
#define UCC_GETH_UPSMR_INIT UCC_GETH_UPSMR_RES1
#define UCC_GETH_MACCFG1_INIT 0
-#define UCC_GETH_MACCFG2_INIT (MACCFG2_RESERVED_1)
+#define UCC_GETH_MACCFG2_INIT (MACCFG2_RESERVED_1 | \
+ (7 << MACCFG2_PREL_SHIFT))
/* Ethernet Address Type. */
enum enet_addr_type {
@@ -1073,6 +1075,9 @@ struct ucc_geth_tad_params {
u16 vid;
};
+struct phylink;
+struct phylink_config;
+
/* GETH protocol initialization structure */
struct ucc_geth_info {
struct ucc_fast_info uf_info;
@@ -1088,7 +1093,6 @@ struct ucc_geth_info {
u8 miminumInterFrameGapEnforcement;
u8 backToBackInterFrameGap;
int ipAddressAlignment;
- int lengthCheckRx;
u32 mblinterval;
u16 nortsrbytetime;
u8 fracsiz;
@@ -1114,7 +1118,6 @@ struct ucc_geth_info {
int transmitFlowControl;
u8 maxGroupAddrInHash;
u8 maxIndAddrInHash;
- u8 prel;
u16 maxFrameLength;
u16 minFrameLength;
u16 maxD1Length;
@@ -1125,7 +1128,6 @@ struct ucc_geth_info {
u32 eventRegMask;
u16 pausePeriod;
u16 extensionField;
- struct device_node *phy_node;
struct device_node *tbi_node;
u8 weightfactor[NUM_TX_QUEUES];
u8 interruptcoalescingmaxvalue[NUM_RX_QUEUES];
@@ -1210,14 +1212,12 @@ struct ucc_geth_private {
u16 skb_dirtytx[NUM_TX_QUEUES];
struct ugeth_mii_info *mii_info;
- struct phy_device *phydev;
- phy_interface_t phy_interface;
- int max_speed;
uint32_t msg_enable;
- int oldspeed;
- int oldduplex;
- int oldlink;
- int wol_en;
+ u32 wol_en;
+ u32 phy_wol_en;
+
+ struct phylink *phylink;
+ struct phylink_config phylink_config;
struct device_node *node;
};
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 699f346faf5c..1fb49e5a414a 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -104,14 +104,8 @@ static int
uec_get_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd)
{
struct ucc_geth_private *ugeth = netdev_priv(netdev);
- struct phy_device *phydev = ugeth->phydev;
- if (!phydev)
- return -ENODEV;
-
- phy_ethtool_ksettings_get(phydev, cmd);
-
- return 0;
+ return phylink_ethtool_ksettings_get(ugeth->phylink, cmd);
}
static int
@@ -119,12 +113,8 @@ uec_set_ksettings(struct net_device *netdev,
const struct ethtool_link_ksettings *cmd)
{
struct ucc_geth_private *ugeth = netdev_priv(netdev);
- struct phy_device *phydev = ugeth->phydev;
- if (!phydev)
- return -ENODEV;
-
- return phy_ethtool_ksettings_set(phydev, cmd);
+ return phylink_ethtool_ksettings_set(ugeth->phylink, cmd);
}
static void
@@ -133,12 +123,7 @@ uec_get_pauseparam(struct net_device *netdev,
{
struct ucc_geth_private *ugeth = netdev_priv(netdev);
- pause->autoneg = ugeth->phydev->autoneg;
-
- if (ugeth->ug_info->receiveFlowControl)
- pause->rx_pause = 1;
- if (ugeth->ug_info->transmitFlowControl)
- pause->tx_pause = 1;
+ return phylink_ethtool_get_pauseparam(ugeth->phylink, pause);
}
static int
@@ -146,30 +131,11 @@ uec_set_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *pause)
{
struct ucc_geth_private *ugeth = netdev_priv(netdev);
- int ret = 0;
ugeth->ug_info->receiveFlowControl = pause->rx_pause;
ugeth->ug_info->transmitFlowControl = pause->tx_pause;
- if (ugeth->phydev->autoneg) {
- if (netif_running(netdev)) {
- /* FIXME: automatically restart */
- netdev_info(netdev, "Please re-open the interface\n");
- }
- } else {
- struct ucc_geth_info *ug_info = ugeth->ug_info;
-
- ret = init_flow_control_params(ug_info->aufc,
- ug_info->receiveFlowControl,
- ug_info->transmitFlowControl,
- ug_info->pausePeriod,
- ug_info->extensionField,
- &ugeth->uccf->uf_regs->upsmr,
- &ugeth->ug_regs->uempr,
- &ugeth->ug_regs->maccfg1);
- }
-
- return ret;
+ return phylink_ethtool_set_pauseparam(ugeth->phylink, pause);
}
static uint32_t
@@ -343,28 +309,42 @@ uec_get_drvinfo(struct net_device *netdev,
static void uec_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
{
struct ucc_geth_private *ugeth = netdev_priv(netdev);
- struct phy_device *phydev = ugeth->phydev;
- if (phydev && phydev->irq)
- wol->supported |= WAKE_PHY;
+ phylink_ethtool_get_wol(ugeth->phylink, wol);
+
if (qe_alive_during_sleep())
wol->supported |= WAKE_MAGIC;
- wol->wolopts = ugeth->wol_en;
+ wol->wolopts |= ugeth->wol_en;
}
static int uec_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
{
struct ucc_geth_private *ugeth = netdev_priv(netdev);
- struct phy_device *phydev = ugeth->phydev;
+ int ret = 0;
- if (wol->wolopts & ~(WAKE_PHY | WAKE_MAGIC))
- return -EINVAL;
- else if (wol->wolopts & WAKE_PHY && (!phydev || !phydev->irq))
+ ret = phylink_ethtool_set_wol(ugeth->phylink, wol);
+ if (ret == -EOPNOTSUPP) {
+ ugeth->phy_wol_en = 0;
+ } else if (ret) {
+ return ret;
+ } else {
+ ugeth->phy_wol_en = wol->wolopts;
+ goto out;
+ }
+
+ /* If the PHY isn't handling the WoL and the MAC is asked to do more
+ * than WAKE_MAGIC, error out
+ */
+ if (!ugeth->phy_wol_en &&
+ wol->wolopts & ~WAKE_MAGIC)
return -EINVAL;
- else if (wol->wolopts & WAKE_MAGIC && !qe_alive_during_sleep())
+
+ if (wol->wolopts & WAKE_MAGIC &&
+ !qe_alive_during_sleep())
return -EINVAL;
+out:
ugeth->wol_en = wol->wolopts;
device_set_wakeup_enable(&netdev->dev, ugeth->wol_en);
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 060e0e674938..aa7d723011d0 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -1128,20 +1128,6 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
return gve_adminq_execute_cmd(priv, &cmd);
}
-int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
-{
- union gve_adminq_command cmd;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
- cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
- .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
- .parameter_value = cpu_to_be64(mtu),
- };
-
- return gve_adminq_execute_cmd(priv, &cmd);
-}
-
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval)
{
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 863683de9694..228217458275 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -612,7 +612,6 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id);
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl);
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
-int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval);
int gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index f81a43d2cdfc..486fb0e20bef 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -469,7 +469,7 @@ int hinic_set_vlan_fliter(struct hinic_dev *nic_dev, u32 en)
err = HINIC_MGMT_CMD_UNSUPPORTED;
} else if (err || !out_size || vlan_filter.status) {
dev_err(&pdev->dev,
- "Failed to set vlan fliter, err: %d, status: 0x%x, out size: 0x%x\n",
+ "Failed to set vlan filter, err: %d, status: 0x%x, out size: 0x%x\n",
err, vlan_filter.status, out_size);
err = -EINVAL;
}
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 20bc40eec487..24ec9a4f1ffa 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -292,6 +292,7 @@ config ICE
select DIMLIB
select LIBIE
select NET_DEVLINK
+ select PACKING
select PLDMFW
select DPLL
help
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 1489a8ceec51..3bf05b135b35 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -12,6 +12,13 @@
#define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9
#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728
+#define ICE_RXQ_CTX_SIZE_DWORDS 8
+#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+#define ICE_TXQ_CTX_SZ 22
+
+typedef struct __packed { u8 buf[ICE_RXQ_CTX_SZ]; } ice_rxq_ctx_buf_t;
+typedef struct __packed { u8 buf[ICE_TXQ_CTX_SZ]; } ice_txq_ctx_buf_t;
+
struct ice_aqc_generic {
__le32 param0;
__le32 param1;
@@ -2084,10 +2091,10 @@ struct ice_aqc_add_txqs_perq {
__le16 txq_id;
u8 rsvd[2];
__le32 q_teid;
- u8 txq_ctx[22];
+ ice_txq_ctx_buf_t txq_ctx;
u8 rsvd2[2];
struct ice_aqc_txsched_elem info;
-};
+} __packed;
/* The format of the command buffer for Add Tx LAN Queues (0x0C30)
* is an array of the following structs. Please note that the length of
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 82a9cd4ec7ae..b2af8e3586f7 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -454,6 +454,9 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
/* Rx queue threshold in units of 64 */
rlan_ctx.lrxqthresh = 1;
+ /* Enable descriptor prefetch */
+ rlan_ctx.prefena = 1;
+
/* PF acts as uplink for switchdev; set flex descriptor with src_vsi
* metadata and flags to allow redirecting to PR netdev
*/
@@ -910,8 +913,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
/* copy context contents into the qg_buf */
qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
- ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
- ice_tlan_ctx_info);
+ ice_pack_txq_ctx(&tlan_ctx, &qg_buf->txqs[0].txq_ctx);
/* init queue specific tail reg. It is referred as
* transmit comm scheduler queue doorbell.
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 496d86cbd13f..f89bc6ede315 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -6,6 +6,7 @@
#include "ice_adminq_cmd.h"
#include "ice_flow.h"
#include "ice_ptp_hw.h"
+#include <linux/packing.h>
#define ICE_PF_RESET_WAIT_COUNT 300
#define ICE_MAX_NETLIST_SIZE 10
@@ -1360,39 +1361,31 @@ int ice_reset(struct ice_hw *hw, enum ice_reset_req req)
}
/**
- * ice_copy_rxq_ctx_to_hw
+ * ice_copy_rxq_ctx_to_hw - Copy packed Rx queue context to HW registers
* @hw: pointer to the hardware structure
- * @ice_rxq_ctx: pointer to the rxq context
+ * @rxq_ctx: pointer to the packed Rx queue context
* @rxq_index: the index of the Rx queue
- *
- * Copies rxq context from dense structure to HW register space
*/
-static int
-ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
+static void ice_copy_rxq_ctx_to_hw(struct ice_hw *hw,
+ const ice_rxq_ctx_buf_t *rxq_ctx,
+ u32 rxq_index)
{
- u8 i;
-
- if (!ice_rxq_ctx)
- return -EINVAL;
-
- if (rxq_index > QRX_CTRL_MAX_INDEX)
- return -EINVAL;
-
/* Copy each dword separately to HW */
- for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
- wr32(hw, QRX_CONTEXT(i, rxq_index),
- *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+ for (int i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
+ u32 ctx = ((const u32 *)rxq_ctx)[i];
- ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i,
- *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
- }
+ wr32(hw, QRX_CONTEXT(i, rxq_index), ctx);
- return 0;
+ ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i, ctx);
+ }
}
+#define ICE_CTX_STORE(struct_name, struct_field, width, lsb) \
+ PACKED_FIELD((lsb) + (width) - 1, (lsb), struct struct_name, struct_field)
+
/* LAN Rx Queue Context */
-static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
- /* Field Width LSB */
+static const struct packed_field_u8 ice_rlan_ctx_fields[] = {
+ /* Field Width LSB */
ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0),
ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13),
ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32),
@@ -1413,35 +1406,50 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196),
ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198),
ICE_CTX_STORE(ice_rlan_ctx, prefena, 1, 201),
- { 0 }
};
/**
- * ice_write_rxq_ctx
+ * ice_pack_rxq_ctx - Pack Rx queue context into a HW buffer
+ * @ctx: the Rx queue context to pack
+ * @buf: the HW buffer to pack into
+ *
+ * Pack the Rx queue context from the CPU-friendly unpacked buffer into its
+ * bit-packed HW layout.
+ */
+static void ice_pack_rxq_ctx(const struct ice_rlan_ctx *ctx,
+ ice_rxq_ctx_buf_t *buf)
+{
+ pack_fields(buf, sizeof(*buf), ctx, ice_rlan_ctx_fields,
+ QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);
+}
+
+/**
+ * ice_write_rxq_ctx - Write Rx Queue context to hardware
* @hw: pointer to the hardware structure
- * @rlan_ctx: pointer to the rxq context
+ * @rlan_ctx: pointer to the unpacked Rx queue context
* @rxq_index: the index of the Rx queue
*
- * Converts rxq context from sparse to dense structure and then writes
- * it to HW register space and enables the hardware to prefetch descriptors
- * instead of only fetching them on demand
+ * Pack the sparse Rx Queue context into dense hardware format and write it
+ * into the HW register space.
+ *
+ * Return: 0 on success, or -EINVAL if the Rx queue index is invalid.
*/
int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
u32 rxq_index)
{
- u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
+ ice_rxq_ctx_buf_t buf = {};
- if (!rlan_ctx)
+ if (rxq_index > QRX_CTRL_MAX_INDEX)
return -EINVAL;
- rlan_ctx->prefena = 1;
+ ice_pack_rxq_ctx(rlan_ctx, &buf);
+ ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index);
- ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
- return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
+ return 0;
}
/* LAN Tx Queue Context */
-const struct ice_ctx_ele ice_tlan_ctx_info[] = {
+static const struct packed_field_u8 ice_tlan_ctx_fields[] = {
/* Field Width LSB */
ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0),
ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57),
@@ -1470,10 +1478,22 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165),
ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166),
ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168),
- ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 122, 171),
- { 0 }
};
+/**
+ * ice_pack_txq_ctx - Pack Tx queue context into a HW buffer
+ * @ctx: the Tx queue context to pack
+ * @buf: the HW buffer to pack into
+ *
+ * Pack the Tx queue context from the CPU-friendly unpacked buffer into its
+ * bit-packed HW layout.
+ */
+void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf)
+{
+ pack_fields(buf, sizeof(*buf), ctx, ice_tlan_ctx_fields,
+ QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);
+}
+
/* Sideband Queue command wrappers */
/**
@@ -4558,205 +4578,6 @@ ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
/* End of FW Admin Queue command wrappers */
/**
- * ice_pack_ctx_byte - write a byte to a packed context structure
- * @src_ctx: unpacked source context structure
- * @dest_ctx: packed destination context data
- * @ce_info: context element description
- */
-static void ice_pack_ctx_byte(u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info)
-{
- u8 src_byte, dest_byte, mask;
- u8 *from, *dest;
- u16 shift_width;
-
- /* copy from the next struct field */
- from = src_ctx + ce_info->offset;
-
- /* prepare the bits and mask */
- shift_width = ce_info->lsb % 8;
- mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
-
- src_byte = *from;
- src_byte <<= shift_width;
- src_byte &= mask;
-
- /* get the current bits from the target bit string */
- dest = dest_ctx + (ce_info->lsb / 8);
-
- memcpy(&dest_byte, dest, sizeof(dest_byte));
-
- dest_byte &= ~mask; /* get the bits not changing */
- dest_byte |= src_byte; /* add in the new bits */
-
- /* put it all back */
- memcpy(dest, &dest_byte, sizeof(dest_byte));
-}
-
-/**
- * ice_pack_ctx_word - write a word to a packed context structure
- * @src_ctx: unpacked source context structure
- * @dest_ctx: packed destination context data
- * @ce_info: context element description
- */
-static void ice_pack_ctx_word(u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info)
-{
- u16 src_word, mask;
- __le16 dest_word;
- u8 *from, *dest;
- u16 shift_width;
-
- /* copy from the next struct field */
- from = src_ctx + ce_info->offset;
-
- /* prepare the bits and mask */
- shift_width = ce_info->lsb % 8;
- mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
-
- /* don't swizzle the bits until after the mask because the mask bits
- * will be in a different bit position on big endian machines
- */
- src_word = *(u16 *)from;
- src_word <<= shift_width;
- src_word &= mask;
-
- /* get the current bits from the target bit string */
- dest = dest_ctx + (ce_info->lsb / 8);
-
- memcpy(&dest_word, dest, sizeof(dest_word));
-
- dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */
- dest_word |= cpu_to_le16(src_word); /* add in the new bits */
-
- /* put it all back */
- memcpy(dest, &dest_word, sizeof(dest_word));
-}
-
-/**
- * ice_pack_ctx_dword - write a dword to a packed context structure
- * @src_ctx: unpacked source context structure
- * @dest_ctx: packed destination context data
- * @ce_info: context element description
- */
-static void ice_pack_ctx_dword(u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info)
-{
- u32 src_dword, mask;
- __le32 dest_dword;
- u8 *from, *dest;
- u16 shift_width;
-
- /* copy from the next struct field */
- from = src_ctx + ce_info->offset;
-
- /* prepare the bits and mask */
- shift_width = ce_info->lsb % 8;
- mask = GENMASK(ce_info->width - 1 + shift_width, shift_width);
-
- /* don't swizzle the bits until after the mask because the mask bits
- * will be in a different bit position on big endian machines
- */
- src_dword = *(u32 *)from;
- src_dword <<= shift_width;
- src_dword &= mask;
-
- /* get the current bits from the target bit string */
- dest = dest_ctx + (ce_info->lsb / 8);
-
- memcpy(&dest_dword, dest, sizeof(dest_dword));
-
- dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */
- dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */
-
- /* put it all back */
- memcpy(dest, &dest_dword, sizeof(dest_dword));
-}
-
-/**
- * ice_pack_ctx_qword - write a qword to a packed context structure
- * @src_ctx: unpacked source context structure
- * @dest_ctx: packed destination context data
- * @ce_info: context element description
- */
-static void ice_pack_ctx_qword(u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info)
-{
- u64 src_qword, mask;
- __le64 dest_qword;
- u8 *from, *dest;
- u16 shift_width;
-
- /* copy from the next struct field */
- from = src_ctx + ce_info->offset;
-
- /* prepare the bits and mask */
- shift_width = ce_info->lsb % 8;
- mask = GENMASK_ULL(ce_info->width - 1 + shift_width, shift_width);
-
- /* don't swizzle the bits until after the mask because the mask bits
- * will be in a different bit position on big endian machines
- */
- src_qword = *(u64 *)from;
- src_qword <<= shift_width;
- src_qword &= mask;
-
- /* get the current bits from the target bit string */
- dest = dest_ctx + (ce_info->lsb / 8);
-
- memcpy(&dest_qword, dest, sizeof(dest_qword));
-
- dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */
- dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */
-
- /* put it all back */
- memcpy(dest, &dest_qword, sizeof(dest_qword));
-}
-
-/**
- * ice_set_ctx - set context bits in packed structure
- * @hw: pointer to the hardware structure
- * @src_ctx: pointer to a generic non-packed context structure
- * @dest_ctx: pointer to memory for the packed structure
- * @ce_info: List of Rx context elements
- */
-int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info)
-{
- int f;
-
- for (f = 0; ce_info[f].width; f++) {
- /* We have to deal with each element of the FW response
- * using the correct size so that we are correct regardless
- * of the endianness of the machine.
- */
- if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) {
- ice_debug(hw, ICE_DBG_QCTX, "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n",
- f, ce_info[f].width, ce_info[f].size_of);
- continue;
- }
- switch (ce_info[f].size_of) {
- case sizeof(u8):
- ice_pack_ctx_byte(src_ctx, dest_ctx, &ce_info[f]);
- break;
- case sizeof(u16):
- ice_pack_ctx_word(src_ctx, dest_ctx, &ce_info[f]);
- break;
- case sizeof(u32):
- ice_pack_ctx_dword(src_ctx, dest_ctx, &ce_info[f]);
- break;
- case sizeof(u64):
- ice_pack_ctx_qword(src_ctx, dest_ctx, &ce_info[f]);
- break;
- default:
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-/**
* ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC
* @hw: pointer to the HW struct
* @vsi_handle: software VSI handle
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 27208a60cece..a68bea3934e3 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -92,9 +92,8 @@ ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
-extern const struct ice_ctx_ele ice_tlan_ctx_info[];
-int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
- const struct ice_ctx_ele *ce_info);
+
+void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf);
extern struct mutex ice_global_cfg_lock_sw;
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 611577ebc29d..1479b45738af 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -371,29 +371,21 @@ enum ice_rx_flex_desc_status_error_1_bits {
ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */
};
-#define ICE_RXQ_CTX_SIZE_DWORDS 8
-#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
#define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS 22
#define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS 5
#define GLTCLAN_CQ_CNTX(i, CQ) (GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800))
-/* RLAN Rx queue context data
- *
- * The sizes of the variables may be larger than needed due to crossing byte
- * boundaries. If we do not have the width of the variable set to the correct
- * size then we could end up shifting bits off the top of the variable when the
- * variable is at the top of a byte and crosses over into the next byte.
- */
+/* RLAN Rx queue context data */
struct ice_rlan_ctx {
u16 head;
- u16 cpuid; /* bigger than needed, see above for reason */
+ u8 cpuid;
#define ICE_RLAN_BASE_S 7
u64 base;
u16 qlen;
#define ICE_RLAN_CTX_DBUF_S 7
- u16 dbuf; /* bigger than needed, see above for reason */
+ u8 dbuf;
#define ICE_RLAN_CTX_HBUF_S 6
- u16 hbuf; /* bigger than needed, see above for reason */
+ u8 hbuf;
u8 dtype;
u8 dsize;
u8 crcstrip;
@@ -401,29 +393,15 @@ struct ice_rlan_ctx {
u8 hsplit_0;
u8 hsplit_1;
u8 showiv;
- u32 rxmax; /* bigger than needed, see above for reason */
+ u16 rxmax;
u8 tphrdesc_ena;
u8 tphwdesc_ena;
u8 tphdata_ena;
u8 tphhead_ena;
- u16 lrxqthresh; /* bigger than needed, see above for reason */
+ u8 lrxqthresh;
u8 prefena; /* NOTE: normally must be set to 1 at init */
};
-struct ice_ctx_ele {
- u16 offset;
- u16 size_of;
- u16 width;
- u16 lsb;
-};
-
-#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \
- .offset = offsetof(struct _struct, _ele), \
- .size_of = sizeof_field(struct _struct, _ele), \
- .width = _width, \
- .lsb = _lsb, \
-}
-
/* for hsplit_0 field of Rx RLAN context */
enum ice_rlan_ctx_rx_hsplit_0 {
ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0,
@@ -551,18 +529,12 @@ enum ice_tx_ctx_desc_eipt_offload {
#define ICE_LAN_TXQ_MAX_QGRPS 127
#define ICE_LAN_TXQ_MAX_QDIS 1023
-/* Tx queue context data
- *
- * The sizes of the variables may be larger than needed due to crossing byte
- * boundaries. If we do not have the width of the variable set to the correct
- * size then we could end up shifting bits off the top of the variable when the
- * variable is at the top of a byte and crosses over into the next byte.
- */
+/* Tx queue context data */
struct ice_tlan_ctx {
#define ICE_TLAN_CTX_BASE_S 7
u64 base; /* base is defined in 128-byte units */
u8 port_num;
- u16 cgd_num; /* bigger than needed, see above for reason */
+ u8 cgd_num;
u8 pf_num;
u16 vmvf_num;
u8 vmvf_type;
@@ -573,7 +545,7 @@ struct ice_tlan_ctx {
u8 tsyn_ena;
u8 internal_usage_flag;
u8 alt_vlan;
- u16 cpuid; /* bigger than needed, see above for reason */
+ u8 cpuid;
u8 wb_mode;
u8 tphrd_desc;
u8 tphrd;
@@ -582,7 +554,7 @@ struct ice_tlan_ctx {
u16 qnum_in_func;
u8 itr_notification_mode;
u8 adjust_prof_id;
- u32 qlen; /* bigger than needed, see above for reason */
+ u16 qlen;
u8 quanta_prof_idx;
u8 tso_ena;
u16 tso_qnum;
@@ -590,7 +562,6 @@ struct ice_tlan_ctx {
u8 drop_ena;
u8 cache_prof_idx;
u8 pkt_shaper_prof_idx;
- u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */
};
#endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 660dff5426e7..83ce3bfefa5c 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -90,7 +90,6 @@ struct ltq_etop_priv {
struct net_device *netdev;
struct platform_device *pdev;
struct ltq_eth_data *pldata;
- struct resource *res;
struct mii_bus *mii_bus;
@@ -643,31 +642,14 @@ ltq_etop_probe(struct platform_device *pdev)
{
struct net_device *dev;
struct ltq_etop_priv *priv;
- struct resource *res;
int err;
int i;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "failed to get etop resource\n");
- err = -ENOENT;
- goto err_out;
- }
-
- res = devm_request_mem_region(&pdev->dev, res->start,
- resource_size(res), dev_name(&pdev->dev));
- if (!res) {
- dev_err(&pdev->dev, "failed to request etop resource\n");
- err = -EBUSY;
- goto err_out;
- }
-
- ltq_etop_membase = devm_ioremap(&pdev->dev, res->start,
- resource_size(res));
- if (!ltq_etop_membase) {
+ ltq_etop_membase = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ltq_etop_membase)) {
dev_err(&pdev->dev, "failed to remap etop engine %d\n",
pdev->id);
- err = -ENOMEM;
+ err = PTR_ERR(ltq_etop_membase);
goto err_out;
}
@@ -679,7 +661,6 @@ ltq_etop_probe(struct platform_device *pdev)
dev->netdev_ops = &ltq_eth_netdev_ops;
dev->ethtool_ops = &ltq_etop_ethtool_ops;
priv = netdev_priv(dev);
- priv->res = res;
priv->pdev = pdev;
priv->pldata = dev_get_platdata(&pdev->dev);
priv->netdev = dev;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 1fb285fa0bdb..fe6261b81540 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3960,20 +3960,27 @@ static struct mvneta_port *mvneta_pcs_to_port(struct phylink_pcs *pcs)
return container_of(pcs, struct mvneta_port, phylink_pcs);
}
-static int mvneta_pcs_validate(struct phylink_pcs *pcs,
- unsigned long *supported,
- const struct phylink_link_state *state)
+static unsigned int mvneta_pcs_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface)
{
- /* We only support QSGMII, SGMII, 802.3z and RGMII modes.
- * When in 802.3z mode, we must have AN enabled:
+ /* When operating in an 802.3z mode, we must have AN enabled:
* "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
* When <PortType> = 1 (1000BASE-X) this field must be set to 1."
+ * Therefore, inband is "required".
*/
- if (phy_interface_mode_is_8023z(state->interface) &&
- !phylink_test(state->advertising, Autoneg))
- return -EINVAL;
+ if (phy_interface_mode_is_8023z(interface))
+ return LINK_INBAND_ENABLE;
- return 0;
+ /* QSGMII, SGMII and RGMII can be configured to use inband
+ * signalling of the AN result. Indicate these as "possible".
+ */
+ if (interface == PHY_INTERFACE_MODE_SGMII ||
+ interface == PHY_INTERFACE_MODE_QSGMII ||
+ phy_interface_mode_is_rgmii(interface))
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+
+ /* For any other modes, indicate that inband is not supported. */
+ return LINK_INBAND_DISABLE;
}
static void mvneta_pcs_get_state(struct phylink_pcs *pcs,
@@ -4071,7 +4078,7 @@ static void mvneta_pcs_an_restart(struct phylink_pcs *pcs)
}
static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = {
- .pcs_validate = mvneta_pcs_validate,
+ .pcs_inband_caps = mvneta_pcs_inband_caps,
.pcs_get_state = mvneta_pcs_get_state,
.pcs_config = mvneta_pcs_config,
.pcs_an_restart = mvneta_pcs_an_restart,
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 571631a30320..f85229a30844 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6224,19 +6224,26 @@ static const struct phylink_pcs_ops mvpp2_phylink_xlg_pcs_ops = {
.pcs_config = mvpp2_xlg_pcs_config,
};
-static int mvpp2_gmac_pcs_validate(struct phylink_pcs *pcs,
- unsigned long *supported,
- const struct phylink_link_state *state)
+static unsigned int mvpp2_gmac_pcs_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface)
{
- /* When in 802.3z mode, we must have AN enabled:
+ /* When operating in an 802.3z mode, we must have AN enabled:
* Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
* When <PortType> = 1 (1000BASE-X) this field must be set to 1.
+ * Therefore, inband is "required".
*/
- if (phy_interface_mode_is_8023z(state->interface) &&
- !phylink_test(state->advertising, Autoneg))
- return -EINVAL;
+ if (phy_interface_mode_is_8023z(interface))
+ return LINK_INBAND_ENABLE;
- return 0;
+ /* SGMII and RGMII can be configured to use inband signalling of the
+ * AN result. Indicate these as "possible".
+ */
+ if (interface == PHY_INTERFACE_MODE_SGMII ||
+ phy_interface_mode_is_rgmii(interface))
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+
+ /* For any other modes, indicate that inband is not supported. */
+ return LINK_INBAND_DISABLE;
}
static void mvpp2_gmac_pcs_get_state(struct phylink_pcs *pcs,
@@ -6343,7 +6350,7 @@ static void mvpp2_gmac_pcs_an_restart(struct phylink_pcs *pcs)
}
static const struct phylink_pcs_ops mvpp2_phylink_gmac_pcs_ops = {
- .pcs_validate = mvpp2_gmac_pcs_validate,
+ .pcs_inband_caps = mvpp2_gmac_pcs_inband_caps,
.pcs_get_state = mvpp2_gmac_pcs_get_state,
.pcs_config = mvpp2_gmac_pcs_config,
.pcs_an_restart = mvpp2_gmac_pcs_an_restart,
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 549436efc204..3a9825883d79 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -1137,6 +1137,43 @@ static int octep_set_features(struct net_device *dev, netdev_features_t features
return err;
}
+static int octep_get_vf_config(struct net_device *dev, int vf,
+ struct ifla_vf_info *ivi)
+{
+ struct octep_device *oct = netdev_priv(dev);
+
+ ivi->vf = vf;
+ ether_addr_copy(ivi->mac, oct->vf_info[vf].mac_addr);
+ ivi->spoofchk = true;
+ ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+ ivi->trusted = false;
+
+ return 0;
+}
+
+static int octep_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct octep_device *oct = netdev_priv(dev);
+ int err;
+
+ if (!is_valid_ether_addr(mac)) {
+ dev_err(&oct->pdev->dev, "Invalid MAC Address %pM\n", mac);
+ return -EADDRNOTAVAIL;
+ }
+
+ dev_dbg(&oct->pdev->dev, "set vf-%d mac to %pM\n", vf, mac);
+ ether_addr_copy(oct->vf_info[vf].mac_addr, mac);
+ oct->vf_info[vf].flags |= OCTEON_PFVF_FLAG_MAC_SET_BY_PF;
+
+ err = octep_ctrl_net_set_mac_addr(oct, vf, mac, true);
+ if (err)
+ dev_err(&oct->pdev->dev,
+ "Set VF%d MAC address failed via host control Mbox\n",
+ vf);
+
+ return err;
+}
+
static const struct net_device_ops octep_netdev_ops = {
.ndo_open = octep_open,
.ndo_stop = octep_stop,
@@ -1146,6 +1183,8 @@ static const struct net_device_ops octep_netdev_ops = {
.ndo_set_mac_address = octep_set_mac,
.ndo_change_mtu = octep_change_mtu,
.ndo_set_features = octep_set_features,
+ .ndo_get_vf_config = octep_get_vf_config,
+ .ndo_set_vf_mac = octep_set_vf_mac
};
/**
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index fee59e0e0138..3b56916af468 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -220,6 +220,7 @@ struct octep_iface_link_info {
/* The Octeon VF device specific info data structure.*/
struct octep_pfvf_info {
u8 mac_addr[ETH_ALEN];
+ u32 flags;
u32 mbox_version;
};
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
index e6eb98d70f3c..ebecdd29f3bd 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
@@ -156,12 +156,23 @@ static void octep_pfvf_set_mac_addr(struct octep_device *oct, u32 vf_id,
{
int err;
+ if (oct->vf_info[vf_id].flags & OCTEON_PFVF_FLAG_MAC_SET_BY_PF) {
+ dev_err(&oct->pdev->dev,
+ "VF%d attempted to override administrative set MAC address\n",
+ vf_id);
+ rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK;
+ return;
+ }
+
err = octep_ctrl_net_set_mac_addr(oct, vf_id, cmd.s_set_mac.mac_addr, true);
if (err) {
rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK;
- dev_err(&oct->pdev->dev, "Set VF MAC address failed via host control Mbox\n");
+ dev_err(&oct->pdev->dev, "Set VF%d MAC address failed via host control Mbox\n",
+ vf_id);
return;
}
+
+ ether_addr_copy(oct->vf_info[vf_id].mac_addr, cmd.s_set_mac.mac_addr);
rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK;
}
@@ -171,10 +182,18 @@ static void octep_pfvf_get_mac_addr(struct octep_device *oct, u32 vf_id,
{
int err;
+ if (oct->vf_info[vf_id].flags & OCTEON_PFVF_FLAG_MAC_SET_BY_PF) {
+ dev_dbg(&oct->pdev->dev, "VF%d MAC address set by PF\n", vf_id);
+ ether_addr_copy(rsp->s_set_mac.mac_addr,
+ oct->vf_info[vf_id].mac_addr);
+ rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK;
+ return;
+ }
err = octep_ctrl_net_get_mac_addr(oct, vf_id, rsp->s_set_mac.mac_addr);
if (err) {
rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK;
- dev_err(&oct->pdev->dev, "Get VF MAC address failed via host control Mbox\n");
+ dev_err(&oct->pdev->dev, "Get VF%d MAC address failed via host control Mbox\n",
+ vf_id);
return;
}
rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h
index 0dc6eead292a..386a095a99bc 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h
@@ -8,8 +8,6 @@
#ifndef _OCTEP_PFVF_MBOX_H_
#define _OCTEP_PFVF_MBOX_H_
-/* VF flags */
-#define OCTEON_PFVF_FLAG_MAC_SET_BY_PF BIT_ULL(0) /* PF has set VF MAC address */
#define OCTEON_SDP_16K_HW_FRS 16380UL
#define OCTEON_SDP_64K_HW_FRS 65531UL
@@ -23,6 +21,10 @@ enum octep_pfvf_mbox_version {
#define OCTEP_PFVF_MBOX_VERSION_CURRENT OCTEP_PFVF_MBOX_VERSION_V2
+/* VF flags */
+/* PF has set VF MAC address */
+#define OCTEON_PFVF_FLAG_MAC_SET_BY_PF BIT(0)
+
enum octep_pfvf_mbox_opcode {
OCTEP_PFVF_MBOX_CMD_VERSION,
OCTEP_PFVF_MBOX_CMD_SET_MTU,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 62c07407eb94..005ca8a056c0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -313,6 +313,10 @@ M(NIX_BANDPROF_FREE, 0x801e, nix_bandprof_free, nix_bandprof_free_req, \
msg_rsp) \
M(NIX_BANDPROF_GET_HWINFO, 0x801f, nix_bandprof_get_hwinfo, msg_req, \
nix_bandprof_get_hwinfo_rsp) \
+M(NIX_CPT_BP_ENABLE, 0x8020, nix_cpt_bp_enable, nix_bp_cfg_req, \
+ nix_bp_cfg_rsp) \
+M(NIX_CPT_BP_DISABLE, 0x8021, nix_cpt_bp_disable, nix_bp_cfg_req, \
+ msg_rsp) \
M(NIX_READ_INLINE_IPSEC_CFG, 0x8023, nix_read_inline_ipsec_cfg, \
msg_req, nix_inline_ipsec_cfg) \
M(NIX_MCAST_GRP_CREATE, 0x802b, nix_mcast_grp_create, nix_mcast_grp_create_req, \
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index a5d1e2bddd58..613655fcd34f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -569,9 +569,17 @@ void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc)
mutex_unlock(&rvu->rsrc_lock);
}
-int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
- struct nix_bp_cfg_req *req,
- struct msg_rsp *rsp)
+/* Return the channel number to program for @chan.
+ *
+ * For a CPT link the channel is taken to be the link channel with BIT(11)
+ * set; otherwise the link channel is returned unchanged.
+ */
+static u16 nix_get_channel(u16 chan, bool cpt_link)
+{
+	if (!cpt_link)
+		return chan;
+
+	return chan | BIT(11);
+}
+
+static int nix_bp_disable(struct rvu *rvu,
+ struct nix_bp_cfg_req *req,
+ struct msg_rsp *rsp, bool cpt_link)
{
u16 pcifunc = req->hdr.pcifunc;
int blkaddr, pf, type, err;
@@ -579,6 +587,7 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
struct rvu_pfvf *pfvf;
struct nix_hw *nix_hw;
struct nix_bp *bp;
+ u16 chan_v;
u64 cfg;
pf = rvu_get_pf(pcifunc);
@@ -589,6 +598,9 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
if (is_sdp_pfvf(pcifunc))
type = NIX_INTF_TYPE_SDP;
+ if (cpt_link && !rvu->hw->cpt_links)
+ return 0;
+
pfvf = rvu_get_pfvf(rvu, pcifunc);
err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
if (err)
@@ -597,8 +609,9 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
bp = &nix_hw->bp;
chan_base = pfvf->rx_chan_base + req->chan_base;
for (chan = chan_base; chan < (chan_base + req->chan_cnt); chan++) {
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan));
- rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan),
+ chan_v = nix_get_channel(chan, cpt_link);
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v));
+ rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v),
cfg & ~BIT_ULL(16));
if (type == NIX_INTF_TYPE_LBK) {
@@ -617,6 +630,20 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
return 0;
}
+/* Mailbox entry point: runs nix_bp_disable() with cpt_link = false, i.e. on
+ * the link channels exactly as given in the request.
+ */
+int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
+ struct nix_bp_cfg_req *req,
+ struct msg_rsp *rsp)
+{
+ return nix_bp_disable(rvu, req, rsp, false);
+}
+
+/* Mailbox entry point for NIX_CPT_BP_DISABLE: runs nix_bp_disable() with
+ * cpt_link = true so the CPT variant of each channel is updated.
+ */
+int rvu_mbox_handler_nix_cpt_bp_disable(struct rvu *rvu,
+ struct nix_bp_cfg_req *req,
+ struct msg_rsp *rsp)
+{
+ return nix_bp_disable(rvu, req, rsp, true);
+}
+
static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
int type, int chan_id)
{
@@ -696,15 +723,17 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
return bpid;
}
-int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
- struct nix_bp_cfg_req *req,
- struct nix_bp_cfg_rsp *rsp)
+static int nix_bp_enable(struct rvu *rvu,
+ struct nix_bp_cfg_req *req,
+ struct nix_bp_cfg_rsp *rsp,
+ bool cpt_link)
{
int blkaddr, pf, type, chan_id = 0;
u16 pcifunc = req->hdr.pcifunc;
struct rvu_pfvf *pfvf;
u16 chan_base, chan;
s16 bpid, bpid_base;
+ u16 chan_v;
u64 cfg;
pf = rvu_get_pf(pcifunc);
@@ -717,6 +746,9 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
type != NIX_INTF_TYPE_SDP)
return 0;
+ if (cpt_link && !rvu->hw->cpt_links)
+ return 0;
+
pfvf = rvu_get_pfvf(rvu, pcifunc);
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
@@ -730,9 +762,11 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
return -EINVAL;
}
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan));
+ chan_v = nix_get_channel(chan, cpt_link);
+
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v));
cfg &= ~GENMASK_ULL(8, 0);
- rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan),
+ rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v),
cfg | (bpid & GENMASK_ULL(8, 0)) | BIT_ULL(16));
chan_id++;
bpid = rvu_nix_get_bpid(rvu, req, type, chan_id);
@@ -750,6 +784,20 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
return 0;
}
+/* Mailbox entry point: runs nix_bp_enable() with cpt_link = false, i.e. on
+ * the link channels exactly as given in the request.
+ */
+int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
+ struct nix_bp_cfg_req *req,
+ struct nix_bp_cfg_rsp *rsp)
+{
+ return nix_bp_enable(rvu, req, rsp, false);
+}
+
+/* Mailbox entry point for NIX_CPT_BP_ENABLE: runs nix_bp_enable() with
+ * cpt_link = true so the CPT variant of each channel is updated.
+ */
+int rvu_mbox_handler_nix_cpt_bp_enable(struct rvu *rvu,
+ struct nix_bp_cfg_req *req,
+ struct nix_bp_cfg_rsp *rsp)
+{
+ return nix_bp_enable(rvu, req, rsp, true);
+}
+
static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr,
u64 format, bool v4, u64 *fidx)
{
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index dbc971266865..cb6513ab35e7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -15,5 +15,6 @@ rvu_rep-y := rep.o
rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o
rvu_nicpf-$(CONFIG_MACSEC) += cn10k_macsec.o
+rvu_nicpf-$(CONFIG_XFRM_OFFLOAD) += cn10k_ipsec.o
ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c
new file mode 100644
index 000000000000..09a5b5268205
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell IPSEC offload driver
+ *
+ * Copyright (C) 2024 Marvell.
+ */
+
+#include <net/xfrm.h>
+#include <linux/netdevice.h>
+#include <linux/bitfield.h>
+#include <crypto/aead.h>
+#include <crypto/gcm.h>
+
+#include "otx2_common.h"
+#include "otx2_struct.h"
+#include "cn10k_ipsec.h"
+
+/* IPsec offload is supported only on devices for which is_dev_cn10ka_b0()
+ * or is_dev_cn10kb() holds.
+ */
+static bool is_dev_support_ipsec_offload(struct pci_dev *pdev)
+{
+	if (is_dev_cn10ka_b0(pdev))
+		return true;
+
+	return is_dev_cn10kb(pdev);
+}
+
+/* Claim the CPT LF for exclusive use.
+ *
+ * Repeatedly tries to switch pf->ipsec.cpt_state from AVAILABLE to IN_USE,
+ * delaying 1 ms between attempts while it is in any other busy state.
+ *
+ * Return: true once the state was switched to IN_USE, false if the device
+ * was found in the UNAVAILABLE state.
+ */
+static bool cn10k_cpt_device_set_inuse(struct otx2_nic *pf)
+{
+	enum cn10k_cpt_hw_state_e prev;
+
+	for (;;) {
+		prev = atomic_cmpxchg(&pf->ipsec.cpt_state,
+				      CN10K_CPT_HW_AVAILABLE,
+				      CN10K_CPT_HW_IN_USE);
+		switch (prev) {
+		case CN10K_CPT_HW_AVAILABLE:
+			return true;
+		case CN10K_CPT_HW_UNAVAILABLE:
+			return false;
+		default:
+			/* Not free yet; back off and retry */
+			mdelay(1);
+			break;
+		}
+	}
+}
+
+/* Release the CPT LF: store CN10K_CPT_HW_AVAILABLE in pf->ipsec.cpt_state so
+ * that cn10k_cpt_device_set_inuse() can claim it again.
+ */
+static void cn10k_cpt_device_set_available(struct otx2_nic *pf)
+{
+ atomic_set(&pf->ipsec.cpt_state, CN10K_CPT_HW_AVAILABLE);
+}
+
+/* Store CN10K_CPT_HW_UNAVAILABLE in pf->ipsec.cpt_state; from then on
+ * cn10k_cpt_device_set_inuse() returns false instead of claiming the LF.
+ */
+static void cn10k_cpt_device_set_unavailable(struct otx2_nic *pf)
+{
+ atomic_set(&pf->ipsec.cpt_state, CN10K_CPT_HW_UNAVAILABLE);
+}
+
+/* Send an attach-resources mailbox request with cptlfs and modify set.
+ *
+ * Return: 0 on success, -ENOMEM if no mailbox message could be allocated,
+ * otherwise the result of otx2_sync_mbox_msg().
+ */
+static int cn10k_outb_cptlf_attach(struct otx2_nic *pf)
+{
+	struct rsrc_attach *req;
+	int err;
+
+	mutex_lock(&pf->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_attach_resources(&pf->mbox);
+	if (req) {
+		req->cptlfs = true;
+		req->modify = true;
+
+		/* Hand the request to the AF and wait for the reply */
+		err = otx2_sync_mbox_msg(&pf->mbox);
+	} else {
+		err = -ENOMEM;
+	}
+
+	mutex_unlock(&pf->mbox.lock);
+	return err;
+}
+
+/* Send a detach-resources mailbox request with partial and cptlfs set.
+ *
+ * Return: 0 on success, -ENOMEM if no mailbox message could be allocated,
+ * otherwise the result of otx2_sync_mbox_msg().
+ */
+static int cn10k_outb_cptlf_detach(struct otx2_nic *pf)
+{
+	struct rsrc_detach *req;
+	int err;
+
+	mutex_lock(&pf->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_detach_resources(&pf->mbox);
+	if (req) {
+		req->partial = true;
+		req->cptlfs = true;
+
+		/* Hand the request to the AF and wait for the reply */
+		err = otx2_sync_mbox_msg(&pf->mbox);
+	} else {
+		err = -ENOMEM;
+	}
+
+	mutex_unlock(&pf->mbox.lock);
+	return err;
+}
+
+/* Send a CPT LF alloc mailbox request for this PF, selecting the engine
+ * group CN10K_DEF_CPT_IPSEC_EGRP.
+ *
+ * Return: 0 on success, -ENOMEM if no mailbox message could be allocated,
+ * otherwise the result of otx2_sync_mbox_msg().
+ */
+static int cn10k_outb_cptlf_alloc(struct otx2_nic *pf)
+{
+	struct cpt_lf_alloc_req_msg *msg;
+	int err;
+
+	mutex_lock(&pf->mbox.lock);
+
+	msg = otx2_mbox_alloc_msg_cpt_lf_alloc(&pf->mbox);
+	if (msg) {
+		/* LF is used on behalf of this PF function */
+		msg->nix_pf_func = pf->pcifunc;
+		/* Only the SE-IE engine group is enabled */
+		msg->eng_grpmsk = 1 << CN10K_DEF_CPT_IPSEC_EGRP;
+
+		err = otx2_sync_mbox_msg(&pf->mbox);
+	} else {
+		err = -ENOMEM;
+	}
+
+	mutex_unlock(&pf->mbox.lock);
+	return err;
+}
+
+/* Send a CPT LF free mailbox request.
+ *
+ * This is best-effort teardown, so failures are logged rather than
+ * returned.  As in the other mailbox helpers in this file, nothing is sent
+ * when the message cannot be allocated.
+ */
+static void cn10k_outb_cptlf_free(struct otx2_nic *pf)
+{
+	int err;
+
+	mutex_lock(&pf->mbox.lock);
+
+	if (!otx2_mbox_alloc_msg_cpt_lf_free(&pf->mbox)) {
+		netdev_err(pf->netdev,
+			   "Failed to allocate mbox msg for CPT LF free\n");
+		goto unlock;
+	}
+
+	err = otx2_sync_mbox_msg(&pf->mbox);
+	if (err)
+		netdev_err(pf->netdev, "CPT LF free request failed (%d)\n",
+			   err);
+
+unlock:
+	mutex_unlock(&pf->mbox.lock);
+}
+
+/* Send an inline-IPsec config mailbox request that enables the outbound
+ * direction for this PF function.
+ *
+ * Return: 0 on success, -ENOMEM if no mailbox message could be allocated,
+ * otherwise the result of otx2_sync_mbox_msg().
+ */
+static int cn10k_outb_cptlf_config(struct otx2_nic *pf)
+{
+	struct cpt_inline_ipsec_cfg_msg *cfg;
+	int err;
+
+	mutex_lock(&pf->mbox.lock);
+
+	cfg = otx2_mbox_alloc_msg_cpt_inline_ipsec_cfg(&pf->mbox);
+	if (cfg) {
+		cfg->dir = CPT_INLINE_OUTBOUND;
+		cfg->enable = 1;
+		cfg->nix_pf_func = pf->pcifunc;
+
+		err = otx2_sync_mbox_msg(&pf->mbox);
+	} else {
+		err = -ENOMEM;
+	}
+
+	mutex_unlock(&pf->mbox.lock);
+	return err;
+}
+
+/* Turn the outbound CPT LF instruction queue on.
+ *
+ * Two read-modify-write accesses, in this order: set bit 16 of
+ * CN10K_CPT_LF_INPROG (execution enable), then set bit 0 of
+ * CN10K_CPT_LF_CTL (enqueuing enable).
+ */
+static void cn10k_outb_cptlf_iq_enable(struct otx2_nic *pf)
+{
+ u64 reg_val;
+
+ /* Set Execution Enable of instruction queue */
+ reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG);
+ reg_val |= BIT_ULL(16);
+ otx2_write64(pf, CN10K_CPT_LF_INPROG, reg_val);
+
+ /* Set iqueue's enqueuing */
+ reg_val = otx2_read64(pf, CN10K_CPT_LF_CTL);
+ reg_val |= BIT_ULL(0);
+ otx2_write64(pf, CN10K_CPT_LF_CTL, reg_val);
+}
+
+/* Quiesce the outbound CPT LF instruction queue.
+ *
+ * Sequence: stop enqueuing, wait (bounded) for the in-flight count to reach
+ * zero, clear the execution-enable bit, wait for the enqueue and dequeue
+ * pointers to meet, and finally poll CPT_LF_INPROG until it has reported an
+ * idle state ten times in a row.
+ */
+static void cn10k_outb_cptlf_iq_disable(struct otx2_nic *pf)
+{
+	u32 inflight, grb_cnt, gwb_cnt;
+	u32 nq_ptr, dq_ptr;
+	int timeout = 20;
+	u64 reg_val;
+	int cnt;
+
+	/* Disable instructions enqueuing */
+	otx2_write64(pf, CN10K_CPT_LF_CTL, 0ull);
+
+	/* Wait for instruction queue to become empty.
+	 * CPT_LF_INPROG.INFLIGHT count is zero
+	 */
+	do {
+		reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG);
+		inflight = FIELD_GET(CPT_LF_INPROG_INFLIGHT, reg_val);
+		if (!inflight)
+			break;
+
+		usleep_range(10000, 20000);
+		if (timeout-- < 0) {
+			netdev_err(pf->netdev, "Timeout to cleanup CPT IQ\n");
+			break;
+		}
+	} while (1);
+
+	/* Disable executions in the LF's queue,
+	 * the queue should be empty at this point
+	 */
+	reg_val &= ~BIT_ULL(16);
+	otx2_write64(pf, CN10K_CPT_LF_INPROG, reg_val);
+
+	/* Wait for instruction queue to become empty */
+	cnt = 0;
+	do {
+		reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG);
+		if (reg_val & BIT_ULL(31))
+			cnt = 0;
+		else
+			cnt++;
+		reg_val = otx2_read64(pf, CN10K_CPT_LF_Q_GRP_PTR);
+		/* The enqueue and dequeue pointers are separate fields of
+		 * the register and each needs its own mask; extracting the
+		 * same field twice would make the comparison below always
+		 * false and the wait a no-op.
+		 * NOTE(review): relies on CPT_LF_Q_GRP_PTR_NQ_PTR being
+		 * defined next to CPT_LF_Q_GRP_PTR_DQ_PTR - confirm.
+		 */
+		nq_ptr = FIELD_GET(CPT_LF_Q_GRP_PTR_NQ_PTR, reg_val);
+		dq_ptr = FIELD_GET(CPT_LF_Q_GRP_PTR_DQ_PTR, reg_val);
+	} while ((cnt < 10) && (nq_ptr != dq_ptr));
+
+	/* Require ten consecutive idle readings before declaring the queue
+	 * quiesced; any busy reading restarts the count.
+	 */
+	cnt = 0;
+	do {
+		reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG);
+		inflight = FIELD_GET(CPT_LF_INPROG_INFLIGHT, reg_val);
+		grb_cnt = FIELD_GET(CPT_LF_INPROG_GRB_CNT, reg_val);
+		gwb_cnt = FIELD_GET(CPT_LF_INPROG_GWB_CNT, reg_val);
+		if (inflight == 0 && gwb_cnt < 40 &&
+		    (grb_cnt == 0 || grb_cnt == 40))
+			cnt++;
+		else
+			cnt = 0;
+	} while (cnt < 10);
+}
+
+/* Allocate memory for CPT outbound Instruction queue.
+ * Instruction queue memory format is:
+ * -----------------------------
+ * | Instruction Group memory |
+ * | (CPT_LF_Q_SIZE[SIZE_DIV40] |
+ * | x 16 Bytes) |
+ * | |
+ * ----------------------------- <-- CPT_LF_Q_BASE[ADDR]
+ * | Flow Control (128 Bytes) |
+ * | |
+ * -----------------------------
+ * | Instruction Memory |
+ * | (CPT_LF_Q_SIZE[SIZE_DIV40] |
+ * | x 40 x 64 bytes) |
+ * | |
+ * -----------------------------
+ *
+ * One coherent DMA buffer covers all three regions plus OTX2_ALIGN bytes of
+ * slack.  iq->real_vaddr / iq->real_dma_addr keep the raw allocation (needed
+ * for freeing); iq->vaddr / iq->dma_addr point past the instruction group
+ * area, rounded up to OTX2_ALIGN.
+ *
+ * Return: 0 on success, -ENOMEM if the DMA allocation fails.
+ */
+static int cn10k_outb_cptlf_iq_alloc(struct otx2_nic *pf)
+{
+ struct cn10k_cpt_inst_queue *iq = &pf->ipsec.iq;
+
+ iq->size = CN10K_CPT_INST_QLEN_BYTES + CN10K_CPT_Q_FC_LEN +
+ CN10K_CPT_INST_GRP_QLEN_BYTES + OTX2_ALIGN;
+
+ iq->real_vaddr = dma_alloc_coherent(pf->dev, iq->size,
+ &iq->real_dma_addr, GFP_KERNEL);
+ if (!iq->real_vaddr)
+ return -ENOMEM;
+
+ /* iq->vaddr/dma_addr points to Flow Control location */
+ iq->vaddr = iq->real_vaddr + CN10K_CPT_INST_GRP_QLEN_BYTES;
+ iq->dma_addr = iq->real_dma_addr + CN10K_CPT_INST_GRP_QLEN_BYTES;
+
+ /* Align pointers */
+ iq->vaddr = PTR_ALIGN(iq->vaddr, OTX2_ALIGN);
+ iq->dma_addr = PTR_ALIGN(iq->dma_addr, OTX2_ALIGN);
+ return 0;
+}
+
+/* Free the instruction queue buffer allocated by
+ * cn10k_outb_cptlf_iq_alloc(), if any, and clear the cached virtual
+ * addresses.  Safe to call when nothing is allocated.
+ */
+static void cn10k_outb_cptlf_iq_free(struct otx2_nic *pf)
+{
+ struct cn10k_cpt_inst_queue *iq = &pf->ipsec.iq;
+
+ /* The raw (unaligned) address and size are what was allocated */
+ if (iq->real_vaddr)
+ dma_free_coherent(pf->dev, iq->size, iq->real_vaddr,
+ iq->real_dma_addr);
+
+ iq->real_vaddr = NULL;
+ iq->vaddr = NULL;
+}
+
+/* Allocate the instruction queue and program it into the CPT LF.
+ *
+ * The queue is disabled before its base address and size registers are
+ * written; it is left disabled on return.
+ *
+ * Return: 0 on success, or the error from cn10k_outb_cptlf_iq_alloc().
+ */
+static int cn10k_outb_cptlf_iq_init(struct otx2_nic *pf)
+{
+ u64 reg_val;
+ int ret;
+
+ /* Allocate Memory for CPT IQ */
+ ret = cn10k_outb_cptlf_iq_alloc(pf);
+ if (ret)
+ return ret;
+
+ /* Disable IQ */
+ cn10k_outb_cptlf_iq_disable(pf);
+
+ /* Set IQ base address */
+ otx2_write64(pf, CN10K_CPT_LF_Q_BASE, pf->ipsec.iq.dma_addr);
+
+ /* Set IQ size */
+ reg_val = FIELD_PREP(CPT_LF_Q_SIZE_DIV40, CN10K_CPT_SIZE_DIV40 +
+ CN10K_CPT_EXTRA_SIZE_DIV40);
+ otx2_write64(pf, CN10K_CPT_LF_Q_SIZE, reg_val);
+
+ return 0;
+}
+
+/* Bring up the CPT LF for outbound IPsec: set up the instruction queue,
+ * configure the LF for inline outbound processing, then enable the queue.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed.  The
+ * queue memory is released again if the configuration step fails.
+ */
+static int cn10k_outb_cptlf_init(struct otx2_nic *pf)
+{
+	int err;
+
+	/* Instruction queue (IQ) first */
+	err = cn10k_outb_cptlf_iq_init(pf);
+	if (err)
+		return err;
+
+	/* Then the outbound ipsec offload configuration */
+	err = cn10k_outb_cptlf_config(pf);
+	if (err) {
+		cn10k_outb_cptlf_iq_free(pf);
+		return err;
+	}
+
+	/* Everything is in place, let the IQ run */
+	cn10k_outb_cptlf_iq_enable(pf);
+	return 0;
+}
+
+/* Enable outbound IPsec offload on @netdev.
+ *
+ * Attaches, allocates and initialises a CPT LF, records the address of the
+ * CN10K_CPT_LF_NQX(0) register in pf->ipsec.io_addr, sets
+ * OTX2_FLAG_IPSEC_OFFLOAD_ENABLED and marks the CPT device available.
+ * Steps already completed are undone in reverse order on failure.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed.
+ */
+static int cn10k_outb_cpt_init(struct net_device *netdev)
+{
+ struct otx2_nic *pf = netdev_priv(netdev);
+ int ret;
+
+ /* Attach a CPT LF for outbound ipsec offload */
+ ret = cn10k_outb_cptlf_attach(pf);
+ if (ret)
+ return ret;
+
+ /* Allocate a CPT LF for outbound ipsec offload */
+ ret = cn10k_outb_cptlf_alloc(pf);
+ if (ret)
+ goto detach;
+
+ /* Initialize the CPTLF for outbound ipsec offload */
+ ret = cn10k_outb_cptlf_init(pf);
+ if (ret)
+ goto lf_free;
+
+ pf->ipsec.io_addr = (__force u64)otx2_get_regaddr(pf,
+ CN10K_CPT_LF_NQX(0));
+
+ /* Set ipsec offload enabled for this device */
+ pf->flags |= OTX2_FLAG_IPSEC_OFFLOAD_ENABLED;
+
+ cn10k_cpt_device_set_available(pf);
+ return 0;
+
+lf_free:
+ cn10k_outb_cptlf_free(pf);
+detach:
+ cn10k_outb_cptlf_detach(pf);
+ return ret;
+}
+
+/* Tear down outbound IPsec offload.
+ *
+ * Claims the CPT device first so no SA write can race with the teardown,
+ * then clears OTX2_FLAG_IPSEC_OFFLOAD_ENABLED, quiesces and unprograms the
+ * instruction queue, frees its memory and finally frees and detaches the
+ * LF.  The device is left in the UNAVAILABLE state even when the detach
+ * request fails.
+ *
+ * Return: 0 on success, -ENODEV if the CPT device is unavailable, otherwise
+ * the error from cn10k_outb_cptlf_detach().
+ */
+static int cn10k_outb_cpt_clean(struct otx2_nic *pf)
+{
+ int ret;
+
+ if (!cn10k_cpt_device_set_inuse(pf)) {
+ netdev_err(pf->netdev, "CPT LF device unavailable\n");
+ return -ENODEV;
+ }
+
+ /* Set ipsec offload disabled for this device */
+ pf->flags &= ~OTX2_FLAG_IPSEC_OFFLOAD_ENABLED;
+
+ /* Disable CPTLF Instruction Queue (IQ) */
+ cn10k_outb_cptlf_iq_disable(pf);
+
+ /* Set IQ base address and size to 0 */
+ otx2_write64(pf, CN10K_CPT_LF_Q_BASE, 0);
+ otx2_write64(pf, CN10K_CPT_LF_Q_SIZE, 0);
+
+ /* Free CPTLF IQ */
+ cn10k_outb_cptlf_iq_free(pf);
+
+ /* Free and detach CPT LF */
+ cn10k_outb_cptlf_free(pf);
+ ret = cn10k_outb_cptlf_detach(pf);
+ if (ret)
+ netdev_err(pf->netdev, "Failed to detach CPT LF\n");
+
+ cn10k_cpt_device_set_unavailable(pf);
+ return ret;
+}
+
+/* Submit one CPT instruction through the current CPU's LMT line.
+ *
+ * @inst is copied (@size bytes) into the per-CPU LMT line and flushed to
+ * pf->ipsec.io_addr with cn10k_lmt_flush().
+ */
+static void cn10k_cpt_inst_flush(struct otx2_nic *pf, struct cpt_inst_s *inst,
+ u64 size)
+{
+ struct otx2_lmt_info *lmt_info;
+ u64 val = 0, tar_addr = 0;
+
+ lmt_info = per_cpu_ptr(pf->hw.lmt_info, smp_processor_id());
+ /* FIXME: val[0:10] LMT_ID.
+ * [12:15] no of LMTST - 1 in the burst.
+ * [19:63] data size of each LMTST in the burst except first.
+ */
+ /* Only the LMT_ID field is filled in; the other fields stay zero */
+ val = (lmt_info->lmt_id & 0x7FF);
+ /* Target address for LMTST flush tells HW how many 128bit
+ * words are present.
+ * tar_addr[6:4] size of first LMTST - 1 in units of 128b.
+ */
+ tar_addr |= pf->ipsec.io_addr | (((size / 16) - 1) & 0x7) << 4;
+ /* NOTE(review): the barrier is issued before the copy into the LMT
+ * line, not between the copy and the flush - confirm this ordering.
+ */
+ dma_wmb();
+ memcpy((u64 *)lmt_info->lmt_addr, inst, size);
+ cn10k_lmt_flush(val, tar_addr);
+}
+
+/* Busy-wait, for up to 100 ms, until the completion code in @res is no
+ * longer CN10K_CPT_COMP_E_NOTDONE.
+ *
+ * Return: 0 once a completion code has been written, -EBUSY on timeout.
+ * NOTE(review): a completion code other than GOOD/WARN, or a non-zero
+ * microcode completion code, is only logged; 0 is still returned - confirm
+ * that callers are meant to treat this as success.
+ */
+static int cn10k_wait_for_cpt_respose(struct otx2_nic *pf,
+ struct cpt_res_s *res)
+{
+ unsigned long timeout = jiffies + msecs_to_jiffies(100);
+ u64 *completion_ptr = (u64 *)res;
+
+ /* Poll the first 64-bit word of the result for the completion code */
+ do {
+ if (time_after(jiffies, timeout)) {
+ netdev_err(pf->netdev, "CPT response timeout\n");
+ return -EBUSY;
+ }
+ } while ((READ_ONCE(*completion_ptr) & CN10K_CPT_COMP_E_MASK) ==
+ CN10K_CPT_COMP_E_NOTDONE);
+
+ if (!(res->compcode == CN10K_CPT_COMP_E_GOOD ||
+ res->compcode == CN10K_CPT_COMP_E_WARN) || res->uc_compcode) {
+ netdev_err(pf->netdev, "compcode=%x doneint=%x\n",
+ res->compcode, res->doneint);
+ netdev_err(pf->netdev, "uc_compcode=%x uc_info=%llx esn=%llx\n",
+ res->uc_compcode, (u64)res->uc_info, res->esn);
+ }
+ return 0;
+}
+
+/* Push the SA held in @sa_info to the CPT hardware with a WRITE_SA
+ * instruction.
+ *
+ * A temporary coherent buffer receives a copy of the SA with every 64-bit
+ * word converted with cpu_to_be64(); a second one receives the completion
+ * result.  The CPT device is claimed for the duration of the instruction
+ * and, on completion, a context flush is triggered for the SA address.
+ * Both temporary buffers are freed before returning.
+ *
+ * Return: 0 on success, -EINVAL if @sa_info has no DMA address, -ENOMEM on
+ * allocation failure, -ENODEV if the CPT device is unavailable, or the
+ * error from cn10k_wait_for_cpt_respose().
+ */
+static int cn10k_outb_write_sa(struct otx2_nic *pf, struct qmem *sa_info)
+{
+ dma_addr_t res_iova, dptr_iova, sa_iova;
+ struct cn10k_tx_sa_s *sa_dptr;
+ struct cpt_inst_s inst = {};
+ struct cpt_res_s *res;
+ u32 sa_size, off;
+ u64 *sptr, *dptr;
+ u64 reg_val;
+ int ret;
+
+ sa_iova = sa_info->iova;
+ if (!sa_iova)
+ return -EINVAL;
+
+ res = dma_alloc_coherent(pf->dev, sizeof(struct cpt_res_s),
+ &res_iova, GFP_ATOMIC);
+ if (!res)
+ return -ENOMEM;
+
+ sa_size = sizeof(struct cn10k_tx_sa_s);
+ sa_dptr = dma_alloc_coherent(pf->dev, sa_size, &dptr_iova, GFP_ATOMIC);
+ if (!sa_dptr) {
+ dma_free_coherent(pf->dev, sizeof(struct cpt_res_s), res,
+ res_iova);
+ return -ENOMEM;
+ }
+
+ /* Copy the SA word by word, converting each to big endian */
+ sptr = (__force u64 *)sa_info->base;
+ dptr = (__force u64 *)sa_dptr;
+ for (off = 0; off < (sa_size / 8); off++)
+ *(dptr + off) = (__force u64)cpu_to_be64(*(sptr + off));
+
+ /* Pre-mark the result as not done so completion can be polled for */
+ res->compcode = CN10K_CPT_COMP_E_NOTDONE;
+ inst.res_addr = res_iova;
+ inst.dptr = (u64)dptr_iova;
+ inst.param2 = sa_size >> 3;
+ inst.dlen = sa_size;
+ inst.opcode_major = CN10K_IPSEC_MAJOR_OP_WRITE_SA;
+ inst.opcode_minor = CN10K_IPSEC_MINOR_OP_WRITE_SA;
+ inst.cptr = sa_iova;
+ inst.ctx_val = 1;
+ inst.egrp = CN10K_DEF_CPT_IPSEC_EGRP;
+
+ /* Check if CPT-LF available */
+ if (!cn10k_cpt_device_set_inuse(pf)) {
+ ret = -ENODEV;
+ goto free_mem;
+ }
+
+ cn10k_cpt_inst_flush(pf, &inst, sizeof(struct cpt_inst_s));
+ dma_wmb();
+ ret = cn10k_wait_for_cpt_respose(pf, res);
+ if (ret)
+ goto set_available;
+
+ /* Trigger CTX flush to write dirty data back to DRAM */
+ reg_val = FIELD_PREP(CPT_LF_CTX_FLUSH, sa_iova >> 7);
+ otx2_write64(pf, CN10K_CPT_LF_CTX_FLUSH, reg_val);
+
+set_available:
+ cn10k_cpt_device_set_available(pf);
+free_mem:
+ dma_free_coherent(pf->dev, sa_size, sa_dptr, dptr_iova);
+ dma_free_coherent(pf->dev, sizeof(struct cpt_res_s), res, res_iova);
+ return ret;
+}
+
+/* Return the offset of the hw_ctx member inside struct cn10k_tx_sa_s,
+ * expressed in 64-bit words and limited to 7 bits.
+ */
+static int cn10k_ipsec_get_hw_ctx_offset(void)
+{
+ /* Offset of the hardware context, in 64-bit words */
+ return (offsetof(struct cn10k_tx_sa_s, hw_ctx) / sizeof(u64)) & 0x7F;
+}
+
+/* Return the context push size: the part of struct cn10k_tx_sa_s that
+ * precedes hw_ctx, in 8-byte units and limited to 7 bits.
+ */
+static int cn10k_ipsec_get_ctx_push_size(void)
+{
+ /* Context push size is rounded up to a multiple of 8 bytes */
+ return (roundup(offsetof(struct cn10k_tx_sa_s, hw_ctx), 8) / 8) & 0x7F;
+}
+
+/* Translate an AES key length in bytes into the SA key-length encoding.
+ * Anything other than 16 or 24 bytes maps to the 256-bit encoding.
+ */
+static int cn10k_ipsec_get_aes_key_len(int key_len)
+{
+	if (key_len == 16)
+		return CN10K_IPSEC_SA_AES_KEY_LEN_128;
+
+	if (key_len == 24)
+		return CN10K_IPSEC_SA_AES_KEY_LEN_192;
+
+	return CN10K_IPSEC_SA_AES_KEY_LEN_256;
+}
+
+/* Fill @sa_entry with the outbound SA context for xfrm state @x.
+ *
+ * The entry is zeroed, the context layout and protocol fields are set, and
+ * the AEAD key is split into cipher key and 4-byte salt, each stored after
+ * conversion with cpu_to_be64() / cpu_to_be32().  sa_valid is set last,
+ * after a write barrier.
+ */
+static void cn10k_outb_prepare_sa(struct xfrm_state *x,
+ struct cn10k_tx_sa_s *sa_entry)
+{
+ int key_len = (x->aead->alg_key_len + 7) / 8;
+ struct net_device *netdev = x->xso.dev;
+ u8 *key = x->aead->alg_key;
+ struct otx2_nic *pf;
+ u32 *tmp_salt;
+ u64 *tmp_key;
+ int idx;
+
+ memset(sa_entry, 0, sizeof(struct cn10k_tx_sa_s));
+
+ /* context size, 128 Byte aligned up */
+ pf = netdev_priv(netdev);
+ sa_entry->ctx_size = (pf->ipsec.sa_size / OTX2_ALIGN) & 0xF;
+ sa_entry->hw_ctx_off = cn10k_ipsec_get_hw_ctx_offset();
+ sa_entry->ctx_push_size = cn10k_ipsec_get_ctx_push_size();
+
+ /* Ucode to skip two words of CPT_CTX_HW_S */
+ sa_entry->ctx_hdr_size = 1;
+
+ /* Allow Atomic operation (AOP) */
+ sa_entry->aop_valid = 1;
+
+ /* Outbound, ESP transport/tunnel mode, AES-GCM, IV source PACKET */
+ sa_entry->sa_dir = CN10K_IPSEC_SA_DIR_OUTB;
+ sa_entry->ipsec_protocol = CN10K_IPSEC_SA_IPSEC_PROTO_ESP;
+ sa_entry->enc_type = CN10K_IPSEC_SA_ENCAP_TYPE_AES_GCM;
+ sa_entry->iv_src = CN10K_IPSEC_SA_IV_SRC_PACKET;
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ sa_entry->ipsec_mode = CN10K_IPSEC_SA_IPSEC_MODE_TUNNEL;
+ else
+ sa_entry->ipsec_mode = CN10K_IPSEC_SA_IPSEC_MODE_TRANSPORT;
+
+ /* Last 4 bytes are salt */
+ key_len -= 4;
+ sa_entry->aes_key_len = cn10k_ipsec_get_aes_key_len(key_len);
+ memcpy(sa_entry->cipher_key, key, key_len);
+ tmp_key = (u64 *)sa_entry->cipher_key;
+
+ /* Convert the cipher key in place, one 64-bit word at a time */
+ for (idx = 0; idx < key_len / 8; idx++)
+ tmp_key[idx] = (__force u64)cpu_to_be64(tmp_key[idx]);
+
+ memcpy(&sa_entry->iv_gcm_salt, key + key_len, 4);
+ tmp_salt = (u32 *)&sa_entry->iv_gcm_salt;
+ *tmp_salt = (__force u32)cpu_to_be32(*tmp_salt);
+
+ /* Write SA context data to memory before enabling */
+ wmb();
+
+ /* Enable SA */
+ sa_entry->sa_valid = 1;
+}
+
+/* Check that xfrm state @x can be offloaded by this driver.
+ *
+ * Accepted: crypto-mode offload of ESP in transport or tunnel mode over
+ * IPv4/IPv6, AES-GCM with a 128-bit ICV, a 128/192/256-bit key plus 32-bit
+ * salt and the "seqiv" IV generator; no separate authentication algorithm,
+ * compression, ESN, encapsulation or TFC padding.
+ *
+ * Return: 0 if the state is acceptable, otherwise -EINVAL with the reason
+ * recorded in @extack.
+ */
+static int cn10k_ipsec_validate_state(struct xfrm_state *x,
+ struct netlink_ext_ack *extack)
+{
+ if (x->props.aalgo != SADB_AALG_NONE) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload authenticated xfrm states");
+ return -EINVAL;
+ }
+ if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only AES-GCM-ICV16 xfrm state may be offloaded");
+ return -EINVAL;
+ }
+ if (x->props.calgo != SADB_X_CALG_NONE) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload compressed xfrm states");
+ return -EINVAL;
+ }
+ if (x->props.flags & XFRM_STATE_ESN) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
+ return -EINVAL;
+ }
+ if (x->props.family != AF_INET && x->props.family != AF_INET6) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only IPv4/v6 xfrm states may be offloaded");
+ return -EINVAL;
+ }
+ if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload other than crypto-mode");
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_TRANSPORT &&
+ x->props.mode != XFRM_MODE_TUNNEL) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only tunnel/transport xfrm states may be offloaded");
+ return -EINVAL;
+ }
+ if (x->id.proto != IPPROTO_ESP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only ESP xfrm state may be offloaded");
+ return -EINVAL;
+ }
+ if (x->encap) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Encapsulated xfrm state may not be offloaded");
+ return -EINVAL;
+ }
+ /* x->aead is dereferenced by the checks below */
+ if (!x->aead) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload xfrm states without aead");
+ return -EINVAL;
+ }
+
+ if (x->aead->alg_icv_len != 128) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload xfrm states with AEAD ICV length other than 128bit");
+ return -EINVAL;
+ }
+ /* Key length in bits includes the 32-bit salt */
+ if (x->aead->alg_key_len != 128 + 32 &&
+ x->aead->alg_key_len != 192 + 32 &&
+ x->aead->alg_key_len != 256 + 32) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload xfrm states with AEAD key length other than 128/192/256bit");
+ return -EINVAL;
+ }
+ if (x->tfcpad) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload xfrm states with tfc padding");
+ return -EINVAL;
+ }
+ if (!x->geniv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload xfrm states without geniv");
+ return -EINVAL;
+ }
+ if (strcmp(x->geniv, "seqiv")) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload xfrm states with geniv other than seqiv");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Inbound SA offload is not implemented: always fails with -EOPNOTSUPP and
+ * reports the reason through @extack.
+ */
+static int cn10k_ipsec_inb_add_state(struct xfrm_state *x,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG_MOD(extack, "xfrm inbound offload not supported");
+ return -EOPNOTSUPP;
+}
+
+/* Install an outbound SA for xfrm state @x.
+ *
+ * After validation, SA memory is allocated, filled from @x and written to
+ * the CPT hardware.  On success the qmem handle is stored in
+ * x->xso.offload_handle and the outbound SA count is incremented; the
+ * cn10k_ipsec_sa_enabled static branch is enabled when the count was zero.
+ *
+ * Return: 0 on success, otherwise the error from validation, allocation or
+ * the SA write (the SA memory is freed in the latter case).
+ */
+static int cn10k_ipsec_outb_add_state(struct xfrm_state *x,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *netdev = x->xso.dev;
+ struct cn10k_tx_sa_s *sa_entry;
+ struct qmem *sa_info;
+ struct otx2_nic *pf;
+ int err;
+
+ err = cn10k_ipsec_validate_state(x, extack);
+ if (err)
+ return err;
+
+ pf = netdev_priv(netdev);
+
+ err = qmem_alloc(pf->dev, &sa_info, pf->ipsec.sa_size, OTX2_ALIGN);
+ if (err)
+ return err;
+
+ sa_entry = (struct cn10k_tx_sa_s *)sa_info->base;
+ cn10k_outb_prepare_sa(x, sa_entry);
+
+ err = cn10k_outb_write_sa(pf, sa_info);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error writing outbound SA");
+ qmem_free(pf->dev, sa_info);
+ return err;
+ }
+
+ x->xso.offload_handle = (unsigned long)sa_info;
+ /* Enable static branch when first SA setup */
+ if (!pf->ipsec.outb_sa_count)
+ static_branch_enable(&cn10k_ipsec_sa_enabled);
+ pf->ipsec.outb_sa_count++;
+ return 0;
+}
+
+/* xdo_dev_state_add callback: route the request to the inbound or outbound
+ * handler according to the offload direction of @x.
+ */
+static int cn10k_ipsec_add_state(struct xfrm_state *x,
+				 struct netlink_ext_ack *extack)
+{
+	if (x->xso.dir != XFRM_DEV_OFFLOAD_IN)
+		return cn10k_ipsec_outb_add_state(x, extack);
+
+	return cn10k_ipsec_inb_add_state(x, extack);
+}
+
+/* xdo_dev_state_delete callback: remove the outbound SA of xfrm state @x.
+ *
+ * The SA memory is overwritten with a cleared context (sa_valid = 0) that
+ * is written to the CPT hardware, then freed.  When the last outbound SA
+ * goes away, sa_work is queued.  Inbound states are ignored.
+ */
+static void cn10k_ipsec_del_state(struct xfrm_state *x)
+{
+ struct net_device *netdev = x->xso.dev;
+ struct cn10k_tx_sa_s *sa_entry;
+ struct qmem *sa_info;
+ struct otx2_nic *pf;
+ int err;
+
+ if (x->xso.dir == XFRM_DEV_OFFLOAD_IN)
+ return;
+
+ pf = netdev_priv(netdev);
+
+ sa_info = (struct qmem *)x->xso.offload_handle;
+ sa_entry = (struct cn10k_tx_sa_s *)sa_info->base;
+ memset(sa_entry, 0, sizeof(struct cn10k_tx_sa_s));
+ /* Disable SA in CPT h/w */
+ sa_entry->ctx_push_size = cn10k_ipsec_get_ctx_push_size();
+ sa_entry->ctx_size = (pf->ipsec.sa_size / OTX2_ALIGN) & 0xF;
+ sa_entry->aop_valid = 1;
+
+ /* A failed write is only logged; the SA memory is released anyway */
+ err = cn10k_outb_write_sa(pf, sa_info);
+ if (err)
+ netdev_err(netdev, "Error (%d) deleting SA\n", err);
+
+ x->xso.offload_handle = 0;
+ qmem_free(pf->dev, sa_info);
+
+ /* If no more SA's then update netdev feature for potential change
+ * in NETIF_F_HW_ESP.
+ */
+ if (!--pf->ipsec.outb_sa_count)
+ queue_work(pf->ipsec.sa_workq, &pf->ipsec.sa_work);
+}
+
+/* xdo_dev_offload_ok callback: decide whether @skb can be offloaded.
+ *
+ * IPv4 packets carrying options and IPv6 packets whose first next-header is
+ * an extension header are not supported yet.
+ */
+static bool cn10k_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+	/* IPv4: a header longer than 5 words means options are present */
+	if (x->props.family == AF_INET)
+		return ip_hdr(skb)->ihl <= 5;
+
+	/* IPv6: reject packets that start with an extension header */
+	return !ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr);
+}
+
+/* xfrm device offload callbacks, installed on the netdev by
+ * cn10k_ipsec_init().
+ */
+static const struct xfrmdev_ops cn10k_ipsec_xfrmdev_ops = {
+ .xdo_dev_state_add = cn10k_ipsec_add_state,
+ .xdo_dev_state_delete = cn10k_ipsec_del_state,
+ .xdo_dev_offload_ok = cn10k_ipsec_offload_ok,
+};
+
+/* Work handler queued by cn10k_ipsec_del_state() when the outbound SA count
+ * drops to zero: disables the cn10k_ipsec_sa_enabled static branch and
+ * re-evaluates the netdev features under the RTNL lock.
+ */
+static void cn10k_ipsec_sa_wq_handler(struct work_struct *work)
+{
+ struct cn10k_ipsec *ipsec = container_of(work, struct cn10k_ipsec,
+ sa_work);
+ struct otx2_nic *pf = container_of(ipsec, struct otx2_nic, ipsec);
+
+ /* Disable static branch when no more SA enabled */
+ static_branch_disable(&cn10k_ipsec_sa_enabled);
+ rtnl_lock();
+ netdev_update_features(pf->netdev);
+ rtnl_unlock();
+}
+
+/* Enable or disable outbound IPsec offload on @netdev.
+ *
+ * Return: -EOPNOTSUPP on devices without IPsec offload support; when
+ * @enable is set, the result of cn10k_outb_cpt_init(); otherwise -EBUSY
+ * while SAs are still installed, else the result of cn10k_outb_cpt_clean().
+ */
+int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable)
+{
+	struct otx2_nic *pf = netdev_priv(netdev);
+
+	/* IPsec offload supported on cn10k */
+	if (!is_dev_support_ipsec_offload(pf->pdev))
+		return -EOPNOTSUPP;
+
+	/* Enabling: bring up CPT for outbound ipsec offload */
+	if (enable)
+		return cn10k_outb_cpt_init(netdev);
+
+	/* Disabling: only allowed once every SA has been removed */
+	if (!pf->ipsec.outb_sa_count)
+		return cn10k_outb_cpt_clean(pf);
+
+	netdev_err(pf->netdev, "SA installed on this device\n");
+	return -EBUSY;
+}
+
+/* One-time IPsec offload setup for @netdev.
+ *
+ * On supported devices this computes the per-SA memory size, creates the SA
+ * workqueue, installs the xfrm device ops, advertises NETIF_F_HW_ESP and
+ * marks the CPT device unavailable until offload is enabled.
+ *
+ * Return: 0 on success or on unsupported devices, -ENOMEM if the workqueue
+ * cannot be created.
+ */
+int cn10k_ipsec_init(struct net_device *netdev)
+{
+	struct otx2_nic *pf = netdev_priv(netdev);
+	struct workqueue_struct *wq;
+
+	if (!is_dev_support_ipsec_offload(pf->pdev))
+		return 0;
+
+	/* Each SA entry occupies a whole number of OTX2_ALIGN sized units */
+	pf->ipsec.sa_size = roundup(sizeof(struct cn10k_tx_sa_s), OTX2_ALIGN);
+
+	INIT_WORK(&pf->ipsec.sa_work, cn10k_ipsec_sa_wq_handler);
+	wq = alloc_workqueue("cn10k_ipsec_sa_workq", 0, 0);
+	pf->ipsec.sa_workq = wq;
+	if (!wq) {
+		netdev_err(pf->netdev, "SA alloc workqueue failed\n");
+		return -ENOMEM;
+	}
+
+	/* Set xfrm device ops */
+	netdev->xfrmdev_ops = &cn10k_ipsec_xfrmdev_ops;
+	netdev->hw_features |= NETIF_F_HW_ESP;
+	netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+	cn10k_cpt_device_set_unavailable(pf);
+	return 0;
+}
+EXPORT_SYMBOL(cn10k_ipsec_init);
+
+/* Undo cn10k_ipsec_init() and, if offload is currently enabled, tear down
+ * the CPT LF as well.
+ *
+ * The SA workqueue is created by cn10k_ipsec_init() regardless of whether
+ * offload is ever enabled, so it is destroyed before the
+ * OTX2_FLAG_IPSEC_OFFLOAD_ENABLED check; otherwise it would leak whenever
+ * offload is not enabled at cleanup time.
+ */
+void cn10k_ipsec_clean(struct otx2_nic *pf)
+{
+	if (!is_dev_support_ipsec_offload(pf->pdev))
+		return;
+
+	if (pf->ipsec.sa_workq) {
+		destroy_workqueue(pf->ipsec.sa_workq);
+		pf->ipsec.sa_workq = NULL;
+	}
+
+	/* CPT resources exist only while offload is enabled */
+	if (!(pf->flags & OTX2_FLAG_IPSEC_OFFLOAD_ENABLED))
+		return;
+
+	cn10k_outb_cpt_clean(pf);
+}
+EXPORT_SYMBOL(cn10k_ipsec_clean);
+
+/* Return the length of the IP datagram in @skb, read from the IP header
+ * that follows the Ethernet header at skb->data: tot_len for IPv4,
+ * payload_len plus the fixed header size for IPv6.
+ */
+static u16 cn10k_ipsec_get_ip_data_len(struct xfrm_state *x,
+				       struct sk_buff *skb)
+{
+	u8 *l3_hdr = (u8 *)skb->data + ETH_HLEN;
+
+	if (x->props.family != AF_INET) {
+		struct ipv6hdr *ip6 = (struct ipv6hdr *)l3_hdr;
+
+		return ntohs(ip6->payload_len) + sizeof(struct ipv6hdr);
+	}
+
+	return ntohs(((struct iphdr *)l3_hdr)->tot_len);
+}
+
+/* Prepare CPT and NIX SQE scatter/gather subdescriptor structure.
+ * SG of NIX and CPT are same in size.
+ * Layout of a NIX SQE and CPT SG entry:
+ * -----------------------------
+ * | CPT Scatter Gather |
+ * | (SQE SIZE) |
+ * | |
+ * -----------------------------
+ * | NIX SQE |
+ * | (SQE SIZE) |
+ * | |
+ * -----------------------------
+ *
+ * Each of the @num_segs fragments of @skb is DMA mapped and its address and
+ * length are written both to the NIX SG subdescriptor at
+ * sq->sqe_base + *offset and to the CPT SG area located one SQE size below
+ * sq->sqe_base.  @offset is advanced past the NIX subdescriptors written.
+ * The mappings are recorded in sq->sg[sq->head] for later unmapping.
+ *
+ * Return: true on success, false if a fragment could not be DMA mapped (the
+ * fragments mapped so far stay recorded in sq->sg[sq->head]).
+ */
+bool otx2_sqe_add_sg_ipsec(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
+ struct sk_buff *skb, int num_segs, int *offset)
+{
+ struct cpt_sg_s *cpt_sg = NULL;
+ struct nix_sqe_sg_s *sg = NULL;
+ u64 dma_addr, *iova = NULL;
+ u64 *cpt_iova = NULL;
+ u16 *sg_lens = NULL;
+ int seg, len;
+
+ sq->sg[sq->head].num_segs = 0;
+ cpt_sg = (struct cpt_sg_s *)(sq->sqe_base - sq->sqe_size);
+
+ for (seg = 0; seg < num_segs; seg++) {
+ /* Start a new SG subdescriptor every MAX_SEGS_PER_SG segments */
+ if ((seg % MAX_SEGS_PER_SG) == 0) {
+ sg = (struct nix_sqe_sg_s *)(sq->sqe_base + *offset);
+ sg->ld_type = NIX_SEND_LDTYPE_LDD;
+ sg->subdc = NIX_SUBDC_SG;
+ sg->segs = 0;
+ sg_lens = (void *)sg;
+ iova = (void *)sg + sizeof(*sg);
+ /* Next subdc always starts at a 16byte boundary.
+ * So if sg->segs is either 2 or 3, offset += 16bytes.
+ */
+ if ((num_segs - seg) >= (MAX_SEGS_PER_SG - 1))
+ *offset += sizeof(*sg) + (3 * sizeof(u64));
+ else
+ *offset += sizeof(*sg) + sizeof(u64);
+
+ /* NOTE(review): cpt_sg is advanced cumulatively, by
+ * (seg / MAX_SEGS_PER_SG) * 4 elements per new group -
+ * confirm this is intended beyond the second group.
+ */
+ cpt_sg += (seg / MAX_SEGS_PER_SG) * 4;
+ cpt_iova = (void *)cpt_sg + sizeof(*cpt_sg);
+ }
+ dma_addr = otx2_dma_map_skb_frag(pfvf, skb, seg, &len);
+ if (dma_mapping_error(pfvf->dev, dma_addr))
+ return false;
+
+ sg_lens[seg % MAX_SEGS_PER_SG] = len;
+ sg->segs++;
+ *iova++ = dma_addr;
+ *cpt_iova++ = dma_addr;
+
+ /* Save DMA mapping info for later unmapping */
+ sq->sg[sq->head].dma_addr[seg] = dma_addr;
+ sq->sg[sq->head].size[seg] = len;
+ sq->sg[sq->head].num_segs++;
+
+ /* Mirror the NIX SG header into the CPT SG entry */
+ *cpt_sg = *(struct cpt_sg_s *)sg;
+ cpt_sg->rsvd_63_50 = 0;
+ }
+
+ sq->sg[sq->head].skb = (u64)skb;
+ return true;
+}
+
+/* Build the CPT instruction PARAM1 value: L3 and L4 checksum generation
+ * disabled, with @iv_offset placed at CN10K_IPSEC_INST_PARAM1_IV_OFFSET_SHIFT.
+ */
+static u16 cn10k_ipsec_get_param1(u8 iv_offset)
+{
+	u16 csum_off = CN10K_IPSEC_INST_PARAM1_DIS_L4_CSUM |
+		       CN10K_IPSEC_INST_PARAM1_DIS_L3_CSUM;
+
+	return csum_off |
+	       ((u16)iv_offset << CN10K_IPSEC_INST_PARAM1_IV_OFFSET_SHIFT);
+}
+
+/* Build and submit the CPT instruction that encrypts @skb inline and passes
+ * the result to NIX using the SQE already prepared at sq->sqe_base.
+ *
+ * @pf: NIC private data
+ * @txq: netdev TX queue; charged with skb->len when the instruction is queued
+ * @sq: send queue; sq->head selects the SQE ring and CPT response slot
+ * @skb: packet whose segments were mapped by otx2_sqe_add_sg_ipsec()
+ * @num_segs: number of mapped segments
+ * @size: size of the prepared NIX SQE in bytes (programmed in 16-byte units)
+ *
+ * The skb is consumed in every case: it is either queued to hardware or
+ * unmapped and freed here. The function therefore always returns true; in
+ * otx2_sq_append_skb() a false return is used for paths that leave the skb
+ * untouched, which must not be signalled once the skb has been freed.
+ */
+bool cn10k_ipsec_transmit(struct otx2_nic *pf, struct netdev_queue *txq,
+			  struct otx2_snd_queue *sq, struct sk_buff *skb,
+			  int num_segs, int size)
+{
+	struct cpt_inst_s inst;
+	struct cpt_res_s *res;
+	struct xfrm_state *x;
+	struct qmem *sa_info;
+	dma_addr_t dptr_iova;
+	struct sec_path *sp;
+	u8 encap_offset;
+	u8 auth_offset;
+	u8 gthr_size;
+	u8 iv_offset;
+	u16 dlen;
+
+	/* Check for IPSEC offload enabled */
+	if (!(pf->flags & OTX2_FLAG_IPSEC_OFFLOAD_ENABLED))
+		goto drop;
+
+	sp = skb_sec_path(skb);
+	if (unlikely(!sp || !sp->len))
+		goto drop;
+
+	x = xfrm_input_state(skb);
+	if (unlikely(!x))
+		goto drop;
+
+	if (x->props.mode != XFRM_MODE_TRANSPORT &&
+	    x->props.mode != XFRM_MODE_TUNNEL)
+		goto drop;
+
+	/* A zero IP length is always invalid; only the log is conditional */
+	dlen = cn10k_ipsec_get_ip_data_len(x, skb);
+	if (unlikely(dlen == 0)) {
+		if (netif_msg_tx_err(pf))
+			netdev_err(pf->netdev,
+				   "Invalid IP header, ip-length zero\n");
+		goto drop;
+	}
+
+	/* Check for valid SA context */
+	sa_info = (struct qmem *)x->xso.offload_handle;
+	if (!sa_info)
+		goto drop;
+
+	memset(&inst, 0, sizeof(struct cpt_inst_s));
+
+	/* Get authentication offset */
+	if (x->props.family == AF_INET)
+		auth_offset = sizeof(struct iphdr);
+	else
+		auth_offset = sizeof(struct ipv6hdr);
+
+	/* IV offset is after ESP header */
+	iv_offset = auth_offset + sizeof(struct ip_esp_hdr);
+	/* Encap will start after IV */
+	encap_offset = iv_offset + GCM_RFC4106_IV_SIZE;
+
+	/* CPT Instruction word-1: per-SQE 64-byte result slot, cleared first */
+	res = (struct cpt_res_s *)(sq->cpt_resp->base + (64 * sq->head));
+	res->compcode = 0;
+	inst.res_addr = sq->cpt_resp->iova + (64 * sq->head);
+
+	/* CPT Instruction word-2 */
+	inst.rvu_pf_func = pf->pcifunc;
+
+	/* CPT Instruction word-3:
+	 * Set QORD to force CPT_RES_S write completion
+	 */
+	inst.qord = 1;
+
+	/* CPT Instruction word-4 */
+	/* inst.dlen should not include ICV length */
+	inst.dlen = dlen + ETH_HLEN - (x->aead->alg_icv_len / 8);
+	inst.opcode_major = CN10K_IPSEC_MAJOR_OP_OUTB_IPSEC;
+	inst.param1 = cn10k_ipsec_get_param1(iv_offset);
+
+	inst.param2 = encap_offset <<
+		      CN10K_IPSEC_INST_PARAM2_ENC_DATA_OFFSET_SHIFT;
+	inst.param2 |= (u16)auth_offset <<
+		       CN10K_IPSEC_INST_PARAM2_AUTH_DATA_OFFSET_SHIFT;
+
+	/* CPT Instruction word-5: number of SG groups, rounded up */
+	gthr_size = num_segs / MAX_SEGS_PER_SG;
+	gthr_size = (num_segs % MAX_SEGS_PER_SG) ? gthr_size + 1 : gthr_size;
+
+	gthr_size &= 0xF;
+	/* Each ring entry is 2 * sqe_size and begins with the CPT SG area */
+	dptr_iova = (sq->sqe_ring->iova + (sq->head * (sq->sqe_size * 2)));
+	inst.dptr = dptr_iova | ((u64)gthr_size << 60);
+
+	/* CPT Instruction word-6 */
+	inst.rptr = inst.dptr;
+
+	/* CPT Instruction word-7 */
+	inst.cptr = sa_info->iova;
+	inst.ctx_val = 1;
+	inst.egrp = CN10K_DEF_CPT_IPSEC_EGRP;
+
+	/* CPT Instruction word-0 */
+	inst.nixtxl = (size / 16) - 1;
+	inst.dat_offset = ETH_HLEN;
+	inst.nixtx_offset = sq->sqe_size;
+
+	netdev_tx_sent_queue(txq, skb->len);
+
+	/* Finally Flush the CPT instruction */
+	sq->head++;
+	sq->head &= (sq->sqe_cnt - 1);
+	cn10k_cpt_inst_flush(pf, &inst, sizeof(struct cpt_inst_s));
+	return true;
+drop:
+	/* Release the mappings recorded by otx2_sqe_add_sg_ipsec() for this
+	 * slot and clear its skb pointer before the skb is freed, so the slot
+	 * does not keep a reference to freed memory.
+	 */
+	otx2_dma_unmap_skb_frags(pf, &sq->sg[sq->head]);
+	sq->sg[sq->head].skb = 0;
+	dev_kfree_skb_any(skb);
+	return true;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h
new file mode 100644
index 000000000000..9965df0faa3e
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell IPSEC offload driver
+ *
+ * Copyright (C) 2024 Marvell.
+ */
+
+#ifndef CN10K_IPSEC_H
+#define CN10K_IPSEC_H
+
+#include <linux/types.h>
+
+DECLARE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled);
+
+/* CPT instruction size in bytes */
+#define CN10K_CPT_INST_SIZE 64
+
+/* CPT instruction (CPT_INST_S) queue length */
+#define CN10K_CPT_INST_QLEN 8200
+
+/* CPT instruction queue size passed to HW is in units of
+ * 40*CPT_INST_S messages.
+ */
+#define CN10K_CPT_SIZE_DIV40 (CN10K_CPT_INST_QLEN / 40)
+
+/* CPT needs 320 free entries */
+#define CN10K_CPT_INST_QLEN_EXTRA_BYTES (320 * CN10K_CPT_INST_SIZE)
+#define CN10K_CPT_EXTRA_SIZE_DIV40 (320 / 40)
+
+/* CPT instruction queue length in bytes */
+#define CN10K_CPT_INST_QLEN_BYTES \
+ ((CN10K_CPT_SIZE_DIV40 * 40 * CN10K_CPT_INST_SIZE) + \
+ CN10K_CPT_INST_QLEN_EXTRA_BYTES)
+
+/* CPT instruction group queue length in bytes */
+#define CN10K_CPT_INST_GRP_QLEN_BYTES \
+ ((CN10K_CPT_SIZE_DIV40 + CN10K_CPT_EXTRA_SIZE_DIV40) * 16)
+
+/* CPT FC length in bytes */
+#define CN10K_CPT_Q_FC_LEN 128
+
+/* Default CPT engine group for ipsec offload */
+#define CN10K_DEF_CPT_IPSEC_EGRP 1
+
+/* CN10K CPT LF registers */
+#define CPT_LFBASE (BLKTYPE_CPT << RVU_FUNC_BLKADDR_SHIFT)
+#define CN10K_CPT_LF_CTL (CPT_LFBASE | 0x10)
+#define CN10K_CPT_LF_INPROG (CPT_LFBASE | 0x40)
+#define CN10K_CPT_LF_Q_BASE (CPT_LFBASE | 0xf0)
+#define CN10K_CPT_LF_Q_SIZE (CPT_LFBASE | 0x100)
+#define CN10K_CPT_LF_Q_INST_PTR (CPT_LFBASE | 0x110)
+#define CN10K_CPT_LF_Q_GRP_PTR (CPT_LFBASE | 0x120)
+#define CN10K_CPT_LF_NQX(a) (CPT_LFBASE | 0x400 | (a) << 3)
+#define CN10K_CPT_LF_CTX_FLUSH (CPT_LFBASE | 0x510)
+
+/* IPSEC Instruction opcodes */
+#define CN10K_IPSEC_MAJOR_OP_WRITE_SA 0x01UL
+#define CN10K_IPSEC_MINOR_OP_WRITE_SA 0x09UL
+#define CN10K_IPSEC_MAJOR_OP_OUTB_IPSEC 0x2AUL
+
+/* CPT completion codes.
+ * NOTE(review): presumably the values hardware reports in
+ * cpt_res_s.compcode - confirm against the CPT hardware documentation.
+ */
+enum cn10k_cpt_comp_e {
+ CN10K_CPT_COMP_E_NOTDONE = 0x00,
+ CN10K_CPT_COMP_E_GOOD = 0x01,
+ CN10K_CPT_COMP_E_FAULT = 0x02,
+ CN10K_CPT_COMP_E_HWERR = 0x04,
+ CN10K_CPT_COMP_E_INSTERR = 0x05,
+ CN10K_CPT_COMP_E_WARN = 0x06,
+ CN10K_CPT_COMP_E_MASK = 0x3F
+};
+
+/* Memory backing the CPT instruction queue: CPU and DMA addresses plus size.
+ * NOTE(review): vaddr/dma_addr are presumably the aligned addresses in use
+ * and real_vaddr/real_dma_addr the raw allocation - confirm against the
+ * allocation code in cn10k_ipsec.c.
+ */
+struct cn10k_cpt_inst_queue {
+ u8 *vaddr;
+ u8 *real_vaddr;
+ dma_addr_t dma_addr;
+ dma_addr_t real_dma_addr;
+ u32 size;
+};
+
+/* Availability state of the outbound CPT hardware.
+ * NOTE(review): presumably the values kept in cn10k_ipsec.cpt_state - confirm.
+ */
+enum cn10k_cpt_hw_state_e {
+ CN10K_CPT_HW_UNAVAILABLE,
+ CN10K_CPT_HW_AVAILABLE,
+ CN10K_CPT_HW_IN_USE
+};
+
+/* Per-device inline IPsec state; embedded in struct otx2_nic as 'ipsec'. */
+struct cn10k_ipsec {
+ /* Outbound CPT */
+ u64 io_addr;
+ /* NOTE(review): presumably an enum cn10k_cpt_hw_state_e value - confirm */
+ atomic_t cpt_state;
+ struct cn10k_cpt_inst_queue iq;
+
+ /* SA info */
+ u32 sa_size;
+ u32 outb_sa_count;
+ struct work_struct sa_work;
+ /* Destroyed and reset to NULL by cn10k_ipsec_clean() */
+ struct workqueue_struct *sa_workq;
+};
+
+/* CN10K IPSEC Security Association (SA) */
+/* SA direction */
+#define CN10K_IPSEC_SA_DIR_INB 0
+#define CN10K_IPSEC_SA_DIR_OUTB 1
+/* SA protocol */
+#define CN10K_IPSEC_SA_IPSEC_PROTO_AH 0
+#define CN10K_IPSEC_SA_IPSEC_PROTO_ESP 1
+/* SA Encryption Type */
+#define CN10K_IPSEC_SA_ENCAP_TYPE_AES_GCM 5
+/* SA IPSEC mode Transport/Tunnel */
+#define CN10K_IPSEC_SA_IPSEC_MODE_TRANSPORT 0
+#define CN10K_IPSEC_SA_IPSEC_MODE_TUNNEL 1
+/* SA AES Key Length */
+#define CN10K_IPSEC_SA_AES_KEY_LEN_128 1
+#define CN10K_IPSEC_SA_AES_KEY_LEN_192 2
+#define CN10K_IPSEC_SA_AES_KEY_LEN_256 3
+/* IV Source */
+#define CN10K_IPSEC_SA_IV_SRC_COUNTER 0
+#define CN10K_IPSEC_SA_IV_SRC_PACKET 3
+
+/* Outbound (TX) SA context, laid out as 64-bit words W0..W36.
+ * The bit-fields of W0 and of W2 each add up to exactly 64 bits; field order
+ * and widths define the layout and must not be changed.
+ */
+struct cn10k_tx_sa_s {
+ u64 esn_en : 1; /* W0 */
+ u64 rsvd_w0_1_8 : 8;
+ u64 hw_ctx_off : 7;
+ u64 ctx_id : 16;
+ u64 rsvd_w0_32_47 : 16;
+ u64 ctx_push_size : 7;
+ u64 rsvd_w0_55 : 1;
+ u64 ctx_hdr_size : 2;
+ u64 aop_valid : 1;
+ u64 rsvd_w0_59 : 1;
+ u64 ctx_size : 4;
+ u64 w1; /* W1 */
+ u64 sa_valid : 1; /* W2 */
+ u64 sa_dir : 1;
+ u64 rsvd_w2_2_3 : 2;
+ u64 ipsec_mode : 1;
+ u64 ipsec_protocol : 1;
+ u64 aes_key_len : 2;
+ u64 enc_type : 3;
+ u64 rsvd_w2_11_19 : 9;
+ u64 iv_src : 2;
+ u64 rsvd_w2_22_31 : 10;
+ u64 rsvd_w2_32_63 : 32;
+ u64 w3; /* W3 */
+ u8 cipher_key[32]; /* W4 - W7 */
+ u32 rsvd_w8_0_31; /* W8 : IV */
+ u32 iv_gcm_salt;
+ u64 rsvd_w9_w30[22]; /* W9 - W30 */
+ u64 hw_ctx[6]; /* W31 - W36 */
+};
+
+/* CPT instruction parameter-1 */
+#define CN10K_IPSEC_INST_PARAM1_DIS_L4_CSUM 0x1
+#define CN10K_IPSEC_INST_PARAM1_DIS_L3_CSUM 0x2
+#define CN10K_IPSEC_INST_PARAM1_CRYPTO_MODE 0x20
+#define CN10K_IPSEC_INST_PARAM1_IV_OFFSET_SHIFT 8
+
+/* CPT instruction parameter-2 */
+#define CN10K_IPSEC_INST_PARAM2_ENC_DATA_OFFSET_SHIFT 0
+#define CN10K_IPSEC_INST_PARAM2_AUTH_DATA_OFFSET_SHIFT 8
+
+/* CPT Instruction Structure: eight 64-bit words (W0..W7).
+ * The bit-fields within each word add up to exactly 64 bits; field order and
+ * widths define the layout and must not be changed.
+ * cn10k_ipsec_transmit() zeroes the whole structure and then fills it word
+ * by word before flushing it to hardware.
+ */
+struct cpt_inst_s {
+ u64 nixtxl : 3; /* W0 */
+ u64 doneint : 1;
+ u64 rsvd_w0_4_15 : 12;
+ u64 dat_offset : 8;
+ u64 ext_param1 : 8;
+ u64 nixtx_offset : 20;
+ u64 rsvd_w0_52_63 : 12;
+ u64 res_addr; /* W1 */
+ u64 tag : 32; /* W2 */
+ u64 tt : 2;
+ u64 grp : 10;
+ u64 rsvd_w2_44_47 : 4;
+ u64 rvu_pf_func : 16;
+ u64 qord : 1; /* W3 */
+ u64 rsvd_w3_1_2 : 2;
+ u64 wqe_ptr : 61;
+ u64 dlen : 16; /* W4 */
+ u64 param2 : 16;
+ u64 param1 : 16;
+ u64 opcode_major : 8;
+ u64 opcode_minor : 8;
+ u64 dptr; /* W5 */
+ u64 rptr; /* W6 */
+ u64 cptr : 60; /* W7 */
+ u64 ctx_val : 1;
+ u64 egrp : 3;
+};
+
+/* CPT Instruction Result Structure: two 64-bit words.
+ * cn10k_ipsec_transmit() clears compcode in the per-SQE result slot before
+ * submitting the instruction that points at it.
+ */
+struct cpt_res_s {
+ u64 compcode : 7; /* W0 */
+ u64 doneint : 1;
+ u64 uc_compcode : 8;
+ u64 uc_info : 48;
+ u64 esn; /* W1 */
+};
+
+/* CPT SG structure: one 64-bit header word describing up to three segments.
+ * otx2_sqe_add_sg_ipsec() fills it by copying the NIX SG header word and then
+ * clearing rsvd_63_50.
+ */
+struct cpt_sg_s {
+ u64 seg1_size : 16;
+ u64 seg2_size : 16;
+ u64 seg3_size : 16;
+ u64 segs : 2;
+ u64 rsvd_63_50 : 14;
+};
+
+/* CPT LF_INPROG Register */
+#define CPT_LF_INPROG_INFLIGHT GENMASK_ULL(8, 0)
+#define CPT_LF_INPROG_GRB_CNT GENMASK_ULL(39, 32)
+#define CPT_LF_INPROG_GWB_CNT GENMASK_ULL(47, 40)
+
+/* CPT LF_Q_GRP_PTR Register */
+#define CPT_LF_Q_GRP_PTR_DQ_PTR GENMASK_ULL(14, 0)
+#define CPT_LF_Q_GRP_PTR_NQ_PTR GENMASK_ULL(46, 32)
+
+/* CPT LF_Q_BASE Register */
+#define CPT_LF_Q_BASE_ADDR GENMASK_ULL(52, 7)
+
+/* CPT LF_Q_SIZE Register */
+#define CPT_LF_Q_SIZE_DIV40 GENMASK_ULL(14, 0)
+
+/* CPT LF CTX Flush Register */
+#define CPT_LF_CTX_FLUSH GENMASK_ULL(45, 0)
+
+#ifdef CONFIG_XFRM_OFFLOAD
+int cn10k_ipsec_init(struct net_device *netdev);
+void cn10k_ipsec_clean(struct otx2_nic *pf);
+int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable);
+bool otx2_sqe_add_sg_ipsec(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
+ struct sk_buff *skb, int num_segs, int *offset);
+bool cn10k_ipsec_transmit(struct otx2_nic *pf, struct netdev_queue *txq,
+ struct otx2_snd_queue *sq, struct sk_buff *skb,
+ int num_segs, int size);
+#else
+/* Stubs used when CONFIG_XFRM_OFFLOAD is not enabled: the init and ethtool
+ * hooks report success (0), cleanup is a no-op, and the SG/transmit helpers
+ * return true without touching their arguments.
+ */
+static inline __maybe_unused int cn10k_ipsec_init(struct net_device *netdev)
+{
+ return 0;
+}
+
+static inline __maybe_unused void cn10k_ipsec_clean(struct otx2_nic *pf)
+{
+}
+
+static inline __maybe_unused
+int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable)
+{
+ return 0;
+}
+
+static inline bool __maybe_unused
+otx2_sqe_add_sg_ipsec(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
+ struct sk_buff *skb, int num_segs, int *offset)
+{
+ return true;
+}
+
+static inline bool __maybe_unused
+cn10k_ipsec_transmit(struct otx2_nic *pf, struct netdev_queue *txq,
+ struct otx2_snd_queue *sq, struct sk_buff *skb,
+ int num_segs, int size)
+{
+ return true;
+}
+#endif
+#endif /* CN10K_IPSEC_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 523ecb798a7a..bf56888e7fe7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -10,12 +10,18 @@
#include <net/page_pool/helpers.h>
#include <net/tso.h>
#include <linux/bitfield.h>
+#include <net/xfrm.h>
#include "otx2_reg.h"
#include "otx2_common.h"
#include "otx2_struct.h"
#include "cn10k.h"
+/* PFC counts as enabled only when DCB support is built in and pfc_en is set */
+static bool otx2_is_pfc_enabled(struct otx2_nic *pfvf)
+{
+	if (!IS_ENABLED(CONFIG_DCB))
+		return false;
+
+	return pfvf->pfc_en != 0;
+}
+
static void otx2_nix_rq_op_stats(struct queue_stats *stats,
struct otx2_nic *pfvf, int qidx)
{
@@ -964,6 +970,29 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
if (err)
return err;
+ /* Allocate memory for NIX SQE (which includes NIX SG) and CPT SG.
+ * SG of NIX and CPT are same in size. Allocate memory for CPT SG
+ * same as NIX SQE for base address alignment.
+ * Layout of a NIX SQE and CPT SG entry:
+ * -----------------------------
+ * | CPT Scatter Gather |
+ * | (SQE SIZE) |
+ * | |
+ * -----------------------------
+ * | NIX SQE |
+ * | (SQE SIZE) |
+ * | |
+ * -----------------------------
+ */
+ err = qmem_alloc(pfvf->dev, &sq->sqe_ring, qset->sqe_cnt,
+ sq->sqe_size * 2);
+ if (err)
+ return err;
+
+ err = qmem_alloc(pfvf->dev, &sq->cpt_resp, qset->sqe_cnt, 64);
+ if (err)
+ return err;
+
if (qidx < pfvf->hw.tx_queues) {
err = qmem_alloc(pfvf->dev, &sq->tso_hdrs, qset->sqe_cnt,
TSO_HEADER_SIZE);
@@ -1722,18 +1751,43 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable)
return -ENOMEM;
req->chan_base = 0;
-#ifdef CONFIG_DCB
- req->chan_cnt = pfvf->pfc_en ? IEEE_8021QAZ_MAX_TCS : 1;
- req->bpid_per_chan = pfvf->pfc_en ? 1 : 0;
-#else
- req->chan_cnt = 1;
- req->bpid_per_chan = 0;
-#endif
+ if (otx2_is_pfc_enabled(pfvf)) {
+ req->chan_cnt = IEEE_8021QAZ_MAX_TCS;
+ req->bpid_per_chan = 1;
+ } else {
+ req->chan_cnt = 1;
+ req->bpid_per_chan = 0;
+ }
return otx2_sync_mbox_msg(&pfvf->mbox);
}
EXPORT_SYMBOL(otx2_nix_config_bp);
+/* Send the mailbox request that enables or disables NIX-CPT backpressure.
+ * With PFC enabled one BPID per traffic class is requested, otherwise a
+ * single channel is used.
+ *
+ * Returns -ENOMEM when no mailbox message could be allocated, otherwise the
+ * result of the synchronous mailbox exchange.
+ */
+int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable)
+{
+	struct nix_bp_cfg_req *req;
+	bool per_tc;
+
+	req = enable ? otx2_mbox_alloc_msg_nix_cpt_bp_enable(&pfvf->mbox) :
+		       otx2_mbox_alloc_msg_nix_cpt_bp_disable(&pfvf->mbox);
+	if (!req)
+		return -ENOMEM;
+
+	per_tc = otx2_is_pfc_enabled(pfvf);
+
+	req->chan_base = 0;
+	req->chan_cnt = per_tc ? IEEE_8021QAZ_MAX_TCS : 1;
+	req->bpid_per_chan = per_tc ? 1 : 0;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+EXPORT_SYMBOL(otx2_nix_cpt_config_bp);
+
/* Mbox message handlers */
void mbox_handler_cgx_stats(struct otx2_nic *pfvf,
struct cgx_stats_rsp *rsp)
@@ -1947,3 +2001,48 @@ EXPORT_SYMBOL(otx2_mbox_up_handler_ ## _fn_name);
MBOX_UP_CGX_MESSAGES
MBOX_UP_MCS_MESSAGES
#undef M
+
+/* DMA-map one segment of @skb and return its bus address.
+ * Segment 0 is the linear area (skb->data, skb_headlen bytes); segment N > 0
+ * is page fragment N - 1. The mapped length is stored in *len. The mapping is
+ * DMA_TO_DEVICE, or DMA_BIDIRECTIONAL for skbs carrying an xfrm offload.
+ * The caller is expected to check the result with dma_mapping_error().
+ */
+dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf,
+ struct sk_buff *skb, int seg, int *len)
+{
+ enum dma_data_direction dir = DMA_TO_DEVICE;
+ const skb_frag_t *frag;
+ struct page *page;
+ int offset;
+
+ /* Crypto hardware needs write permission for ipsec crypto offload */
+ if (unlikely(xfrm_offload(skb))) {
+ dir = DMA_BIDIRECTIONAL;
+ /* NOTE(review): the skb_unshare() result is not NULL-checked before
+ * it is dereferenced below, and a replaced skb pointer is not handed
+ * back to the caller, which keeps using its own pointer. This also
+ * runs once per segment. Confirm against skb_unshare() semantics.
+ */
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ }
+
+ /* First segment is always skb->data */
+ if (!seg) {
+ page = virt_to_page(skb->data);
+ offset = offset_in_page(skb->data);
+ *len = skb_headlen(skb);
+ } else {
+ frag = &skb_shinfo(skb)->frags[seg - 1];
+ page = skb_frag_page(frag);
+ offset = skb_frag_off(frag);
+ *len = skb_frag_size(frag);
+ }
+ return otx2_dma_map_page(pfvf, page, offset, *len, dir);
+}
+
+/* Unmap every segment recorded in @sg and reset its segment count.
+ * The DMA direction mirrors otx2_dma_map_skb_frag(): DMA_BIDIRECTIONAL for
+ * skbs carrying an xfrm offload, DMA_TO_DEVICE otherwise.
+ */
+void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg)
+{
+	struct sk_buff *skb = (struct sk_buff *)sg->skb;
+	enum dma_data_direction dir = DMA_TO_DEVICE;
+	int seg;
+
+	/* otx2_sqe_add_sg_ipsec() stores sg->skb only after every segment has
+	 * been mapped, and the mapping-failure path calls in here before that
+	 * happens, so sg->skb may still be unset. Never hand NULL to
+	 * xfrm_offload().
+	 */
+	if (skb && unlikely(xfrm_offload(skb)))
+		dir = DMA_BIDIRECTIONAL;
+
+	for (seg = 0; seg < sg->num_segs; seg++) {
+		otx2_dma_unmap_page(pfvf, sg->dma_addr[seg],
+				    sg->size[seg], dir);
+	}
+	sg->num_segs = 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 566848663fea..44d737a0dd09 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -30,6 +30,7 @@
#include <rvu_trace.h>
#include "qos.h"
#include "rep.h"
+#include "cn10k_ipsec.h"
/* IPv4 flag more fragment bit */
#define IPV4_FLAG_MORE 0x20
@@ -40,6 +41,7 @@
#define PCI_DEVID_OCTEONTX2_RVU_AFVF 0xA0F8
#define PCI_SUBSYS_DEVID_96XX_RVU_PFVF 0xB200
+#define PCI_SUBSYS_DEVID_CN10K_A_RVU_PFVF 0xB900
#define PCI_SUBSYS_DEVID_CN10K_B_RVU_PFVF 0xBD00
#define PCI_DEVID_OCTEONTX2_SDP_REP 0xA0F7
@@ -55,6 +57,9 @@
#define NIX_PF_PFC_PRIO_MAX 8
#endif
+/* Number of segments per SG structure */
+#define MAX_SEGS_PER_SG 3
+
enum arua_mapped_qtypes {
AURA_NIX_RQ,
AURA_NIX_SQ,
@@ -448,6 +453,7 @@ struct otx2_nic {
#define OTX2_FLAG_TC_MARK_ENABLED BIT_ULL(17)
#define OTX2_FLAG_REP_MODE_ENABLED BIT_ULL(18)
#define OTX2_FLAG_PORT_UP BIT_ULL(19)
+#define OTX2_FLAG_IPSEC_OFFLOAD_ENABLED BIT_ULL(20)
u64 flags;
u64 *cq_op_addr;
@@ -522,6 +528,9 @@ struct otx2_nic {
u16 rep_pf_map[RVU_MAX_REP];
u16 esw_mode;
#endif
+
+ /* Inline ipsec */
+ struct cn10k_ipsec ipsec;
};
static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -572,6 +581,15 @@ static inline bool is_dev_cn10kb(struct pci_dev *pdev)
return pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_B_RVU_PFVF;
}
+/* True for a CN10K-A PF/VF whose PCI revision ID is 0x54 */
+static inline bool is_dev_cn10ka_b0(struct pci_dev *pdev)
+{
+	if (pdev->subsystem_device != PCI_SUBSYS_DEVID_CN10K_A_RVU_PFVF)
+		return false;
+
+	return (pdev->revision & 0xFF) == 0x54;
+}
+
static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf)
{
struct otx2_hw *hw = &pfvf->hw;
@@ -621,6 +639,9 @@ static inline void __iomem *otx2_get_regaddr(struct otx2_nic *nic, u64 offset)
case BLKTYPE_NPA:
blkaddr = BLKADDR_NPA;
break;
+ case BLKTYPE_CPT:
+ blkaddr = BLKADDR_CPT0;
+ break;
default:
blkaddr = BLKADDR_RVUM;
break;
@@ -985,6 +1006,7 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
+int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable);
void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx);
void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura);
@@ -1149,4 +1171,8 @@ static inline int mcam_entry_cmp(const void *a, const void *b)
{
return *(u16 *)a - *(u16 *)b;
}
+
+dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf,
+ struct sk_buff *skb, int seg, int *len);
+void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg);
#endif /* OTX2_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
index 294fba58b670..f110dfa42360 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
@@ -435,6 +435,9 @@ process_pfc:
return err;
}
+ /* Default disable backpressure on NIX-CPT */
+ otx2_nix_cpt_config_bp(pfvf, false);
+
/* Request Per channel Bpids */
if (pfc->pfc_en)
otx2_nix_config_bp(pfvf, true);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index e310f99b1736..e1dde93e8af8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -26,6 +26,7 @@
#include "cn10k.h"
#include "qos.h"
#include <rvu_trace.h>
+#include "cn10k_ipsec.h"
#define DRV_NAME "rvu_nicpf"
#define DRV_STRING "Marvell RVU NIC Physical Function Driver"
@@ -1484,6 +1485,8 @@ static void otx2_free_sq_res(struct otx2_nic *pf)
if (!sq->sqe)
continue;
qmem_free(pf->dev, sq->sqe);
+ qmem_free(pf->dev, sq->sqe_ring);
+ qmem_free(pf->dev, sq->cpt_resp);
qmem_free(pf->dev, sq->tso_hdrs);
kfree(sq->sg);
kfree(sq->sqb_ptrs);
@@ -1551,6 +1554,9 @@ int otx2_init_hw_resources(struct otx2_nic *pf)
if (err)
goto err_free_npa_lf;
+ /* Default disable backpressure on NIX-CPT */
+ otx2_nix_cpt_config_bp(pf, false);
+
/* Enable backpressure for CGX mapped PF/VFs */
if (!is_otx2_lbkvf(pf->pdev))
otx2_nix_config_bp(pf, true);
@@ -2273,6 +2279,10 @@ static int otx2_set_features(struct net_device *netdev,
return otx2_enable_rxvlan(pf,
features & NETIF_F_HW_VLAN_CTAG_RX);
+ if (changed & NETIF_F_HW_ESP)
+ return cn10k_ipsec_ethtool_init(netdev,
+ features & NETIF_F_HW_ESP);
+
return otx2_handle_ntuple_tc_features(netdev, features);
}
@@ -3162,10 +3172,14 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* reset CGX/RPM MAC stats */
otx2_reset_mac_stats(pf);
+ err = cn10k_ipsec_init(netdev);
+ if (err)
+ goto err_mcs_free;
+
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
- goto err_mcs_free;
+ goto err_ipsec_clean;
}
err = otx2_wq_init(pf);
@@ -3206,6 +3220,8 @@ err_mcam_flow_del:
otx2_mcam_flow_del(pf);
err_unreg_netdev:
unregister_netdev(netdev);
+err_ipsec_clean:
+ cn10k_ipsec_clean(pf);
err_mcs_free:
cn10k_mcs_free(pf);
err_del_mcam_entries:
@@ -3403,6 +3419,7 @@ static void otx2_remove(struct pci_dev *pdev)
otx2_unregister_dl(pf);
unregister_netdev(netdev);
+ cn10k_ipsec_clean(pf);
cn10k_mcs_free(pf);
otx2_sriov_disable(pf->pdev);
otx2_sriov_vfcfg_cleanup(pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 04bc06a80e23..224cef938927 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -11,6 +11,7 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
#include "otx2_reg.h"
#include "otx2_common.h"
@@ -26,12 +27,25 @@
*/
#define PTP_SYNC_SEC_OFFSET 34
+DEFINE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled);
+
static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
struct bpf_prog *prog,
struct nix_cqe_rx_s *cqe,
struct otx2_cq_queue *cq,
bool *need_xdp_flush);
+/* Choose where the SQE for @skb is built: the default sq->sqe buffer, or,
+ * for IPsec-offloaded skbs, the NIX SQE half of this entry in sq->sqe_ring.
+ */
+static void otx2_sq_set_sqe_base(struct otx2_snd_queue *sq,
+				 struct sk_buff *skb)
+{
+	if (!static_branch_unlikely(&cn10k_ipsec_sa_enabled) ||
+	    !xfrm_offload(skb)) {
+		sq->sqe_base = sq->sqe->base;
+		return;
+	}
+
+	/* Each ring entry is 2 * sqe_size: CPT SG first, then the NIX SQE */
+	sq->sqe_base = sq->sqe_ring->base + sq->sqe_size +
+		       (sq->head * (sq->sqe_size * 2));
+}
+
static int otx2_nix_cq_op_status(struct otx2_nic *pfvf,
struct otx2_cq_queue *cq)
{
@@ -80,38 +94,6 @@ static unsigned int frag_num(unsigned int i)
#endif
}
-static dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf,
- struct sk_buff *skb, int seg, int *len)
-{
- const skb_frag_t *frag;
- struct page *page;
- int offset;
-
- /* First segment is always skb->data */
- if (!seg) {
- page = virt_to_page(skb->data);
- offset = offset_in_page(skb->data);
- *len = skb_headlen(skb);
- } else {
- frag = &skb_shinfo(skb)->frags[seg - 1];
- page = skb_frag_page(frag);
- offset = skb_frag_off(frag);
- *len = skb_frag_size(frag);
- }
- return otx2_dma_map_page(pfvf, page, offset, *len, DMA_TO_DEVICE);
-}
-
-static void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg)
-{
- int seg;
-
- for (seg = 0; seg < sg->num_segs; seg++) {
- otx2_dma_unmap_page(pfvf, sg->dma_addr[seg],
- sg->size[seg], DMA_TO_DEVICE);
- }
- sg->num_segs = 0;
-}
-
static void otx2_xdp_snd_pkt_handler(struct otx2_nic *pfvf,
struct otx2_snd_queue *sq,
struct nix_cqe_tx_s *cqe)
@@ -625,7 +607,6 @@ void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq,
sq->head &= (sq->sqe_cnt - 1);
}
-#define MAX_SEGS_PER_SG 3
/* Add SQE scatter/gather subdescriptor structure */
static bool otx2_sqe_add_sg(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
struct sk_buff *skb, int num_segs, int *offset)
@@ -1161,6 +1142,7 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq,
int offset, num_segs, free_desc;
struct nix_sqe_hdr_s *sqe_hdr;
struct otx2_nic *pfvf = dev;
+ bool ret;
/* Check if there is enough room between producer
* and consumer index.
@@ -1177,6 +1159,7 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq,
/* If SKB doesn't fit in a single SQE, linearize it.
* TODO: Consider adding JUMP descriptor instead.
*/
+
if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) {
if (__skb_linearize(skb)) {
dev_kfree_skb_any(skb);
@@ -1196,6 +1179,9 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq,
return true;
}
+ /* Set sqe base address */
+ otx2_sq_set_sqe_base(sq, skb);
+
/* Set SQE's SEND_HDR.
* Do not clear the first 64bit as it contains constant info.
*/
@@ -1208,7 +1194,13 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq,
otx2_sqe_add_ext(pfvf, sq, skb, &offset);
/* Add SG subdesc with data frags */
- if (!otx2_sqe_add_sg(pfvf, sq, skb, num_segs, &offset)) {
+ if (static_branch_unlikely(&cn10k_ipsec_sa_enabled) &&
+ (xfrm_offload(skb)))
+ ret = otx2_sqe_add_sg_ipsec(pfvf, sq, skb, num_segs, &offset);
+ else
+ ret = otx2_sqe_add_sg(pfvf, sq, skb, num_segs, &offset);
+
+ if (!ret) {
otx2_dma_unmap_skb_frags(pfvf, &sq->sg[sq->head]);
return false;
}
@@ -1217,11 +1209,15 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq,
sqe_hdr->sizem1 = (offset / 16) - 1;
+ if (static_branch_unlikely(&cn10k_ipsec_sa_enabled) &&
+ (xfrm_offload(skb)))
+ return cn10k_ipsec_transmit(pfvf, txq, sq, skb, num_segs,
+ offset);
+
netdev_tx_sent_queue(txq, skb->len);
/* Flush SQE to HW */
pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx);
-
return true;
}
EXPORT_SYMBOL(otx2_sq_append_skb);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index e1db5f961877..d23810963fdb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -101,6 +101,9 @@ struct otx2_snd_queue {
struct queue_stats stats;
u16 sqb_count;
u64 *sqb_ptrs;
+ /* SQE ring and CPT response queue for Inline IPSEC */
+ struct qmem *sqe_ring;
+ struct qmem *cpt_resp;
} ____cacheline_aligned_in_smp;
enum cq_type {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 839fc77c11b2..e926c6ce96cf 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -14,6 +14,7 @@
#include "otx2_reg.h"
#include "otx2_ptp.h"
#include "cn10k.h"
+#include "cn10k_ipsec.h"
#define DRV_NAME "rvu_nicvf"
#define DRV_STRING "Marvell RVU NIC Virtual Function Driver"
@@ -693,10 +694,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pdev->bus->number, n);
}
+ err = cn10k_ipsec_init(netdev);
+ if (err)
+ goto err_ptp_destroy;
+
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
- goto err_ptp_destroy;
+ goto err_ipsec_clean;
}
err = otx2_vf_wq_init(vf);
@@ -730,6 +735,8 @@ err_shutdown_tc:
otx2_shutdown_tc(vf);
err_unreg_netdev:
unregister_netdev(netdev);
+err_ipsec_clean:
+ cn10k_ipsec_clean(vf);
err_ptp_destroy:
otx2_ptp_destroy(vf);
err_detach_rsrc:
@@ -782,6 +789,7 @@ static void otx2vf_remove(struct pci_dev *pdev)
unregister_netdev(netdev);
if (vf->otx2_wq)
destroy_workqueue(vf->otx2_wq);
+ cn10k_ipsec_clean(vf);
otx2_ptp_destroy(vf);
otx2_mcam_flow_del(vf);
otx2_shutdown_tc(vf);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index f07955b5439f..6a4a81c63451 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -192,6 +192,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
return -EOPNOTSUPP;
}
+ if (sample_act_count) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirror action after sample action is not supported");
+ return -EOPNOTSUPP;
+ }
+
err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei,
block, out_dev,
extack);
@@ -265,6 +270,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
return -EOPNOTSUPP;
}
+ if (mirror_act_count) {
+ NL_SET_ERR_MSG_MOD(extack, "Sample action after mirror action is not supported");
+ return -EOPNOTSUPP;
+ }
+
err = mlxsw_sp_acl_rulei_act_sample(mlxsw_sp, rulei,
block,
act->sample.psample_group,
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 558e03301aa8..8d48468cddd7 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -758,12 +758,13 @@ static int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid,
bool is_static, void *data)
{
struct ocelot_dump_ctx *dump = data;
+ struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx;
u32 portid = NETLINK_CB(dump->cb->skb).portid;
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
- if (dump->idx < dump->cb->args[2])
+ if (dump->idx < ctx->fdb_idx)
goto skip;
nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 98e098c09c03..abba165738a3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2779,7 +2779,7 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
break;
}
- netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
+ netdev->watchdog_timeo = secs_to_jiffies(5);
/* MTU range: 68 - hw-specific max */
netdev->min_mtu = ETH_MIN_MTU;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 1c61390677f7..0639bf56bd3a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -18,8 +18,6 @@ struct ionic_lif;
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003
-#define IONIC_ASIC_TYPE_ELBA 2
-
#define DEVCMD_TIMEOUT 5
#define IONIC_ADMINQ_TIME_SLICE msecs_to_jiffies(100)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index dda22fa4448c..720092b1633a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -158,6 +158,20 @@ static int ionic_get_link_ksettings(struct net_device *netdev,
25000baseCR_Full);
copper_seen++;
break;
+ case IONIC_XCVR_PID_QSFP_50G_CR2_FC:
+ case IONIC_XCVR_PID_QSFP_50G_CR2:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 50000baseCR2_Full);
+ copper_seen++;
+ break;
+ case IONIC_XCVR_PID_QSFP_200G_CR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported, 200000baseCR4_Full);
+ copper_seen++;
+ break;
+ case IONIC_XCVR_PID_QSFP_400G_CR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported, 400000baseCR4_Full);
+ copper_seen++;
+ break;
case IONIC_XCVR_PID_SFP_10GBASE_AOC:
case IONIC_XCVR_PID_SFP_10GBASE_CU:
ethtool_link_ksettings_add_link_mode(ks, supported,
@@ -196,6 +210,31 @@ static int ionic_get_link_ksettings(struct net_device *netdev,
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseSR_Full);
break;
+ case IONIC_XCVR_PID_QSFP_200G_AOC:
+ case IONIC_XCVR_PID_QSFP_200G_SR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 200000baseSR4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_200G_FR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 200000baseLR4_ER4_FR4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_200G_DR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 200000baseDR4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_400G_FR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 400000baseLR4_ER4_FR4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_400G_DR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 400000baseDR4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_400G_SR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 400000baseSR4_Full);
+ break;
case IONIC_XCVR_PID_SFP_10GBASE_SR:
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseSR_Full);
@@ -929,6 +968,7 @@ static int ionic_get_module_info(struct net_device *netdev,
break;
case SFF8024_ID_QSFP_8436_8636:
case SFF8024_ID_QSFP28_8636:
+ case SFF8024_ID_QSFP_PLUS_CMIS:
modinfo->type = ETH_MODULE_SFF_8436;
modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
break;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 9c85c0706c6e..830c8adbfbee 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -1277,7 +1277,10 @@ enum ionic_xcvr_pid {
IONIC_XCVR_PID_SFP_25GBASE_CR_S = 3,
IONIC_XCVR_PID_SFP_25GBASE_CR_L = 4,
IONIC_XCVR_PID_SFP_25GBASE_CR_N = 5,
-
+ IONIC_XCVR_PID_QSFP_50G_CR2_FC = 6,
+ IONIC_XCVR_PID_QSFP_50G_CR2 = 7,
+ IONIC_XCVR_PID_QSFP_200G_CR4 = 8,
+ IONIC_XCVR_PID_QSFP_400G_CR4 = 9,
/* Fiber */
IONIC_XCVR_PID_QSFP_100G_AOC = 50,
IONIC_XCVR_PID_QSFP_100G_ACC = 51,
@@ -1303,6 +1306,15 @@ enum ionic_xcvr_pid {
IONIC_XCVR_PID_SFP_25GBASE_ACC = 71,
IONIC_XCVR_PID_SFP_10GBASE_T = 72,
IONIC_XCVR_PID_SFP_1000BASE_T = 73,
+ IONIC_XCVR_PID_QSFP_200G_AOC = 74,
+ IONIC_XCVR_PID_QSFP_200G_FR4 = 75,
+ IONIC_XCVR_PID_QSFP_200G_DR4 = 76,
+ IONIC_XCVR_PID_QSFP_200G_SR4 = 77,
+ IONIC_XCVR_PID_QSFP_200G_ACC = 78,
+ IONIC_XCVR_PID_QSFP_400G_FR4 = 79,
+ IONIC_XCVR_PID_QSFP_400G_DR4 = 80,
+ IONIC_XCVR_PID_QSFP_400G_SR4 = 81,
+ IONIC_XCVR_PID_QSFP_400G_VR4 = 82,
};
/**
@@ -1404,6 +1416,8 @@ struct ionic_xcvr_status {
*/
union ionic_port_config {
struct {
+#define IONIC_SPEED_400G 400000 /* 400G in Mbps */
+#define IONIC_SPEED_200G 200000 /* 200G in Mbps */
#define IONIC_SPEED_100G 100000 /* 100G in Mbps */
#define IONIC_SPEED_50G 50000 /* 50G in Mbps */
#define IONIC_SPEED_40G 40000 /* 40G in Mbps */
@@ -3209,7 +3223,11 @@ union ionic_adminq_comp {
#define IONIC_BAR0_INTR_CTRL_OFFSET 0x2000
#define IONIC_DEV_CMD_DONE 0x00000001
-#define IONIC_ASIC_TYPE_CAPRI 0
+#define IONIC_ASIC_TYPE_NONE 0
+#define IONIC_ASIC_TYPE_CAPRI 1
+#define IONIC_ASIC_TYPE_ELBA 2
+#define IONIC_ASIC_TYPE_GIGLIO 3
+#define IONIC_ASIC_TYPE_SALINA 4
/**
* struct ionic_doorbell - Doorbell register layout
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 40496587b2b3..052c767a2c75 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -3265,7 +3265,7 @@ int ionic_lif_alloc(struct ionic *ionic)
lif->netdev->min_mtu = max_t(unsigned int, ETH_MIN_MTU,
le32_to_cpu(lif->identity->eth.min_frame_size));
lif->netdev->max_mtu =
- le32_to_cpu(lif->identity->eth.max_frame_size) - ETH_HLEN - VLAN_HLEN;
+ le32_to_cpu(lif->identity->eth.max_frame_size) - VLAN_ETH_HLEN;
lif->neqs = ionic->neqs_per_lif;
lif->nxqs = ionic->ntxqs_per_lif;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 0f817c3f92d8..daf1e82cb76b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -81,8 +81,9 @@ static int ionic_error_to_errno(enum ionic_status_code code)
case IONIC_RC_EQTYPE:
case IONIC_RC_EQID:
case IONIC_RC_EINVAL:
- case IONIC_RC_ENOSUPP:
return -EINVAL;
+ case IONIC_RC_ENOSUPP:
+ return -EOPNOTSUPP;
case IONIC_RC_EPERM:
return -EPERM;
case IONIC_RC_ENOENT:
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index be4c9622618d..8904aae41aca 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -23,7 +23,7 @@ enum mac_version {
RTL_GIGA_MAC_VER_08,
RTL_GIGA_MAC_VER_09,
RTL_GIGA_MAC_VER_10,
- RTL_GIGA_MAC_VER_11,
+ /* support for RTL_GIGA_MAC_VER_11 has been removed */
/* RTL_GIGA_MAC_VER_12 was handled the same as VER_17 */
/* RTL_GIGA_MAC_VER_13 was merged with VER_10 */
RTL_GIGA_MAC_VER_14,
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 739707a7b40f..6934bdee2a91 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -104,7 +104,6 @@ static const struct {
[RTL_GIGA_MAC_VER_08] = {"RTL8102e" },
[RTL_GIGA_MAC_VER_09] = {"RTL8102e/RTL8103e" },
[RTL_GIGA_MAC_VER_10] = {"RTL8101e/RTL8100e" },
- [RTL_GIGA_MAC_VER_11] = {"RTL8168b/8111b" },
[RTL_GIGA_MAC_VER_14] = {"RTL8401" },
[RTL_GIGA_MAC_VER_17] = {"RTL8168b/8111b" },
[RTL_GIGA_MAC_VER_18] = {"RTL8168cp/8111cp" },
@@ -623,7 +622,6 @@ struct rtl8169_tc_offsets {
enum rtl_flag {
RTL_FLAG_TASK_RESET_PENDING,
- RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
RTL_FLAG_TASK_TX_TIMEOUT,
RTL_FLAG_MAX
};
@@ -2336,7 +2334,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii)
/* 8168B family. */
{ 0x7c8, 0x380, RTL_GIGA_MAC_VER_17 },
- /* This one is very old and rare, let's see if anybody complains.
+ /* This one is very old and rare, support has been removed.
* { 0x7c8, 0x300, RTL_GIGA_MAC_VER_11 },
*/
@@ -3804,7 +3802,6 @@ static void rtl_hw_config(struct rtl8169_private *tp)
[RTL_GIGA_MAC_VER_08] = rtl_hw_start_8102e_3,
[RTL_GIGA_MAC_VER_09] = rtl_hw_start_8102e_2,
[RTL_GIGA_MAC_VER_10] = NULL,
- [RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168b,
[RTL_GIGA_MAC_VER_14] = rtl_hw_start_8401,
[RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168b,
[RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1,
@@ -4680,12 +4677,6 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
if (status & LinkChg)
phy_mac_interrupt(tp->phydev);
- if (unlikely(status & RxFIFOOver &&
- tp->mac_version == RTL_GIGA_MAC_VER_11)) {
- netif_stop_queue(tp->dev);
- rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
- }
-
rtl_irq_disable(tp);
napi_schedule(&tp->napi);
out:
@@ -4723,8 +4714,6 @@ static void rtl_task(struct work_struct *work)
reset:
rtl_reset_work(tp);
netif_wake_queue(tp->dev);
- } else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) {
- rtl_reset_work(tp);
}
}
@@ -5103,9 +5092,6 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp)
if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
tp->irq_mask |= SYSErr | RxFIFOOver;
- else if (tp->mac_version == RTL_GIGA_MAC_VER_11)
- /* special workaround needed */
- tp->irq_mask |= RxFIFOOver;
}
static int rtl_alloc_irq(struct rtl8169_private *tp)
@@ -5300,7 +5286,6 @@ static int rtl_jumbo_max(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
return JUMBO_7K;
/* RTL8168b */
- case RTL_GIGA_MAC_VER_11:
case RTL_GIGA_MAC_VER_17:
return JUMBO_4K;
/* RTL8168c */
@@ -5347,13 +5332,6 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
return false;
}
-static umode_t r8169_hwmon_is_visible(const void *drvdata,
- enum hwmon_sensor_types type,
- u32 attr, int channel)
-{
- return 0444;
-}
-
static int r8169_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
@@ -5370,7 +5348,7 @@ static int r8169_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
}
static const struct hwmon_ops r8169_hwmon_ops = {
- .is_visible = r8169_hwmon_is_visible,
+ .visible = 0444,
.read = r8169_hwmon_read,
};
diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
index 5307c6ff4e25..b28b30390e84 100644
--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
+++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
@@ -276,15 +276,6 @@ static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp,
rtl_writephy_batch(phydev, phy_reg_init);
}
-static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp,
- struct phy_device *phydev)
-{
- phy_write(phydev, 0x1f, 0x0001);
- phy_set_bits(phydev, 0x16, BIT(0));
- phy_write(phydev, 0x10, 0xf41b);
- phy_write(phydev, 0x1f, 0x0000);
-}
-
static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp,
struct phy_device *phydev)
{
@@ -1136,7 +1127,6 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev,
[RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config,
[RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config,
[RTL_GIGA_MAC_VER_10] = NULL,
- [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config,
[RTL_GIGA_MAC_VER_14] = rtl8401_hw_phy_config,
[RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config,
[RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config,
diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h
index dbc3f92eebc4..2bbfcad613ab 100644
--- a/drivers/net/ethernet/realtek/rtase/rtase.h
+++ b/drivers/net/ethernet/realtek/rtase/rtase.h
@@ -13,6 +13,7 @@
#define RTASE_HW_VER_906X_7XA 0x00800000
#define RTASE_HW_VER_906X_7XC 0x04000000
#define RTASE_HW_VER_907XD_V1 0x04800000
+#define RTASE_HW_VER_907XD_VA 0x08000000
#define RTASE_RX_DMA_BURST_256 4
#define RTASE_TX_DMA_BURST_UNLIMITED 7
diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
index de7f11232593..585d0b21c9e0 100644
--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
+++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
@@ -1725,6 +1725,7 @@ static int rtase_get_settings(struct net_device *dev,
cmd->base.speed = SPEED_5000;
break;
case RTASE_HW_VER_907XD_V1:
+ case RTASE_HW_VER_907XD_VA:
cmd->base.speed = SPEED_10000;
break;
}
@@ -1993,6 +1994,7 @@ static int rtase_check_mac_version_valid(struct rtase_private *tp)
case RTASE_HW_VER_906X_7XA:
case RTASE_HW_VER_906X_7XC:
case RTASE_HW_VER_907XD_V1:
+ case RTASE_HW_VER_907XD_VA:
ret = 0;
break;
}
@@ -2016,7 +2018,7 @@ static int rtase_init_board(struct pci_dev *pdev, struct net_device **dev_out,
SET_NETDEV_DEV(dev, &pdev->dev);
ret = pci_enable_device(pdev);
- if (ret < 0)
+ if (ret)
goto err_out_free_dev;
/* make sure PCI base addr 1 is MMIO */
@@ -2032,7 +2034,7 @@ static int rtase_init_board(struct pci_dev *pdev, struct net_device **dev_out,
}
ret = pci_request_regions(pdev, KBUILD_MODNAME);
- if (ret < 0)
+ if (ret)
goto err_out_disable;
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
@@ -2108,7 +2110,7 @@ static int rtase_init_one(struct pci_dev *pdev,
dev_dbg(&pdev->dev, "Automotive Switch Ethernet driver loaded\n");
ret = rtase_init_board(pdev, &dev, &ioaddr);
- if (ret != 0)
+ if (ret)
return ret;
tp = netdev_priv(dev);
@@ -2118,7 +2120,7 @@ static int rtase_init_one(struct pci_dev *pdev,
/* identify chip attached to board */
ret = rtase_check_mac_version_valid(tp);
- if (ret != 0) {
+ if (ret) {
dev_err(&pdev->dev,
"unknown chip version: 0x%08x, contact rtase maintainers (see MAINTAINERS file)\n",
tp->hw_ver);
@@ -2129,7 +2131,7 @@ static int rtase_init_one(struct pci_dev *pdev,
rtase_init_hardware(tp);
ret = rtase_alloc_interrupt(pdev, tp);
- if (ret < 0) {
+ if (ret) {
dev_err(&pdev->dev, "unable to alloc MSIX/MSI\n");
goto err_out_del_napi;
}
@@ -2174,7 +2176,7 @@ static int rtase_init_one(struct pci_dev *pdev,
netif_carrier_off(dev);
ret = register_netdev(dev);
- if (ret != 0)
+ if (ret)
goto err_out_free_dma;
netdev_dbg(dev, "%pM, IRQ %d\n", dev->dev_addr, dev->irq);
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index dbbbf024e7ab..5fc8c94d1e4b 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -111,25 +111,35 @@ static void rswitch_top_init(struct rswitch_private *priv)
/* Forwarding engine block (MFWD) */
static void rswitch_fwd_init(struct rswitch_private *priv)
{
+ u32 all_ports_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0);
unsigned int i;
- /* For ETHA */
- for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
- iowrite32(FWPC0_DEFAULT, priv->addr + FWPC0(i));
+ /* Start with empty configuration */
+ for (i = 0; i < RSWITCH_NUM_AGENTS; i++) {
+ /* Disable all port features */
+ iowrite32(0, priv->addr + FWPC0(i));
+ /* Disallow L3 forwarding and direct descriptor forwarding */
+ iowrite32(FIELD_PREP(FWCP1_LTHFW, all_ports_mask),
+ priv->addr + FWPC1(i));
+ /* Disallow L2 forwarding */
+ iowrite32(FIELD_PREP(FWCP2_LTWFW, all_ports_mask),
+ priv->addr + FWPC2(i));
+ /* Disallow port based forwarding */
iowrite32(0, priv->addr + FWPBFC(i));
}
- for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
+ /* For enabled ETHA ports, setup port based forwarding */
+ rswitch_for_each_enabled_port(priv, i) {
+ /* Port based forwarding from port i to GWCA port */
+ rswitch_modify(priv->addr, FWPBFC(i), FWPBFC_PBDV,
+ FIELD_PREP(FWPBFC_PBDV, BIT(priv->gwca.index)));
+ /* Within GWCA port, forward to Rx queue for port i */
iowrite32(priv->rdev[i]->rx_queue->index,
priv->addr + FWPBFCSDC(GWCA_INDEX, i));
- iowrite32(BIT(priv->gwca.index), priv->addr + FWPBFC(i));
}
- /* For GWCA */
- iowrite32(FWPC0_DEFAULT, priv->addr + FWPC0(priv->gwca.index));
- iowrite32(FWPC1_DDE, priv->addr + FWPC1(priv->gwca.index));
- iowrite32(0, priv->addr + FWPBFC(priv->gwca.index));
- iowrite32(GENMASK(RSWITCH_NUM_PORTS - 1, 0), priv->addr + FWPBFC(priv->gwca.index));
+ /* For GWCA port, allow direct descriptor forwarding */
+ rswitch_modify(priv->addr, FWPC1(priv->gwca.index), FWPC1_DDE, FWPC1_DDE);
}
/* Gateway CPU agent block (GWCA) */
@@ -1544,7 +1554,7 @@ static void rswitch_ether_port_deinit_all(struct rswitch_private *priv)
{
unsigned int i;
- for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
+ rswitch_for_each_enabled_port(priv, i) {
phy_exit(priv->rdev[i]->serdes);
rswitch_ether_port_deinit_one(priv->rdev[i]);
}
@@ -1920,9 +1930,6 @@ static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index
if (err < 0)
goto out_get_params;
- if (rdev->priv->gwca.speed < rdev->etha->speed)
- rdev->priv->gwca.speed = rdev->etha->speed;
-
err = rswitch_rxdmac_alloc(ndev);
if (err < 0)
goto out_rxdmac;
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index e020800dcc57..4b1489100330 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -12,6 +12,7 @@
#define RSWITCH_MAX_NUM_QUEUES 128
+#define RSWITCH_NUM_AGENTS 5
#define RSWITCH_NUM_PORTS 3
#define rswitch_for_each_enabled_port(priv, i) \
for (i = 0; i < RSWITCH_NUM_PORTS; i++) \
@@ -806,6 +807,7 @@ enum rswitch_gwca_mode {
#define CABPPFLC_INIT_VALUE 0x00800080
/* MFWD */
+#define FWPC0(i) (FWPC00 + (i) * 0x10)
#define FWPC0_LTHTA BIT(0)
#define FWPC0_IP4UE BIT(3)
#define FWPC0_IP4TE BIT(4)
@@ -819,15 +821,15 @@ enum rswitch_gwca_mode {
#define FWPC0_MACHMA BIT(27)
#define FWPC0_VLANSA BIT(28)
-#define FWPC0(i) (FWPC00 + (i) * 0x10)
-#define FWPC0_DEFAULT (FWPC0_LTHTA | FWPC0_IP4UE | FWPC0_IP4TE | \
- FWPC0_IP4OE | FWPC0_L2SE | FWPC0_IP4EA | \
- FWPC0_IPDSA | FWPC0_IPHLA | FWPC0_MACSDA | \
- FWPC0_MACHLA | FWPC0_MACHMA | FWPC0_VLANSA)
#define FWPC1(i) (FWPC10 + (i) * 0x10)
+#define FWCP1_LTHFW GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16)
#define FWPC1_DDE BIT(0)
-#define FWPBFC(i) (FWPBFC0 + (i) * 0x10)
+#define FWPC2(i) (FWPC20 + (i) * 0x10)
+#define FWCP2_LTWFW GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16)
+
+#define FWPBFC(i) (FWPBFC0 + (i) * 0x10)
+#define FWPBFC_PBDV GENMASK(RSWITCH_NUM_AGENTS - 1, 0)
#define FWPBFCSDC(j, i) (FWPBFCSDC00 + (i) * 0x10 + (j) * 0x04)
@@ -993,7 +995,6 @@ struct rswitch_gwca {
DECLARE_BITMAP(used, RSWITCH_MAX_NUM_QUEUES);
u32 tx_irq_bits[RSWITCH_NUM_IRQ_REGS];
u32 rx_irq_bits[RSWITCH_NUM_IRQ_REGS];
- int speed;
};
#define NUM_QUEUES_PER_NDEV 2
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 6658536a4e17..4cc85a36a1ab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -154,6 +154,18 @@ config DWMAC_RZN1
the stmmac device driver. This support can make use of a custom MII
converter PCS device.
+config DWMAC_S32
+ tristate "NXP S32G/S32R GMAC support"
+ default ARCH_S32
+ depends on OF && (ARCH_S32 || COMPILE_TEST)
+ help
+ Support for ethernet controller on NXP S32CC SOCs.
+
+ This selects NXP SoC glue layer support for the stmmac
+ device driver. This driver is used for the GMAC Ethernet
+ controller of the S32CC series SoCs, i.e. S32G2xx, S32G3xx
+ and S32R45.
+
config DWMAC_SOCFPGA
tristate "SOCFPGA dwmac support"
default ARCH_INTEL_SOCFPGA
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 2389fd261344..b26f0e79c2b3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o
obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o
obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o
obj-$(CONFIG_DWMAC_RZN1) += dwmac-rzn1.o
+obj-$(CONFIG_DWMAC_S32) += dwmac-s32.o
obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o
obj-$(CONFIG_DWMAC_STARFIVE) += dwmac-starfive.o
obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1367fa5c9b8e..e25db747a81a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -257,6 +257,8 @@ struct stmmac_safety_stats {
#define CSR_F_150M 150000000
#define CSR_F_250M 250000000
#define CSR_F_300M 300000000
+#define CSR_F_500M 500000000
+#define CSR_F_800M 800000000
#define MAC_CSR_H_FRQ_MASK 0x20
@@ -543,18 +545,8 @@ struct dma_features {
#define STMMAC_VLAN_INSERT 0x2
#define STMMAC_VLAN_REPLACE 0x3
-extern const struct stmmac_desc_ops enh_desc_ops;
-extern const struct stmmac_desc_ops ndesc_ops;
-
struct mac_device_info;
-extern const struct stmmac_hwtimestamp stmmac_ptp;
-extern const struct stmmac_hwtimestamp dwmac1000_ptp;
-extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
-
-extern const struct ptp_clock_info stmmac_ptp_clock_ops;
-extern const struct ptp_clock_info dwmac1000_ptp_clock_ops;
-
struct mac_link {
u32 caps;
u32 speed_mask;
@@ -641,8 +633,4 @@ void stmmac_dwmac4_set_mac(void __iomem *ioaddr, bool enable);
void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr);
-extern const struct stmmac_mode_ops ring_mode_ops;
-extern const struct stmmac_mode_ops chain_mode_ops;
-extern const struct stmmac_desc_ops dwmac4_desc_ops;
-
#endif /* __COMMON_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 83290e707df5..bd4eb187f8c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -181,24 +181,19 @@ static void dwc_qos_remove(struct platform_device *pdev)
static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mode)
{
struct tegra_eqos *eqos = priv;
- unsigned long rate = 125000000;
bool needs_calibration = false;
+ long rate = 125000000;
u32 value;
int err;
switch (speed) {
case SPEED_1000:
- needs_calibration = true;
- rate = 125000000;
- break;
-
case SPEED_100:
needs_calibration = true;
- rate = 25000000;
- break;
+ fallthrough;
case SPEED_10:
- rate = 2500000;
+ rate = rgmii_clock(speed);
break;
default:
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 641f3cd019a3..43e0fbba4f77 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -186,7 +186,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod
{
struct plat_stmmacenet_data *plat_dat;
struct imx_priv_data *dwmac = priv;
- unsigned long rate;
+ long rate;
int err;
plat_dat = dwmac->plat_dat;
@@ -196,17 +196,8 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod
(plat_dat->mac_interface == PHY_INTERFACE_MODE_MII))
return;
- switch (speed) {
- case SPEED_1000:
- rate = 125000000;
- break;
- case SPEED_100:
- rate = 25000000;
- break;
- case SPEED_10:
- rate = 2500000;
- break;
- default:
+ rate = rgmii_clock(speed);
+ if (rate < 0) {
dev_err(dwmac->dev, "invalid speed %u\n", speed);
return;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index d94f0a150e93..ddee6154d40b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -31,27 +31,13 @@ struct intel_dwmac_data {
static void kmb_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
{
struct intel_dwmac *dwmac = priv;
- unsigned long rate;
+ long rate;
int ret;
- rate = clk_get_rate(dwmac->tx_clk);
-
- switch (speed) {
- case SPEED_1000:
- rate = 125000000;
- break;
-
- case SPEED_100:
- rate = 25000000;
- break;
-
- case SPEED_10:
- rate = 2500000;
- break;
-
- default:
+ rate = rgmii_clock(speed);
+ if (rate < 0) {
dev_err(dwmac->dev, "Invalid speed\n");
- break;
+ return;
}
ret = clk_set_rate(dwmac->tx_clk, rate);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 901a3c1959fa..2a5b38723635 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -777,7 +777,7 @@ static void ethqos_ptp_clk_freq_config(struct stmmac_priv *priv)
netdev_err(priv->dev, "Failed to max out clk_ptp_ref: %d\n", err);
plat_dat->clk_ptp_rate = clk_get_rate(plat_dat->clk_ptp_ref);
- netdev_dbg(priv->dev, "PTP rate %d\n", plat_dat->clk_ptp_rate);
+ netdev_dbg(priv->dev, "PTP rate %lu\n", plat_dat->clk_ptp_rate);
}
static int qcom_ethqos_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 8cb374668b74..a4dc89e23a68 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1079,20 +1079,11 @@ static void rk3568_set_gmac_speed(struct rk_priv_data *bsp_priv, int speed)
{
struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk;
struct device *dev = &bsp_priv->pdev->dev;
- unsigned long rate;
+ long rate;
int ret;
- switch (speed) {
- case 10:
- rate = 2500000;
- break;
- case 100:
- rate = 25000000;
- break;
- case 1000:
- rate = 125000000;
- break;
- default:
+ rate = rgmii_clock(speed);
+ if (rate < 0) {
dev_err(dev, "unknown speed value for GMAC speed=%d", speed);
return;
}
@@ -1540,20 +1531,11 @@ static void rv1126_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
{
struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk;
struct device *dev = &bsp_priv->pdev->dev;
- unsigned long rate;
+ long rate;
int ret;
- switch (speed) {
- case 10:
- rate = 2500000;
- break;
- case 100:
- rate = 25000000;
- break;
- case 1000:
- rate = 125000000;
- break;
- default:
+ rate = rgmii_clock(speed);
+ if (rate < 0) {
dev_err(dev, "unknown speed value for RGMII speed=%d", speed);
return;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
new file mode 100644
index 000000000000..9cc0e5817416
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NXP S32G/R GMAC glue layer
+ *
+ * Copyright 2019-2024 NXP
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/ethtool.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_mdio.h>
+#include <linux/of_address.h>
+#include <linux/phy.h>
+#include <linux/phylink.h>
+#include <linux/platform_device.h>
+#include <linux/stmmac.h>
+
+#include "stmmac_platform.h"
+
+#define GMAC_INTF_RATE_125M 125000000 /* 125MHz */
+
+/* SoC PHY interface control register */
+#define PHY_INTF_SEL_MII 0x00
+#define PHY_INTF_SEL_SGMII 0x01
+#define PHY_INTF_SEL_RGMII 0x02
+#define PHY_INTF_SEL_RMII 0x08
+
+/*
+ * Private state of the S32 GMAC glue layer. One instance is allocated in
+ * probe and handed to the stmmac core through plat->bsp_priv.
+ */
+struct s32_priv_data {
+ void __iomem *ioaddr; /* copied from res.addr in probe */
+ void __iomem *ctrl_sts; /* PHY interface mode control register */
+ struct device *dev; /* platform device, used for log messages */
+ phy_interface_t *intf_mode; /* points at plat->phy_interface */
+ struct clk *tx_clk; /* clock looked up as "tx" */
+ struct clk *rx_clk; /* clock looked up as "rx" */
+};
+
+/*
+ * Program the SoC PHY interface control register.
+ *
+ * NOTE(review): PHY_INTF_SEL_RGMII is written unconditionally; the mode in
+ * *gmac->intf_mode is only used for the debug message. Presumably only RGMII
+ * is supported so far - confirm before relying on other interface modes.
+ *
+ * Always returns 0.
+ */
+static int s32_gmac_write_phy_intf_select(struct s32_priv_data *gmac)
+{
+ writel(PHY_INTF_SEL_RGMII, gmac->ctrl_sts);
+
+ dev_dbg(gmac->dev, "PHY mode set to %s\n", phy_modes(*gmac->intf_mode));
+
+ return 0;
+}
+
+/*
+ * Bring up the interface clocks and select the PHY interface mode.
+ *
+ * Installed as plat->init; @priv is the struct s32_priv_data set up in probe.
+ * Both the tx and rx clocks are enabled and set to GMAC_INTF_RATE_125M.
+ *
+ * Returns 0 on success or the error code of the step that failed. On failure
+ * every clock enabled so far is disabled again.
+ */
+static int s32_gmac_init(struct platform_device *pdev, void *priv)
+{
+ struct s32_priv_data *gmac = priv;
+ int ret;
+
+ /* Set initial TX interface clock */
+ ret = clk_prepare_enable(gmac->tx_clk);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't enable tx clock\n");
+ return ret;
+ }
+ ret = clk_set_rate(gmac->tx_clk, GMAC_INTF_RATE_125M);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't set tx clock\n");
+ goto err_tx_disable;
+ }
+
+ /* Set initial RX interface clock */
+ ret = clk_prepare_enable(gmac->rx_clk);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't enable rx clock\n");
+ goto err_tx_disable;
+ }
+ ret = clk_set_rate(gmac->rx_clk, GMAC_INTF_RATE_125M);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't set rx clock\n");
+ goto err_txrx_disable;
+ }
+
+ /* Set interface mode */
+ ret = s32_gmac_write_phy_intf_select(gmac);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't set PHY interface mode\n");
+ goto err_txrx_disable;
+ }
+
+ return 0;
+
+ /* Unwind in reverse order: rx clock first, then fall through to tx */
+err_txrx_disable:
+ clk_disable_unprepare(gmac->rx_clk);
+err_tx_disable:
+ clk_disable_unprepare(gmac->tx_clk);
+ return ret;
+}
+
+/*
+ * Counterpart of s32_gmac_init(): disable and unprepare the tx and rx clocks.
+ *
+ * Installed as plat->exit; @priv is the struct s32_priv_data set up in probe.
+ */
+static void s32_gmac_exit(struct platform_device *pdev, void *priv)
+{
+ struct s32_priv_data *gmac = priv;
+
+ clk_disable_unprepare(gmac->tx_clk);
+ clk_disable_unprepare(gmac->rx_clk);
+}
+
+/*
+ * Retune the tx clock after a link speed change.
+ *
+ * Installed as plat->fix_mac_speed; @priv is the struct s32_priv_data set up
+ * in probe. @speed is converted to a clock rate with rgmii_clock(); a negative
+ * result is treated as an unsupported speed and the clock is left untouched.
+ * @mode is not used. Failures are only logged because the callback returns
+ * void.
+ */
+static void s32_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
+{
+	struct s32_priv_data *gmac = priv;
+	long tx_clk_rate;
+	int ret;
+
+	tx_clk_rate = rgmii_clock(speed);
+	if (tx_clk_rate < 0) {
+		/* speed is an unsigned int, hence %u rather than %d */
+		dev_err(gmac->dev, "Unsupported/Invalid speed: %u\n", speed);
+		return;
+	}
+
+	dev_dbg(gmac->dev, "Set tx clock to %ld Hz\n", tx_clk_rate);
+	ret = clk_set_rate(gmac->tx_clk, tx_clk_rate);
+	if (ret)
+		dev_err(gmac->dev, "Can't set tx clock\n");
+}
+
+/*
+ * Probe the S32 GMAC glue layer.
+ *
+ * Allocates the private data, collects the stmmac platform resources and DT
+ * configuration, maps the PHY interface control register (memory resource 1)
+ * and looks up the "tx" and "rx" clocks. It then fills in the platform data
+ * and hands over to stmmac_pltfr_probe().
+ *
+ * Returns 0 on success or a negative errno. All allocations are device
+ * managed, so the error paths need no explicit cleanup.
+ */
+static int s32_dwmac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat;
+	struct device *dev = &pdev->dev;
+	struct stmmac_resources res;
+	struct s32_priv_data *gmac;
+	int ret;
+
+	gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
+	if (!gmac)
+		return -ENOMEM;
+
+	gmac->dev = dev;
+
+	ret = stmmac_get_platform_resources(pdev, &res);
+	if (ret)
+		return dev_err_probe(dev, ret,
+				     "Failed to get platform resources\n");
+
+	plat = devm_stmmac_probe_config_dt(pdev, res.mac);
+	if (IS_ERR(plat))
+		return dev_err_probe(dev, PTR_ERR(plat),
+				     "dt configuration failed\n");
+
+	/* PHY interface mode control reg */
+	gmac->ctrl_sts = devm_platform_get_and_ioremap_resource(pdev, 1, NULL);
+	if (IS_ERR(gmac->ctrl_sts))
+		return dev_err_probe(dev, PTR_ERR(gmac->ctrl_sts),
+				     "S32CC config region is missing\n");
+
+	/* tx clock */
+	gmac->tx_clk = devm_clk_get(dev, "tx");
+	if (IS_ERR(gmac->tx_clk))
+		return dev_err_probe(dev, PTR_ERR(gmac->tx_clk),
+				     "tx clock not found\n");
+
+	/* rx clock */
+	gmac->rx_clk = devm_clk_get(dev, "rx");
+	if (IS_ERR(gmac->rx_clk))
+		return dev_err_probe(dev, PTR_ERR(gmac->rx_clk),
+				     "rx clock not found\n");
+
+	/* Keep a pointer so later changes to plat->phy_interface are seen */
+	gmac->intf_mode = &plat->phy_interface;
+	gmac->ioaddr = res.addr;
+
+	/* S32CC core feature set */
+	plat->has_gmac4 = true;
+	plat->pmt = 1;
+	plat->flags |= STMMAC_FLAG_SPH_DISABLE;
+	plat->rx_fifo_size = 20480;
+	plat->tx_fifo_size = 20480;
+
+	/* Glue-layer callbacks invoked by the stmmac core */
+	plat->init = s32_gmac_init;
+	plat->exit = s32_gmac_exit;
+	plat->fix_mac_speed = s32_fix_mac_speed;
+
+	plat->bsp_priv = gmac;
+
+	return stmmac_pltfr_probe(pdev, plat, &res);
+}
+
+/*
+ * Device-tree compatibles bound by this driver; the empty entry terminates
+ * the table.
+ * NOTE(review): only the S32G2 compatible is listed although the module
+ * description mentions S32G/R - presumably the other SoCs use it as a
+ * fallback compatible; confirm against the DT binding.
+ */
+static const struct of_device_id s32_dwmac_match[] = {
+ { .compatible = "nxp,s32g2-dwmac" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, s32_dwmac_match);
+
+/*
+ * Platform driver registration. Only probe is implemented in this file;
+ * remove and the PM ops are the stmmac_pltfr_* ones.
+ */
+static struct platform_driver s32_dwmac_driver = {
+ .probe = s32_dwmac_probe,
+ .remove = stmmac_pltfr_remove,
+ .driver = {
+ .name = "s32-dwmac",
+ .pm = &stmmac_pltfr_pm_ops,
+ .of_match_table = s32_dwmac_match,
+ },
+};
+module_platform_driver(s32_dwmac_driver);
+
+MODULE_AUTHOR("Jan Petrous (OSS) <jan.petrous@oss.nxp.com>");
+MODULE_DESCRIPTION("NXP S32G/R common chassis GMAC driver");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
index 421666279dd3..0a0a363d3730 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
@@ -34,24 +34,13 @@ struct starfive_dwmac {
static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode)
{
struct starfive_dwmac *dwmac = priv;
- unsigned long rate;
+ long rate;
int err;
- rate = clk_get_rate(dwmac->clk_tx);
-
- switch (speed) {
- case SPEED_1000:
- rate = 125000000;
- break;
- case SPEED_100:
- rate = 25000000;
- break;
- case SPEED_10:
- rate = 2500000;
- break;
- default:
+ rate = rgmii_clock(speed);
+ if (rate < 0) {
dev_err(dwmac->dev, "invalid speed %u\n", speed);
- break;
+ return;
}
err = clk_set_rate(dwmac->clk_tx, rate);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index a6ff02d905a9..eabc4da9e1a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -21,10 +21,7 @@
#include "stmmac_platform.h"
-#define DWMAC_125MHZ 125000000
#define DWMAC_50MHZ 50000000
-#define DWMAC_25MHZ 25000000
-#define DWMAC_2_5MHZ 2500000
#define IS_PHY_IF_MODE_RGMII(iface) (iface == PHY_INTERFACE_MODE_RGMII || \
iface == PHY_INTERFACE_MODE_RGMII_ID || \
@@ -140,7 +137,7 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd, unsigned int mode)
struct sti_dwmac *dwmac = priv;
u32 src = dwmac->tx_retime_src;
u32 reg = dwmac->ctrl_reg;
- u32 freq = 0;
+ long freq = 0;
if (dwmac->interface == PHY_INTERFACE_MODE_MII) {
src = TX_RETIME_SRC_TXCLK;
@@ -153,19 +150,14 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd, unsigned int mode)
}
} else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) {
/* On GiGa clk source can be either ext or from clkgen */
- if (spd == SPEED_1000) {
- freq = DWMAC_125MHZ;
- } else {
+ freq = rgmii_clock(spd);
+
+ if (spd != SPEED_1000 && freq > 0)
/* Switch to clkgen for these speeds */
src = TX_RETIME_SRC_CLKGEN;
- if (spd == SPEED_100)
- freq = DWMAC_25MHZ;
- else if (spd == SPEED_10)
- freq = DWMAC_2_5MHZ;
- }
}
- if (src == TX_RETIME_SRC_CLKGEN && freq)
+ if (src == TX_RETIME_SRC_CLKGEN && freq > 0)
clk_set_rate(dwmac->clk, freq);
regmap_update_bits(dwmac->regmap, reg, STIH4XX_RETIME_SRC_MASK,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index c25781874aa7..c36f90a782c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -27,7 +27,7 @@ static void dwmac4_core_init(struct mac_device_info *hw,
struct stmmac_priv *priv = netdev_priv(dev);
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONFIG);
- u32 clk_rate;
+ unsigned long clk_rate;
value |= GMAC_CORE_INIT;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h
index 1ce6f43d545a..806555976496 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h
@@ -144,4 +144,7 @@
/* TDS3 use for both format (read and write back) */
#define RDES3_OWN BIT(31)
+extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
+extern const struct stmmac_desc_ops dwmac4_desc_ops;
+
#endif /* __DWMAC4_DESCS_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index a04a79003692..20027d3c25a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -493,4 +493,9 @@
#define XGMAC_RDES3_TSD BIT(6)
#define XGMAC_RDES3_TSA BIT(4)
+extern const struct stmmac_ops dwxgmac210_ops;
+extern const struct stmmac_ops dwxlgmac2_ops;
+extern const struct stmmac_dma_ops dwxgmac210_dma_ops;
+extern const struct stmmac_desc_ops dwxgmac210_desc_ops;
+
#endif /* __STMMAC_DWXGMAC2_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index a72d336a8350..4bd79de2e222 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -9,6 +9,8 @@
#include "stmmac_fpe.h"
#include "stmmac_ptp.h"
#include "stmmac_est.h"
+#include "dwmac4_descs.h"
+#include "dwxgmac2.h"
static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg)
{
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 64f8ed67dcc4..e428c82b7d31 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -665,6 +665,15 @@ struct stmmac_regs_off {
u32 est_off;
};
+extern const struct stmmac_desc_ops enh_desc_ops;
+extern const struct stmmac_desc_ops ndesc_ops;
+
+extern const struct stmmac_hwtimestamp stmmac_ptp;
+extern const struct stmmac_hwtimestamp dwmac1000_ptp;
+
+extern const struct stmmac_mode_ops ring_mode_ops;
+extern const struct stmmac_mode_ops chain_mode_ops;
+
extern const struct stmmac_ops dwmac100_ops;
extern const struct stmmac_dma_ops dwmac100_dma_ops;
extern const struct stmmac_ops dwmac1000_ops;
@@ -677,13 +686,6 @@ extern const struct stmmac_ops dwmac510_ops;
extern const struct stmmac_tc_ops dwmac4_tc_ops;
extern const struct stmmac_tc_ops dwmac510_tc_ops;
extern const struct stmmac_tc_ops dwxgmac_tc_ops;
-extern const struct stmmac_ops dwxgmac210_ops;
-extern const struct stmmac_ops dwxlgmac2_ops;
-extern const struct stmmac_dma_ops dwxgmac210_dma_ops;
-extern const struct stmmac_desc_ops dwxgmac210_desc_ops;
-extern const struct stmmac_mmc_ops dwmac_mmc_ops;
-extern const struct stmmac_mmc_ops dwxgmac_mmc_ops;
-extern const struct stmmac_est_ops dwmac510_est_ops;
#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */
#define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index 5d1ea3e07459..1cba39fb2c44 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -139,4 +139,7 @@ struct stmmac_counters {
unsigned int mmc_rx_fpe_fragment_cntr;
};
+extern const struct stmmac_mmc_ops dwmac_mmc_ops;
+extern const struct stmmac_mmc_ops dwxgmac_mmc_ops;
+
#endif /* __MMC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h
index 7a858c566e7e..d247fa383a6e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h
@@ -62,3 +62,5 @@
#define EST_SRWO BIT(0)
#define EST_GCL_DATA 0x00000034
+
+extern const struct stmmac_est_ops dwmac510_est_ops;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c81ea8cdfe6e..16b8bcfa8b11 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -301,7 +301,7 @@ static void stmmac_global_err(struct stmmac_priv *priv)
*/
static void stmmac_clk_csr_set(struct stmmac_priv *priv)
{
- u32 clk_rate;
+ unsigned long clk_rate;
clk_rate = clk_get_rate(priv->plat->stmmac_clk);
@@ -325,6 +325,10 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv)
priv->clk_csr = STMMAC_CSR_150_250M;
else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M))
priv->clk_csr = STMMAC_CSR_250_300M;
+ else if ((clk_rate >= CSR_F_300M) && (clk_rate < CSR_F_500M))
+ priv->clk_csr = STMMAC_CSR_300_500M;
+ else if ((clk_rate >= CSR_F_500M) && (clk_rate < CSR_F_800M))
+ priv->clk_csr = STMMAC_CSR_500_800M;
}
if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 3ac32444e492..06e07e6e180b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -640,7 +640,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
dev_info(&pdev->dev, "PTP uses main clock\n");
} else {
plat->clk_ptp_rate = clk_get_rate(plat->clk_ptp_ref);
- dev_dbg(&pdev->dev, "PTP rate %d\n", plat->clk_ptp_rate);
+ dev_dbg(&pdev->dev, "PTP rate %lu\n", plat->clk_ptp_rate);
}
plat->stmmac_rst = devm_reset_control_get_optional(&pdev->dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index 4cc70480ce0f..3fe0e3a80e80 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -104,4 +104,7 @@ int dwmac1000_ptp_enable(struct ptp_clock_info *ptp,
void dwmac1000_get_ptptime(void __iomem *ptpaddr, u64 *ptp_time);
void dwmac1000_timestamp_interrupt(struct stmmac_priv *priv);
+extern const struct ptp_clock_info stmmac_ptp_clock_ops;
+extern const struct ptp_clock_info dwmac1000_ptp_clock_ops;
+
#endif /* __STMMAC_PTP_H__ */
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index bc658bc60885..642155cb8315 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -235,7 +235,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
vni_to_tunnel_id(gnvh->vni),
gnvh->opt_len * 4);
if (!tun_dst) {
- DEV_STATS_INC(geneve->dev, rx_dropped);
+ dev_dstats_rx_dropped(geneve->dev);
goto drop;
}
/* Update tunnel dst according to Geneve options. */
@@ -322,7 +322,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
len = skb->len;
err = gro_cells_receive(&geneve->gro_cells, skb);
if (likely(err == NET_RX_SUCCESS))
- dev_sw_netstats_rx_add(geneve->dev, len);
+ dev_dstats_rx_add(geneve->dev, len);
return;
drop:
@@ -387,14 +387,14 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely((!geneve->cfg.inner_proto_inherit &&
inner_proto != htons(ETH_P_TEB)))) {
- DEV_STATS_INC(geneve->dev, rx_dropped);
+ dev_dstats_rx_dropped(geneve->dev);
goto drop;
}
opts_len = geneveh->opt_len * 4;
if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
!net_eq(geneve->net, dev_net(geneve->dev)))) {
- DEV_STATS_INC(geneve->dev, rx_dropped);
+ dev_dstats_rx_dropped(geneve->dev);
goto drop;
}
@@ -1023,7 +1023,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
netdev_dbg(dev, "no tunnel metadata\n");
dev_kfree_skb(skb);
- DEV_STATS_INC(dev, tx_dropped);
+ dev_dstats_tx_dropped(dev);
return NETDEV_TX_OK;
}
} else {
@@ -1202,7 +1202,7 @@ static void geneve_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
/* MTU range: 68 - (something less than 65535) */
dev->min_mtu = ETH_MIN_MTU;
/* The max_mtu calculation does not take account of GENEVE
diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c
index d2b3f5a59141..e3dcdeacc12c 100644
--- a/drivers/net/mctp/mctp-i2c.c
+++ b/drivers/net/mctp/mctp-i2c.c
@@ -177,8 +177,7 @@ static struct mctp_i2c_client *mctp_i2c_new_client(struct i2c_client *client)
return mcli;
err:
if (mcli) {
- if (mcli->client)
- i2c_unregister_device(mcli->client);
+ i2c_unregister_device(mcli->client);
kfree(mcli);
}
return ERR_PTR(rc);
diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index 2beb83154d39..cb53dccbde1a 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -17,37 +17,20 @@ static int octeon_mdiobus_probe(struct platform_device *pdev)
{
struct cavium_mdiobus *bus;
struct mii_bus *mii_bus;
- struct resource *res_mem;
- resource_size_t mdio_phys;
- resource_size_t regsize;
union cvmx_smix_en smi_en;
- int err = -ENOENT;
+ int err;
mii_bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*bus));
if (!mii_bus)
return -ENOMEM;
- res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (res_mem == NULL) {
- dev_err(&pdev->dev, "found no memory resource\n");
- return -ENXIO;
- }
-
bus = mii_bus->priv;
bus->mii_bus = mii_bus;
- mdio_phys = res_mem->start;
- regsize = resource_size(res_mem);
- if (!devm_request_mem_region(&pdev->dev, mdio_phys, regsize,
- res_mem->name)) {
- dev_err(&pdev->dev, "request_mem_region failed\n");
- return -ENXIO;
- }
-
- bus->register_base = devm_ioremap(&pdev->dev, mdio_phys, regsize);
- if (!bus->register_base) {
+ bus->register_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(bus->register_base)) {
dev_err(&pdev->dev, "dev_ioremap failed\n");
- return -ENOMEM;
+ return PTR_ERR(bus->register_base);
}
smi_en.u64 = 0;
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 4ea44a2f48f7..f422a2f666ef 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -36,6 +36,7 @@
#include <linux/inet.h>
#include <linux/configfs.h>
#include <linux/etherdevice.h>
+#include <linux/u64_stats_sync.h>
#include <linux/utsname.h>
#include <linux/rtnetlink.h>
@@ -90,6 +91,12 @@ static DEFINE_MUTEX(target_cleanup_list_lock);
*/
static struct console netconsole_ext;
+/**
+ * struct netconsole_target_stats - Transmit failure counters for a target.
+ * @xmit_drop_count: Incremented when netpoll_send_udp() returns NET_XMIT_DROP.
+ * @enomem_count: Incremented when netpoll_send_udp() returns -ENOMEM.
+ * @syncp: u64_stats sync object bracketing updates and reads of the counters.
+ */
+struct netconsole_target_stats {
+ u64_stats_t xmit_drop_count;
+ u64_stats_t enomem_count;
+ struct u64_stats_sync syncp;
+};
+
/**
* struct netconsole_target - Represents a configured netconsole target.
* @list: Links this target into the target_list.
@@ -97,6 +104,7 @@ static struct console netconsole_ext;
* @userdata_group: Links to the userdata configfs hierarchy
* @userdata_complete: Cached, formatted string of append
* @userdata_length: String length of userdata_complete
+ * @stats: Packet send stats for the target. Used for debugging.
* @enabled: On / off knob to enable / disable target.
* Visible from userspace (read-write).
* We maintain a strict 1:1 correspondence between this and
@@ -124,6 +132,7 @@ struct netconsole_target {
char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS];
size_t userdata_length;
#endif
+ struct netconsole_target_stats stats;
bool enabled;
bool extended;
bool release;
@@ -262,6 +271,7 @@ static void netconsole_process_cleanups_core(void)
* | remote_ip
* | local_mac
* | remote_mac
+ * | transmit_errors
* | userdata/
* | <key>/
* | value
@@ -371,6 +381,21 @@ static ssize_t remote_mac_show(struct config_item *item, char *buf)
return sysfs_emit(buf, "%pM\n", to_target(item)->np.remote_mac);
}
+/*
+ * configfs show handler for the read-only "transmit_errors" attribute.
+ * Emits the sum of the target's xmit_drop_count and enomem_count as a single
+ * decimal number followed by a newline.
+ */
+static ssize_t transmit_errors_show(struct config_item *item, char *buf)
+{
+ struct netconsole_target *nt = to_target(item);
+ u64 xmit_drop_count, enomem_count;
+ unsigned int start;
+
+ /* Re-read both counters for as long as u64_stats_fetch_retry() asks
+  * for a retry, so the two values are sampled together.
+  */
+ do {
+ start = u64_stats_fetch_begin(&nt->stats.syncp);
+ xmit_drop_count = u64_stats_read(&nt->stats.xmit_drop_count);
+ enomem_count = u64_stats_read(&nt->stats.enomem_count);
+ } while (u64_stats_fetch_retry(&nt->stats.syncp, start));
+
+ return sysfs_emit(buf, "%llu\n", xmit_drop_count + enomem_count);
+}
+
/*
* This one is special -- targets created through the configfs interface
* are not enabled (and the corresponding netpoll activated) by default.
@@ -842,6 +867,7 @@ CONFIGFS_ATTR(, remote_ip);
CONFIGFS_ATTR_RO(, local_mac);
CONFIGFS_ATTR(, remote_mac);
CONFIGFS_ATTR(, release);
+CONFIGFS_ATTR_RO(, transmit_errors);
static struct configfs_attribute *netconsole_target_attrs[] = {
&attr_enabled,
@@ -854,6 +880,7 @@ static struct configfs_attribute *netconsole_target_attrs[] = {
&attr_remote_ip,
&attr_local_mac,
&attr_remote_mac,
+ &attr_transmit_errors,
NULL,
};
@@ -1058,6 +1085,33 @@ static struct notifier_block netconsole_netdev_notifier = {
.notifier_call = netconsole_netdev_event,
};
+/**
+ * send_udp - Wrapper for netpoll_send_udp that counts errors
+ * @nt: target to send message to
+ * @msg: message to send
+ * @len: length of message
+ *
+ * Always passes the message to netpoll_send_udp(). When
+ * CONFIG_NETCONSOLE_DYNAMIC is enabled, the return value is then classified:
+ * NET_XMIT_DROP increments nt->stats.xmit_drop_count and -ENOMEM increments
+ * nt->stats.enomem_count. When CONFIG_NETCONSOLE_DYNAMIC is disabled, the
+ * return value is ignored and no statistics are updated.
+ */
+static void send_udp(struct netconsole_target *nt, const char *msg, int len)
+{
+ int result = netpoll_send_udp(&nt->np, msg, len);
+
+ if (IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) {
+ if (result == NET_XMIT_DROP) {
+ u64_stats_update_begin(&nt->stats.syncp);
+ u64_stats_inc(&nt->stats.xmit_drop_count);
+ u64_stats_update_end(&nt->stats.syncp);
+ } else if (result == -ENOMEM) {
+ u64_stats_update_begin(&nt->stats.syncp);
+ u64_stats_inc(&nt->stats.enomem_count);
+ u64_stats_update_end(&nt->stats.syncp);
+ }
+ }
+}
+
static void send_msg_no_fragmentation(struct netconsole_target *nt,
const char *msg,
int msg_len,
@@ -1085,7 +1139,7 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt,
MAX_PRINT_CHUNK - msg_len,
"%s", userdata);
- netpoll_send_udp(&nt->np, buf, msg_len);
+ send_udp(nt, buf, msg_len);
}
static void append_release(char *buf)
@@ -1178,7 +1232,7 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf,
this_offset += this_chunk;
}
- netpoll_send_udp(&nt->np, buf, this_header + this_offset);
+ send_udp(nt, buf, this_header + this_offset);
offset += this_offset;
}
}
@@ -1288,7 +1342,7 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
tmp = msg;
for (left = len; left;) {
frag = min(left, MAX_PRINT_CHUNK);
- netpoll_send_udp(&nt->np, tmp, frag);
+ send_udp(nt, tmp, frag);
tmp += frag;
left -= frag;
}
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
index b79aedad855b..767a8c0714ac 100644
--- a/drivers/net/pcs/pcs-lynx.c
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -35,6 +35,27 @@ enum sgmii_speed {
#define phylink_pcs_to_lynx(pl_pcs) container_of((pl_pcs), struct lynx_pcs, pcs)
#define lynx_to_phylink_pcs(lynx) (&(lynx)->pcs)
+/*
+ * Report the LINK_INBAND_* capability mask of the Lynx PCS for @interface:
+ * 1000base-X, SGMII and QSGMII can run with in-band either disabled or
+ * enabled, 10GBASE-R and 2500base-X are reported as in-band disabled only,
+ * and USXGMII as in-band enabled only. Any other interface mode returns 0.
+ */
+static unsigned int lynx_pcs_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ switch (interface) {
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+
+ case PHY_INTERFACE_MODE_10GBASER:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ return LINK_INBAND_DISABLE;
+
+ case PHY_INTERFACE_MODE_USXGMII:
+ return LINK_INBAND_ENABLE;
+
+ default:
+ return 0;
+ }
+}
+
static void lynx_pcs_get_state_usxgmii(struct mdio_device *pcs,
struct phylink_link_state *state)
{
@@ -306,6 +327,7 @@ static void lynx_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
}
static const struct phylink_pcs_ops lynx_pcs_phylink_ops = {
+ .pcs_inband_caps = lynx_pcs_inband_caps,
.pcs_get_state = lynx_pcs_get_state,
.pcs_config = lynx_pcs_config,
.pcs_an_restart = lynx_pcs_an_restart,
diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c
index 4f63abe638c4..7de804535229 100644
--- a/drivers/net/pcs/pcs-mtk-lynxi.c
+++ b/drivers/net/pcs/pcs-mtk-lynxi.c
@@ -88,6 +88,21 @@ static struct mtk_pcs_lynxi *pcs_to_mtk_pcs_lynxi(struct phylink_pcs *pcs)
return container_of(pcs, struct mtk_pcs_lynxi, pcs);
}
+/*
+ * Report the LINK_INBAND_* capability mask of the MediaTek LynxI PCS for
+ * @interface: 1000base-X, 2500base-X, SGMII and QSGMII can run with in-band
+ * either disabled or enabled. Any other interface mode returns 0.
+ */
+static unsigned int mtk_pcs_lynxi_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ switch (interface) {
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+
+ default:
+ return 0;
+ }
+}
+
static void mtk_pcs_lynxi_get_state(struct phylink_pcs *pcs,
struct phylink_link_state *state)
{
@@ -241,6 +256,7 @@ static void mtk_pcs_lynxi_disable(struct phylink_pcs *pcs)
}
static const struct phylink_pcs_ops mtk_pcs_lynxi_ops = {
+ .pcs_inband_caps = mtk_pcs_lynxi_inband_caps,
.pcs_get_state = mtk_pcs_lynxi_get_state,
.pcs_config = mtk_pcs_lynxi_config,
.pcs_an_restart = mtk_pcs_lynxi_restart_an,
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 7246a910728d..f70ca39f0905 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -567,6 +567,33 @@ static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
return 0;
}
+/*
+ * Report the LINK_INBAND_* capability mask of the XPCS for @interface.
+ * The interface is first mapped to a compat entry via xpcs_find_compat();
+ * 0 is returned when there is none. The mask is then chosen from the
+ * entry's an_mode: C73 is in-band enabled only, C37 SGMII and C37
+ * 1000base-X can be either enabled or disabled, 10GBASE-R and 2500base-X
+ * are in-band disabled only, and any other an_mode returns 0.
+ */
+static unsigned int xpcs_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
+ const struct dw_xpcs_compat *compat;
+
+ compat = xpcs_find_compat(xpcs, interface);
+ if (!compat)
+ return 0;
+
+ switch (compat->an_mode) {
+ case DW_AN_C73:
+ return LINK_INBAND_ENABLE;
+
+ case DW_AN_C37_SGMII:
+ case DW_AN_C37_1000BASEX:
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
+
+ case DW_10GBASER:
+ case DW_2500BASEX:
+ return LINK_INBAND_DISABLE;
+
+ default:
+ return 0;
+ }
+}
+
void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
{
const struct dw_xpcs_compat *compat;
@@ -1306,6 +1333,7 @@ static const struct dw_xpcs_desc xpcs_desc_list[] = {
static const struct phylink_pcs_ops xpcs_phylink_ops = {
.pcs_validate = xpcs_validate,
+ .pcs_inband_caps = xpcs_inband_caps,
.pcs_pre_config = xpcs_pre_config,
.pcs_config = xpcs_config,
.pcs_get_state = xpcs_get_state,
diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c
index 97da3aee4942..47405bded677 100644
--- a/drivers/net/phy/bcm84881.c
+++ b/drivers/net/phy/bcm84881.c
@@ -235,11 +235,21 @@ static int bcm84881_read_status(struct phy_device *phydev)
return genphy_c45_read_mdix(phydev);
}
+/* The Broadcom BCM84881 in the Methode DM7052 is unable to provide a SGMII
+ * or 802.3z control word, so inband will not work. Report in-band as
+ * disabled-only, whatever @interface is requested.
+ */
+static unsigned int bcm84881_inband_caps(struct phy_device *phydev,
+ phy_interface_t interface)
+{
+ return LINK_INBAND_DISABLE;
+}
+
static struct phy_driver bcm84881_drivers[] = {
{
.phy_id = 0xae025150,
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM84881",
+ .inband_caps = bcm84881_inband_caps,
.config_init = bcm84881_config_init,
.probe = bcm84881_probe,
.get_features = bcm84881_get_features,
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index cf8b6d0bfaa9..25ee09c48027 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -22,8 +22,6 @@
#define DP83826C_PHY_ID 0x2000a130
#define DP83826NC_PHY_ID 0x2000a110
-#define DP83822_DEVADDR 0x1f
-
#define MII_DP83822_CTRL_2 0x0a
#define MII_DP83822_PHYSTS 0x10
#define MII_DP83822_PHYSCR 0x11
@@ -159,14 +157,14 @@ static int dp83822_config_wol(struct phy_device *phydev,
/* MAC addresses start with byte 5, but stored in mac[0].
* 822 PHYs store bytes 4|5, 2|3, 0|1
*/
- phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA1,
+ phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_DA1,
(mac[1] << 8) | mac[0]);
- phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA2,
+ phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_DA2,
(mac[3] << 8) | mac[2]);
- phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA3,
+ phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_DA3,
(mac[5] << 8) | mac[4]);
- value = phy_read_mmd(phydev, DP83822_DEVADDR,
+ value = phy_read_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_WOL_CFG);
if (wol->wolopts & WAKE_MAGIC)
value |= DP83822_WOL_MAGIC_EN;
@@ -174,13 +172,13 @@ static int dp83822_config_wol(struct phy_device *phydev,
value &= ~DP83822_WOL_MAGIC_EN;
if (wol->wolopts & WAKE_MAGICSECURE) {
- phy_write_mmd(phydev, DP83822_DEVADDR,
+ phy_write_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RXSOP1,
(wol->sopass[1] << 8) | wol->sopass[0]);
- phy_write_mmd(phydev, DP83822_DEVADDR,
+ phy_write_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RXSOP2,
(wol->sopass[3] << 8) | wol->sopass[2]);
- phy_write_mmd(phydev, DP83822_DEVADDR,
+ phy_write_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RXSOP3,
(wol->sopass[5] << 8) | wol->sopass[4]);
value |= DP83822_WOL_SECURE_ON;
@@ -194,10 +192,10 @@ static int dp83822_config_wol(struct phy_device *phydev,
value |= DP83822_WOL_EN | DP83822_WOL_INDICATION_SEL |
DP83822_WOL_CLR_INDICATION;
- return phy_write_mmd(phydev, DP83822_DEVADDR,
+ return phy_write_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_WOL_CFG, value);
} else {
- return phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
+ return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_WOL_CFG,
DP83822_WOL_EN |
DP83822_WOL_MAGIC_EN |
@@ -226,23 +224,23 @@ static void dp83822_get_wol(struct phy_device *phydev,
wol->supported = (WAKE_MAGIC | WAKE_MAGICSECURE);
wol->wolopts = 0;
- value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
+ value = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG);
if (value & DP83822_WOL_MAGIC_EN)
wol->wolopts |= WAKE_MAGIC;
if (value & DP83822_WOL_SECURE_ON) {
- sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR,
+ sopass_val = phy_read_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RXSOP1);
wol->sopass[0] = (sopass_val & 0xff);
wol->sopass[1] = (sopass_val >> 8);
- sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR,
+ sopass_val = phy_read_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RXSOP2);
wol->sopass[2] = (sopass_val & 0xff);
wol->sopass[3] = (sopass_val >> 8);
- sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR,
+ sopass_val = phy_read_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RXSOP3);
wol->sopass[4] = (sopass_val & 0xff);
wol->sopass[5] = (sopass_val >> 8);
@@ -430,18 +428,18 @@ static int dp83822_config_init(struct phy_device *phydev)
if (tx_int_delay <= 0)
rgmii_delay |= DP83822_TX_CLK_SHIFT;
- err = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ err = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR,
DP83822_RX_CLK_SHIFT | DP83822_TX_CLK_SHIFT, rgmii_delay);
if (err)
return err;
- err = phy_set_bits_mmd(phydev, DP83822_DEVADDR,
+ err = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RCSR, DP83822_RGMII_MODE_EN);
if (err)
return err;
} else {
- err = phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
+ err = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_RCSR, DP83822_RGMII_MODE_EN);
if (err)
@@ -496,7 +494,7 @@ static int dp83822_config_init(struct phy_device *phydev)
return err;
if (dp83822->fx_signal_det_low) {
- err = phy_set_bits_mmd(phydev, DP83822_DEVADDR,
+ err = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
MII_DP83822_GENCFG,
DP83822_SIG_DET_LOW);
if (err)
@@ -514,10 +512,10 @@ static int dp8382x_config_rmii_mode(struct phy_device *phydev)
if (!device_property_read_string(dev, "ti,rmii-mode", &of_val)) {
if (strcmp(of_val, "master") == 0) {
- ret = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR,
DP83822_RMII_MODE_SEL);
} else if (strcmp(of_val, "slave") == 0) {
- ret = phy_set_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR,
DP83822_RMII_MODE_SEL);
} else {
phydev_err(phydev, "Invalid value for ti,rmii-mode property (%s)\n",
@@ -539,7 +537,7 @@ static int dp83826_config_init(struct phy_device *phydev)
int ret;
if (phydev->interface == PHY_INTERFACE_MODE_RMII) {
- ret = phy_set_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR,
DP83822_RMII_MODE_EN);
if (ret)
return ret;
@@ -548,7 +546,7 @@ static int dp83826_config_init(struct phy_device *phydev)
if (ret)
return ret;
} else {
- ret = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR,
+ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR,
DP83822_RMII_MODE_EN);
if (ret)
return ret;
@@ -560,7 +558,7 @@ static int dp83826_config_init(struct phy_device *phydev)
FIELD_GET(DP83826_CFG_DAC_MINUS_MDIX_5_TO_4,
dp83822->cfg_dac_minus));
mask = DP83826_VOD_CFG1_MINUS_MDIX_MASK | DP83826_VOD_CFG1_MINUS_MDI_MASK;
- ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG1, mask, val);
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83826_VOD_CFG1, mask, val);
if (ret)
return ret;
@@ -568,7 +566,7 @@ static int dp83826_config_init(struct phy_device *phydev)
FIELD_GET(DP83826_CFG_DAC_MINUS_MDIX_3_TO_0,
dp83822->cfg_dac_minus));
mask = DP83826_VOD_CFG2_MINUS_MDIX_MASK;
- ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG2, mask, val);
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83826_VOD_CFG2, mask, val);
if (ret)
return ret;
}
@@ -577,7 +575,7 @@ static int dp83826_config_init(struct phy_device *phydev)
val = FIELD_PREP(DP83826_VOD_CFG2_PLUS_MDIX_MASK, dp83822->cfg_dac_plus) |
FIELD_PREP(DP83826_VOD_CFG2_PLUS_MDI_MASK, dp83822->cfg_dac_plus);
mask = DP83826_VOD_CFG2_PLUS_MDIX_MASK | DP83826_VOD_CFG2_PLUS_MDI_MASK;
- ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG2, mask, val);
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83826_VOD_CFG2, mask, val);
if (ret)
return ret;
}
@@ -673,7 +671,7 @@ static int dp83822_read_straps(struct phy_device *phydev)
int fx_enabled, fx_sd_enable;
int val;
- val = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_SOR1);
+ val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_SOR1);
if (val < 0)
return val;
@@ -748,7 +746,7 @@ static int dp83822_suspend(struct phy_device *phydev)
{
int value;
- value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
+ value = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG);
if (!(value & DP83822_WOL_EN))
genphy_suspend(phydev);
@@ -762,9 +760,9 @@ static int dp83822_resume(struct phy_device *phydev)
genphy_resume(phydev);
- value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
+ value = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG);
- phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, value |
+ phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG, value |
DP83822_WOL_CLR_INDICATION);
return 0;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index cd50cd6a7f75..ffe223ad9e5f 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -717,6 +717,48 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev)
return genphy_check_and_restart_aneg(phydev, changed);
}
+/*
+ * Report the LINK_INBAND_* capability mask for @interface: 1000base-X and
+ * SGMII support in-band disabled, enabled and enabled-with-bypass. Any other
+ * interface mode returns 0.
+ */
+static unsigned int m88e1111_inband_caps(struct phy_device *phydev,
+ phy_interface_t interface)
+{
+ /* In 1000base-X and SGMII modes, the inband mode can be changed
+ * through the Fibre page BMCR ANENABLE bit.
+ */
+ if (interface == PHY_INTERFACE_MODE_1000BASEX ||
+ interface == PHY_INTERFACE_MODE_SGMII)
+ return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE |
+ LINK_INBAND_BYPASS;
+
+ return 0;
+}
+
+/*
+ * Program the in-band signalling mode selected by @modes (one of
+ * LINK_INBAND_DISABLE, LINK_INBAND_ENABLE or LINK_INBAND_BYPASS).
+ *
+ * Only valid while the PHY interface is 1000base-X or SGMII; any other
+ * interface mode yields -EINVAL. The serial AN bypass bit in the extended
+ * status register is set only for LINK_INBAND_BYPASS, and the Fibre page
+ * BMCR ANENABLE bit is cleared only for LINK_INBAND_DISABLE.
+ *
+ * Returns the result of the final paged BMCR update, or a negative errno
+ * if an earlier step failed.
+ */
+static int m88e1111_config_inband(struct phy_device *phydev, unsigned int modes)
+{
+ u16 extsr, bmcr;
+ int err;
+
+ if (phydev->interface != PHY_INTERFACE_MODE_1000BASEX &&
+ phydev->interface != PHY_INTERFACE_MODE_SGMII)
+ return -EINVAL;
+
+ if (modes == LINK_INBAND_BYPASS)
+ extsr = MII_M1111_HWCFG_SERIAL_AN_BYPASS;
+ else
+ extsr = 0;
+
+ if (modes == LINK_INBAND_DISABLE)
+ bmcr = 0;
+ else
+ bmcr = BMCR_ANENABLE;
+
+ err = phy_modify(phydev, MII_M1111_PHY_EXT_SR,
+ MII_M1111_HWCFG_SERIAL_AN_BYPASS, extsr);
+ /* Propagate the errno; returning extsr here would hide the failure
+  * (0) or hand the caller a positive register value.
+  */
+ if (err < 0)
+ return err;
+
+ return phy_modify_paged(phydev, MII_MARVELL_FIBER_PAGE, MII_BMCR,
+ BMCR_ANENABLE, bmcr);
+}
+
static int m88e1111_config_aneg(struct phy_device *phydev)
{
int extsr = phy_read(phydev, MII_M1111_PHY_EXT_SR);
@@ -1508,7 +1550,6 @@ static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs)
static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs)
{
- struct ethtool_keee eee;
int val, ret;
if (*msecs == ETHTOOL_PHY_FAST_LINK_DOWN_OFF)
@@ -1518,8 +1559,7 @@ static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs)
/* According to the Marvell data sheet EEE must be disabled for
* Fast Link Down detection to work properly
*/
- ret = genphy_c45_ethtool_get_eee(phydev, &eee);
- if (!ret && eee.eee_enabled) {
+ if (phydev->eee_cfg.eee_enabled) {
phydev_warn(phydev, "Fast Link Down detection requires EEE to be disabled!\n");
return -EBUSY;
}
@@ -3677,6 +3717,8 @@ static struct phy_driver marvell_drivers[] = {
.name = "Marvell 88E1112",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
+ .inband_caps = m88e1111_inband_caps,
+ .config_inband = m88e1111_config_inband,
.config_init = m88e1112_config_init,
.config_aneg = marvell_config_aneg,
.config_intr = marvell_config_intr,
@@ -3698,6 +3740,8 @@ static struct phy_driver marvell_drivers[] = {
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
+ .inband_caps = m88e1111_inband_caps,
+ .config_inband = m88e1111_config_inband,
.config_init = m88e1111gbe_config_init,
.config_aneg = m88e1111_config_aneg,
.read_status = marvell_read_status,
@@ -3721,6 +3765,8 @@ static struct phy_driver marvell_drivers[] = {
.name = "Marvell 88E1111 (Finisar)",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
+ .inband_caps = m88e1111_inband_caps,
+ .config_inband = m88e1111_config_inband,
.config_init = m88e1111gbe_config_init,
.config_aneg = m88e1111_config_aneg,
.read_status = marvell_read_status,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 944ae98ad110..0dac08e85304 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -1469,18 +1469,17 @@ EXPORT_SYMBOL_GPL(genphy_c45_plca_get_status);
* @phydev: target phy_device struct
* @adv: variable to store advertised linkmodes
* @lp: variable to store LP advertised linkmodes
- * @is_enabled: variable to store EEE enabled/disabled configuration value
*
* Description: this function will read local and link partner PHY
* advertisements. Compare them return current EEE state.
*/
int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
- unsigned long *lp, bool *is_enabled)
+ unsigned long *lp)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_adv) = {};
__ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_lp) = {};
__ETHTOOL_DECLARE_LINK_MODE_MASK(common);
- bool eee_enabled, eee_active;
+ bool eee_active;
int ret;
ret = genphy_c45_read_eee_adv(phydev, tmp_adv);
@@ -1491,9 +1490,8 @@ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
if (ret)
return ret;
- eee_enabled = !linkmode_empty(tmp_adv);
linkmode_and(common, tmp_adv, tmp_lp);
- if (eee_enabled && !linkmode_empty(common))
+ if (!linkmode_empty(tmp_adv) && !linkmode_empty(common))
eee_active = phy_check_valid(phydev->speed, phydev->duplex,
common);
else
@@ -1503,8 +1501,6 @@ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
linkmode_copy(adv, tmp_adv);
if (lp)
linkmode_copy(lp, tmp_lp);
- if (is_enabled)
- *is_enabled = eee_enabled;
return eee_active;
}
@@ -1521,15 +1517,13 @@ EXPORT_SYMBOL(genphy_c45_eee_is_active);
int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
struct ethtool_keee *data)
{
- bool is_enabled;
int ret;
ret = genphy_c45_eee_is_active(phydev, data->advertised,
- data->lp_advertised, &is_enabled);
+ data->lp_advertised);
if (ret < 0)
return ret;
- data->eee_enabled = is_enabled;
data->eee_active = phydev->eee_active;
linkmode_copy(data->supported, phydev->supported_eee);
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 0d20b534122b..e4b04cdaa995 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -988,8 +988,7 @@ static int phy_check_link_status(struct phy_device *phydev)
if (phydev->link && phydev->state != PHY_RUNNING) {
phy_check_downshift(phydev);
phydev->state = PHY_RUNNING;
- err = genphy_c45_eee_is_active(phydev,
- NULL, NULL, NULL);
+ err = genphy_c45_eee_is_active(phydev, NULL, NULL);
phydev->eee_active = err > 0;
phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled &&
phydev->eee_active;
@@ -1006,6 +1005,59 @@ static int phy_check_link_status(struct phy_device *phydev)
}
/**
+ * phy_inband_caps - query which in-band signalling modes are supported
+ * @phydev: a pointer to a &struct phy_device
+ * @interface: the interface mode for the PHY
+ *
+ * Returns zero if it is unknown what in-band signalling is supported by the
+ * PHY (e.g. because no driver is bound to @phydev or the PHY driver doesn't
+ * implement the inband_caps method.) Otherwise, returns a bit mask of the
+ * LINK_INBAND_* values from &enum link_inband_signalling to describe which
+ * inband modes are supported by the PHY for this interface mode.
+ */
+unsigned int phy_inband_caps(struct phy_device *phydev,
+ phy_interface_t interface)
+{
+ if (phydev->drv && phydev->drv->inband_caps)
+ return phydev->drv->inband_caps(phydev, interface);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(phy_inband_caps);
+
+/**
+ * phy_config_inband - configure the desired PHY in-band mode
+ * @phydev: the phy_device struct
+ * @modes: in-band modes to configure
+ *
+ * Description: disables, enables or enables-with-bypass in-band signalling
+ * between the PHY and host system. Exactly one of LINK_INBAND_DISABLE,
+ * LINK_INBAND_ENABLE and LINK_INBAND_BYPASS must be set in @modes. The
+ * driver's config_inband method is called with phydev->lock held.
+ *
+ * Returns: zero on success, or negative errno value: -EINVAL if @modes does
+ * not select exactly one mode, -EIO if no driver is bound to @phydev,
+ * -EOPNOTSUPP if the driver has no config_inband method, otherwise whatever
+ * the driver's config_inband method returns.
+ */
+int phy_config_inband(struct phy_device *phydev, unsigned int modes)
+{
+ int err;
+
+ /* Count how many of the three mode bits are set; reject 0 or >1. */
+ if (!!(modes & LINK_INBAND_DISABLE) +
+ !!(modes & LINK_INBAND_ENABLE) +
+ !!(modes & LINK_INBAND_BYPASS) != 1)
+ return -EINVAL;
+
+ mutex_lock(&phydev->lock);
+ if (!phydev->drv)
+ err = -EIO;
+ else if (!phydev->drv->config_inband)
+ err = -EOPNOTSUPP;
+ else
+ err = phydev->drv->config_inband(phydev, modes);
+ mutex_unlock(&phydev->lock);
+
+ return err;
+}
+EXPORT_SYMBOL(phy_config_inband);
+
+/**
* _phy_start_aneg - start auto-negotiation for this PHY device
* @phydev: the phy_device struct
*
@@ -1605,7 +1657,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
if (!phydev->drv)
return -EIO;
- ret = genphy_c45_eee_is_active(phydev, NULL, NULL, NULL);
+ ret = genphy_c45_eee_is_active(phydev, NULL, NULL);
if (ret < 0)
return ret;
if (!ret)
@@ -1649,8 +1701,8 @@ EXPORT_SYMBOL(phy_get_eee_err);
* @phydev: target phy_device struct
* @data: ethtool_keee data
*
- * Description: reports the Supported/Advertisement/LP Advertisement
- * capabilities, etc.
+ * Description: get the current EEE settings, filling in all members of
+ * @data.
*/
int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data)
{
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 30a654e98352..95fbc363f9a6 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -56,7 +56,8 @@ struct phylink {
struct phy_device *phydev;
phy_interface_t link_interface; /* PHY_INTERFACE_xxx */
u8 cfg_link_an_mode; /* MLO_AN_xxx */
- u8 cur_link_an_mode;
+ u8 req_link_an_mode; /* Requested MLO_AN_xxx mode */
+ u8 act_link_an_mode; /* Active MLO_AN_xxx mode */
u8 link_port; /* The current non-phy ethtool port */
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
@@ -74,6 +75,7 @@ struct phylink {
struct mutex state_mutex;
struct phylink_link_state phy_state;
+ unsigned int phy_ib_mode;
struct work_struct resolve;
unsigned int pcs_neg_mode;
unsigned int pcs_state;
@@ -174,6 +176,24 @@ static const char *phylink_an_mode_str(unsigned int mode)
return mode < ARRAY_SIZE(modestr) ? modestr[mode] : "unknown";
}
+static const char *phylink_pcs_mode_str(unsigned int mode)
+{
+ if (!mode)
+ return "none";
+
+ if (mode & PHYLINK_PCS_NEG_OUTBAND)
+ return "outband";
+
+ if (mode & PHYLINK_PCS_NEG_INBAND) {
+ if (mode & PHYLINK_PCS_NEG_ENABLED)
+ return "inband,an-enabled";
+ else
+ return "inband,an-disabled";
+ }
+
+ return "unknown";
+}
+
static unsigned int phylink_interface_signal_rate(phy_interface_t interface)
{
switch (interface) {
@@ -971,6 +991,15 @@ static void phylink_resolve_an_pause(struct phylink_link_state *state)
}
}
+static unsigned int phylink_pcs_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface)
+{
+ if (pcs && pcs->ops->pcs_inband_caps)
+ return pcs->ops->pcs_inband_caps(pcs, interface);
+
+ return 0;
+}
+
static void phylink_pcs_pre_config(struct phylink_pcs *pcs,
phy_interface_t interface)
{
@@ -1024,6 +1053,24 @@ static void phylink_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
pcs->ops->pcs_link_up(pcs, neg_mode, interface, speed, duplex);
}
+/* Query the in-band capabilities for a specific interface mode, asking the
+ * MAC for the PCS which will be used to handle the interface mode.
+ */
+static unsigned int phylink_inband_caps(struct phylink *pl,
+ phy_interface_t interface)
+{
+ struct phylink_pcs *pcs;
+
+ if (!pl->mac_ops->mac_select_pcs)
+ return 0;
+
+ pcs = pl->mac_ops->mac_select_pcs(pl->config, interface);
+ if (!pcs)
+ return 0;
+
+ return phylink_pcs_inband_caps(pcs, interface);
+}
+
static void phylink_pcs_poll_stop(struct phylink *pl)
{
if (pl->cfg_link_an_mode == MLO_AN_INBAND)
@@ -1065,13 +1112,13 @@ static void phylink_mac_config(struct phylink *pl,
phylink_dbg(pl,
"%s: mode=%s/%s/%s adv=%*pb pause=%02x\n",
- __func__, phylink_an_mode_str(pl->cur_link_an_mode),
+ __func__, phylink_an_mode_str(pl->act_link_an_mode),
phy_modes(st.interface),
phy_rate_matching_to_str(st.rate_matching),
__ETHTOOL_LINK_MODE_MASK_NBITS, st.advertising,
st.pause);
- pl->mac_ops->mac_config(pl->config, pl->cur_link_an_mode, &st);
+ pl->mac_ops->mac_config(pl->config, pl->act_link_an_mode, &st);
}
static void phylink_pcs_an_restart(struct phylink *pl)
@@ -1079,13 +1126,14 @@ static void phylink_pcs_an_restart(struct phylink *pl)
if (pl->pcs && linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
pl->link_config.advertising) &&
phy_interface_mode_is_8023z(pl->link_config.interface) &&
- phylink_autoneg_inband(pl->cur_link_an_mode))
+ phylink_autoneg_inband(pl->act_link_an_mode))
pl->pcs->ops->pcs_an_restart(pl->pcs);
}
/**
* phylink_pcs_neg_mode() - helper to determine PCS inband mode
- * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @pcs: a pointer to &struct phylink_pcs
* @interface: interface mode to be used
* @advertising: adertisement ethtool link mode mask
*
@@ -1102,11 +1150,21 @@ static void phylink_pcs_an_restart(struct phylink *pl)
* Note: this is for cases where the PCS itself is involved in negotiation
* (e.g. Clause 37, SGMII and similar) not Clause 73.
*/
-static unsigned int phylink_pcs_neg_mode(unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertising)
+static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs,
+ phy_interface_t interface,
+ const unsigned long *advertising)
{
- unsigned int neg_mode;
+ unsigned int pcs_ib_caps = 0;
+ unsigned int phy_ib_caps = 0;
+ unsigned int neg_mode, mode;
+ enum {
+ INBAND_CISCO_SGMII,
+ INBAND_BASEX,
+ } type;
+
+ mode = pl->req_link_an_mode;
+
+ pl->phy_ib_mode = 0;
switch (interface) {
case PHY_INTERFACE_MODE_SGMII:
@@ -1119,10 +1177,7 @@ static unsigned int phylink_pcs_neg_mode(unsigned int mode,
* inband communication. Note: there exist PHYs that run
* with SGMII but do not send the inband data.
*/
- if (!phylink_autoneg_inband(mode))
- neg_mode = PHYLINK_PCS_NEG_OUTBAND;
- else
- neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
+ type = INBAND_CISCO_SGMII;
break;
case PHY_INTERFACE_MODE_1000BASEX:
@@ -1133,21 +1188,143 @@ static unsigned int phylink_pcs_neg_mode(unsigned int mode,
* as well, but drivers may not support this, so may
* need to override this.
*/
- if (!phylink_autoneg_inband(mode))
+ type = INBAND_BASEX;
+ break;
+
+ default:
+ pl->pcs_neg_mode = PHYLINK_PCS_NEG_NONE;
+ pl->act_link_an_mode = mode;
+ return;
+ }
+
+ if (pcs)
+ pcs_ib_caps = phylink_pcs_inband_caps(pcs, interface);
+
+ if (pl->phydev)
+ phy_ib_caps = phy_inband_caps(pl->phydev, interface);
+
+ phylink_dbg(pl, "interface %s inband modes: pcs=%02x phy=%02x\n",
+ phy_modes(interface), pcs_ib_caps, phy_ib_caps);
+
+ if (!phylink_autoneg_inband(mode)) {
+ bool pcs_ib_only = false;
+ bool phy_ib_only = false;
+
+ if (pcs_ib_caps && pcs_ib_caps != LINK_INBAND_DISABLE) {
+ /* PCS supports reporting in-band capabilities, and
+ * supports more than disable mode.
+ */
+ if (pcs_ib_caps & LINK_INBAND_DISABLE)
+ neg_mode = PHYLINK_PCS_NEG_OUTBAND;
+ else if (pcs_ib_caps & LINK_INBAND_ENABLE)
+ pcs_ib_only = true;
+ }
+
+ if (phy_ib_caps && phy_ib_caps != LINK_INBAND_DISABLE) {
+ /* PHY supports in-band capabilities, and supports
+ * more than disable mode.
+ */
+ if (phy_ib_caps & LINK_INBAND_DISABLE)
+ pl->phy_ib_mode = LINK_INBAND_DISABLE;
+ else if (phy_ib_caps & LINK_INBAND_BYPASS)
+ pl->phy_ib_mode = LINK_INBAND_BYPASS;
+ else if (phy_ib_caps & LINK_INBAND_ENABLE)
+ phy_ib_only = true;
+ }
+
+ /* If either the PCS or PHY requires inband to be enabled,
+ * this is an invalid configuration. Provide a diagnostic
+ * message for this case, but don't try to force the issue.
+ */
+ if (pcs_ib_only || phy_ib_only)
+ phylink_warn(pl,
+ "firmware wants %s mode, but %s%s%s requires inband\n",
+ phylink_an_mode_str(mode),
+ pcs_ib_only ? "PCS" : "",
+ pcs_ib_only && phy_ib_only ? " and " : "",
+ phy_ib_only ? "PHY" : "");
+
+ neg_mode = PHYLINK_PCS_NEG_OUTBAND;
+ } else if (type == INBAND_CISCO_SGMII || pl->phydev) {
+ /* For SGMII modes which are designed to be used with PHYs, or
+ * Base-X with a PHY, we try to use in-band mode wherever
+ * possible. However, there are some PHYs e.g. BCM84881 which
+ * do not support in-band.
+ */
+ const unsigned int inband_ok = LINK_INBAND_ENABLE |
+ LINK_INBAND_BYPASS;
+ const unsigned int outband_ok = LINK_INBAND_DISABLE |
+ LINK_INBAND_BYPASS;
+ /* PCS  PHY
+ * D E  D E
+ * 0 0  0 0  no information, inband enabled
+ * 1 0  0 0  pcs doesn't support, outband
+ * 0 1  0 0  pcs required, inband enabled
+ * 1 1  0 0  pcs optional, inband enabled
+ * 0 0  1 0  phy doesn't support, outband
+ * 1 0  1 0  pcs+phy doesn't support, outband
+ * 0 1  1 0  pcs required, phy doesn't support, invalid
+ * 1 1  1 0  pcs optional, phy doesn't support, outband
+ * 0 0  0 1  phy required, inband enabled
+ * 1 0  0 1  pcs doesn't support, phy required, invalid
+ * 0 1  0 1  pcs+phy required, inband enabled
+ * 1 1  0 1  pcs optional, phy required, inband enabled
+ * 0 0  1 1  phy optional, inband enabled
+ * 1 0  1 1  pcs doesn't support, phy optional, outband
+ * 0 1  1 1  pcs required, phy optional, inband enabled
+ * 1 1  1 1  pcs+phy optional, inband enabled
+ */
+ if ((!pcs_ib_caps || pcs_ib_caps & inband_ok) &&
+ (!phy_ib_caps || phy_ib_caps & inband_ok)) {
+ /* In-band supported or unknown at both ends. Enable
+ * in-band mode with or without bypass at the PHY.
+ */
+ if (phy_ib_caps & LINK_INBAND_ENABLE)
+ pl->phy_ib_mode = LINK_INBAND_ENABLE;
+ else if (phy_ib_caps & LINK_INBAND_BYPASS)
+ pl->phy_ib_mode = LINK_INBAND_BYPASS;
+
+ neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
+ } else if ((!pcs_ib_caps || pcs_ib_caps & outband_ok) &&
+ (!phy_ib_caps || phy_ib_caps & outband_ok)) {
+ /* In-band is not supported by at least one end.
+ * In-band bypass at the other end is possible.
+ */
+ if (phy_ib_caps & LINK_INBAND_DISABLE)
+ pl->phy_ib_mode = LINK_INBAND_DISABLE;
+ else if (phy_ib_caps & LINK_INBAND_BYPASS)
+ pl->phy_ib_mode = LINK_INBAND_BYPASS;
+
neg_mode = PHYLINK_PCS_NEG_OUTBAND;
+ if (pl->phydev)
+ mode = MLO_AN_PHY;
+ } else {
+ /* invalid */
+ phylink_warn(pl, "%s: incompatible in-band capabilities, trying in-band",
+ phy_modes(interface));
+ neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
+ }
+ } else {
+ /* For Base-X without a PHY */
+ if (pcs_ib_caps == LINK_INBAND_DISABLE)
+ /* If the PCS doesn't support inband, then inband must
+ * be disabled.
+ */
+ neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED;
+ else if (pcs_ib_caps == LINK_INBAND_ENABLE)
+ /* If the PCS requires inband, then inband must always
+ * be enabled.
+ */
+ neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
advertising))
neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
else
neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED;
- break;
-
- default:
- neg_mode = PHYLINK_PCS_NEG_NONE;
- break;
}
- return neg_mode;
+ pl->pcs_neg_mode = neg_mode;
+ pl->act_link_an_mode = mode;
}
static void phylink_major_config(struct phylink *pl, bool restart,
@@ -1159,11 +1336,9 @@ static void phylink_major_config(struct phylink *pl, bool restart,
unsigned int neg_mode;
int err;
- phylink_dbg(pl, "major config %s\n", phy_modes(state->interface));
-
- pl->pcs_neg_mode = phylink_pcs_neg_mode(pl->cur_link_an_mode,
- state->interface,
- state->advertising);
+ phylink_dbg(pl, "major config, requested %s/%s\n",
+ phylink_an_mode_str(pl->req_link_an_mode),
+ phy_modes(state->interface));
if (pl->mac_ops->mac_select_pcs) {
pcs = pl->mac_ops->mac_select_pcs(pl->config, state->interface);
@@ -1177,10 +1352,17 @@ static void phylink_major_config(struct phylink *pl, bool restart,
pcs_changed = pl->pcs != pcs;
}
+ phylink_pcs_neg_mode(pl, pcs, state->interface, state->advertising);
+
+ phylink_dbg(pl, "major config, active %s/%s/%s\n",
+ phylink_an_mode_str(pl->act_link_an_mode),
+ phylink_pcs_mode_str(pl->pcs_neg_mode),
+ phy_modes(state->interface));
+
phylink_pcs_poll_stop(pl);
if (pl->mac_ops->mac_prepare) {
- err = pl->mac_ops->mac_prepare(pl->config, pl->cur_link_an_mode,
+ err = pl->mac_ops->mac_prepare(pl->config, pl->act_link_an_mode,
state->interface);
if (err < 0) {
phylink_err(pl, "mac_prepare failed: %pe\n",
@@ -1214,7 +1396,7 @@ static void phylink_major_config(struct phylink *pl, bool restart,
if (pl->pcs_state == PCS_STATE_STARTING || pcs_changed)
phylink_pcs_enable(pl->pcs);
- neg_mode = pl->cur_link_an_mode;
+ neg_mode = pl->act_link_an_mode;
if (pl->pcs && pl->pcs->neg_mode)
neg_mode = pl->pcs_neg_mode;
@@ -1230,13 +1412,20 @@ static void phylink_major_config(struct phylink *pl, bool restart,
phylink_pcs_an_restart(pl);
if (pl->mac_ops->mac_finish) {
- err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode,
+ err = pl->mac_ops->mac_finish(pl->config, pl->act_link_an_mode,
state->interface);
if (err < 0)
phylink_err(pl, "mac_finish failed: %pe\n",
ERR_PTR(err));
}
+ if (pl->phydev && pl->phy_ib_mode) {
+ err = phy_config_inband(pl->phydev, pl->phy_ib_mode);
+ if (err < 0)
+ phylink_err(pl, "phy_config_inband: %pe\n",
+ ERR_PTR(err));
+ }
+
if (pl->sfp_bus) {
rate_kbd = phylink_interface_signal_rate(state->interface);
if (rate_kbd)
@@ -1261,17 +1450,16 @@ static int phylink_change_inband_advert(struct phylink *pl)
return 0;
phylink_dbg(pl, "%s: mode=%s/%s adv=%*pb pause=%02x\n", __func__,
- phylink_an_mode_str(pl->cur_link_an_mode),
+ phylink_an_mode_str(pl->req_link_an_mode),
phy_modes(pl->link_config.interface),
__ETHTOOL_LINK_MODE_MASK_NBITS, pl->link_config.advertising,
pl->link_config.pause);
/* Recompute the PCS neg mode */
- pl->pcs_neg_mode = phylink_pcs_neg_mode(pl->cur_link_an_mode,
- pl->link_config.interface,
- pl->link_config.advertising);
+ phylink_pcs_neg_mode(pl, pl->pcs, pl->link_config.interface,
+ pl->link_config.advertising);
- neg_mode = pl->cur_link_an_mode;
+ neg_mode = pl->act_link_an_mode;
if (pl->pcs->neg_mode)
neg_mode = pl->pcs_neg_mode;
@@ -1336,7 +1524,7 @@ static void phylink_mac_initial_config(struct phylink *pl, bool force_restart)
{
struct phylink_link_state link_state;
- switch (pl->cur_link_an_mode) {
+ switch (pl->req_link_an_mode) {
case MLO_AN_PHY:
link_state = pl->phy_state;
break;
@@ -1410,14 +1598,14 @@ static void phylink_link_up(struct phylink *pl,
pl->cur_interface = link_state.interface;
- neg_mode = pl->cur_link_an_mode;
+ neg_mode = pl->act_link_an_mode;
if (pl->pcs && pl->pcs->neg_mode)
neg_mode = pl->pcs_neg_mode;
phylink_pcs_link_up(pl->pcs, neg_mode, pl->cur_interface, speed,
duplex);
- pl->mac_ops->mac_link_up(pl->config, pl->phydev, pl->cur_link_an_mode,
+ pl->mac_ops->mac_link_up(pl->config, pl->phydev, pl->act_link_an_mode,
pl->cur_interface, speed, duplex,
!!(link_state.pause & MLO_PAUSE_TX), rx_pause);
@@ -1437,7 +1625,7 @@ static void phylink_link_down(struct phylink *pl)
if (ndev)
netif_carrier_off(ndev);
- pl->mac_ops->mac_link_down(pl->config, pl->cur_link_an_mode,
+ pl->mac_ops->mac_link_down(pl->config, pl->act_link_an_mode,
pl->cur_interface);
phylink_info(pl, "Link is Down\n");
}
@@ -1463,10 +1651,10 @@ static void phylink_resolve(struct work_struct *w)
} else if (pl->link_failed) {
link_state.link = false;
retrigger = true;
- } else if (pl->cur_link_an_mode == MLO_AN_FIXED) {
+ } else if (pl->act_link_an_mode == MLO_AN_FIXED) {
phylink_get_fixed_state(pl, &link_state);
mac_config = link_state.link;
- } else if (pl->cur_link_an_mode == MLO_AN_PHY) {
+ } else if (pl->act_link_an_mode == MLO_AN_PHY) {
link_state = pl->phy_state;
mac_config = link_state.link;
} else {
@@ -1520,7 +1708,7 @@ static void phylink_resolve(struct work_struct *w)
}
}
- if (pl->cur_link_an_mode != MLO_AN_FIXED)
+ if (pl->act_link_an_mode != MLO_AN_FIXED)
phylink_apply_manual_flow(pl, &link_state);
if (mac_config) {
@@ -1644,7 +1832,7 @@ int phylink_set_fixed_link(struct phylink *pl,
pl->link_config.an_complete = 1;
pl->cfg_link_an_mode = MLO_AN_FIXED;
- pl->cur_link_an_mode = pl->cfg_link_an_mode;
+ pl->req_link_an_mode = pl->cfg_link_an_mode;
return 0;
}
@@ -1732,7 +1920,7 @@ struct phylink *phylink_create(struct phylink_config *config,
}
}
- pl->cur_link_an_mode = pl->cfg_link_an_mode;
+ pl->req_link_an_mode = pl->cfg_link_an_mode;
ret = phylink_register_sfp(pl, fwnode);
if (ret < 0) {
@@ -2189,7 +2377,7 @@ void phylink_start(struct phylink *pl)
ASSERT_RTNL();
phylink_info(pl, "configuring for %s/%s link mode\n",
- phylink_an_mode_str(pl->cur_link_an_mode),
+ phylink_an_mode_str(pl->req_link_an_mode),
phy_modes(pl->link_config.interface));
/* Always set the carrier off */
@@ -2474,7 +2662,7 @@ int phylink_ethtool_ksettings_get(struct phylink *pl,
linkmode_copy(kset->link_modes.supported, pl->supported);
- switch (pl->cur_link_an_mode) {
+ switch (pl->act_link_an_mode) {
case MLO_AN_FIXED:
/* We are using fixed settings. Report these as the
* current link settings - and note that these also
@@ -2505,6 +2693,26 @@ int phylink_ethtool_ksettings_get(struct phylink *pl,
}
EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get);
+static bool phylink_validate_pcs_inband_autoneg(struct phylink *pl,
+ phy_interface_t interface,
+ unsigned long *adv)
+{
+ unsigned int inband = phylink_inband_caps(pl, interface);
+ unsigned int mask;
+
+ /* If the PCS doesn't implement inband support, be permissive. */
+ if (!inband)
+ return true;
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, adv))
+ mask = LINK_INBAND_ENABLE;
+ else
+ mask = LINK_INBAND_DISABLE;
+
+ /* Check whether the PCS implements the required mode */
+ return !!(inband & mask);
+}
+
/**
* phylink_ethtool_ksettings_set() - set the link settings
* @pl: a pointer to a &struct phylink returned from phylink_create()
@@ -2566,7 +2774,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
/* If we have a fixed link, refuse to change link parameters.
* If the link parameters match, accept them but do nothing.
*/
- if (pl->cur_link_an_mode == MLO_AN_FIXED) {
+ if (pl->req_link_an_mode == MLO_AN_FIXED) {
if (s->speed != pl->link_config.speed ||
s->duplex != pl->link_config.duplex)
return -EINVAL;
@@ -2582,7 +2790,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
* is our default case) but do not allow the advertisement to
* be changed. If the advertisement matches, simply return.
*/
- if (pl->cur_link_an_mode == MLO_AN_FIXED) {
+ if (pl->req_link_an_mode == MLO_AN_FIXED) {
if (!linkmode_equal(config.advertising,
pl->link_config.advertising))
return -EINVAL;
@@ -2617,7 +2825,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
linkmode_copy(support, pl->supported);
if (phylink_validate(pl, support, &config)) {
phylink_err(pl, "validation of %s/%s with support %*pb failed\n",
- phylink_an_mode_str(pl->cur_link_an_mode),
+ phylink_an_mode_str(pl->req_link_an_mode),
phy_modes(config.interface),
__ETHTOOL_LINK_MODE_MASK_NBITS, support);
return -EINVAL;
@@ -2635,6 +2843,13 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
phylink_is_empty_linkmode(config.advertising))
return -EINVAL;
+ /* Validate the autonegotiation state. We don't have a PHY in this
+ * situation, so the PCS is the media-facing entity.
+ */
+ if (!phylink_validate_pcs_inband_autoneg(pl, config.interface,
+ config.advertising))
+ return -EINVAL;
+
mutex_lock(&pl->state_mutex);
pl->link_config.speed = config.speed;
pl->link_config.duplex = config.duplex;
@@ -2717,7 +2932,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
ASSERT_RTNL();
- if (pl->cur_link_an_mode == MLO_AN_FIXED)
+ if (pl->req_link_an_mode == MLO_AN_FIXED)
return -EOPNOTSUPP;
if (!phylink_test(pl->supported, Pause) &&
@@ -2981,7 +3196,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id,
struct phylink_link_state state;
int val = 0xffff;
- switch (pl->cur_link_an_mode) {
+ switch (pl->act_link_an_mode) {
case MLO_AN_FIXED:
if (phy_id == 0) {
phylink_get_fixed_state(pl, &state);
@@ -3006,7 +3221,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id,
static int phylink_mii_write(struct phylink *pl, unsigned int phy_id,
unsigned int reg, unsigned int val)
{
- switch (pl->cur_link_an_mode) {
+ switch (pl->act_link_an_mode) {
case MLO_AN_FIXED:
break;
@@ -3176,10 +3391,11 @@ static phy_interface_t phylink_choose_sfp_interface(struct phylink *pl,
return interface;
}
-static void phylink_sfp_set_config(struct phylink *pl, u8 mode,
+static void phylink_sfp_set_config(struct phylink *pl,
unsigned long *supported,
struct phylink_link_state *state)
{
+ u8 mode = MLO_AN_INBAND;
bool changed = false;
phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n",
@@ -3196,9 +3412,9 @@ static void phylink_sfp_set_config(struct phylink *pl, u8 mode,
changed = true;
}
- if (pl->cur_link_an_mode != mode ||
+ if (pl->req_link_an_mode != mode ||
pl->link_config.interface != state->interface) {
- pl->cur_link_an_mode = mode;
+ pl->req_link_an_mode = mode;
pl->link_config.interface = state->interface;
changed = true;
@@ -3213,8 +3429,7 @@ static void phylink_sfp_set_config(struct phylink *pl, u8 mode,
phylink_mac_initial_config(pl, false);
}
-static int phylink_sfp_config_phy(struct phylink *pl, u8 mode,
- struct phy_device *phy)
+static int phylink_sfp_config_phy(struct phylink *pl, struct phy_device *phy)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(support);
struct phylink_link_state config;
@@ -3258,7 +3473,7 @@ static int phylink_sfp_config_phy(struct phylink *pl, u8 mode,
pl->link_port = pl->sfp_port;
- phylink_sfp_set_config(pl, mode, support, &config);
+ phylink_sfp_set_config(pl, support, &config);
return 0;
}
@@ -3314,6 +3529,12 @@ static int phylink_sfp_config_optical(struct phylink *pl)
phylink_dbg(pl, "optical SFP: chosen %s interface\n",
phy_modes(interface));
+ if (!phylink_validate_pcs_inband_autoneg(pl, interface,
+ config.advertising)) {
+ phylink_err(pl, "autoneg setting not compatible with PCS");
+ return -EINVAL;
+ }
+
config.interface = interface;
/* Ignore errors if we're expecting a PHY to attach later */
@@ -3327,7 +3548,7 @@ static int phylink_sfp_config_optical(struct phylink *pl)
pl->link_port = pl->sfp_port;
- phylink_sfp_set_config(pl, MLO_AN_INBAND, pl->sfp_support, &config);
+ phylink_sfp_set_config(pl, pl->sfp_support, &config);
return 0;
}
@@ -3398,19 +3619,9 @@ static void phylink_sfp_link_up(void *upstream)
phylink_enable_and_run_resolve(pl, PHYLINK_DISABLE_LINK);
}
-/* The Broadcom BCM84881 in the Methode DM7052 is unable to provide a SGMII
- * or 802.3z control word, so inband will not work.
- */
-static bool phylink_phy_no_inband(struct phy_device *phy)
-{
- return phy->is_c45 && phy_id_compare(phy->c45_ids.device_ids[1],
- 0xae025150, 0xfffffff0);
-}
-
static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
{
struct phylink *pl = upstream;
- u8 mode;
/*
* This is the new way of dealing with flow control for PHYs,
@@ -3421,17 +3632,12 @@ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
*/
phy_support_asym_pause(phy);
- if (phylink_phy_no_inband(phy))
- mode = MLO_AN_PHY;
- else
- mode = MLO_AN_INBAND;
-
/* Set the PHY's host supported interfaces */
phy_interface_and(phy->host_interfaces, phylink_sfp_interfaces,
pl->config->supported_interfaces);
/* Do the initial configuration */
- return phylink_sfp_config_phy(pl, mode, phy);
+ return phylink_sfp_config_phy(pl, phy);
}
static void phylink_sfp_disconnect_phy(void *upstream,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d7a865ef370b..8e94df88392c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -574,14 +574,18 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
return ret;
}
-static inline bool tun_not_capable(struct tun_struct *tun)
+static inline bool tun_capable(struct tun_struct *tun)
{
const struct cred *cred = current_cred();
struct net *net = dev_net(tun->dev);
- return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
- (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
- !ns_capable(net->user_ns, CAP_NET_ADMIN);
+ if (ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return 1;
+ if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner))
+ return 1;
+ if (gid_valid(tun->group) && in_egroup_p(tun->group))
+ return 1;
+ return 0;
}
static void tun_set_real_num_queues(struct tun_struct *tun)
@@ -2778,7 +2782,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
!!(tun->flags & IFF_MULTI_QUEUE))
return -EINVAL;
- if (tun_not_capable(tun))
+ if (!tun_capable(tun))
return -EPERM;
err = security_tun_dev_open(tun->security);
if (err < 0)
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 531b1b6a37d1..4661d131b190 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -472,10 +472,6 @@ struct lan78xx_net {
struct irq_domain_data domain_data;
};
-/* define external phy id */
-#define PHY_LAN8835 (0x0007C130)
-#define PHY_KSZ9031RNX (0x00221620)
-
/* use ethtool to change the level for any given device */
static int msg_level = -1;
module_param(msg_level, int, 0);
@@ -625,8 +621,8 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
*data = *buf;
} else if (net_ratelimit()) {
netdev_warn(dev->net,
- "Failed to read register index 0x%08x. ret = %d",
- index, ret);
+ "Failed to read register index 0x%08x. ret = %pe",
+ index, ERR_PTR(ret));
}
kfree(buf);
@@ -656,8 +652,8 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
if (unlikely(ret < 0) &&
net_ratelimit()) {
netdev_warn(dev->net,
- "Failed to write register index 0x%08x. ret = %d",
- index, ret);
+ "Failed to write register index 0x%08x. ret = %pe",
+ index, ERR_PTR(ret));
}
kfree(buf);
@@ -678,11 +674,7 @@ static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask,
buf &= ~mask;
buf |= (mask & data);
- ret = lan78xx_write_reg(dev, reg, buf);
- if (ret < 0)
- return ret;
-
- return 0;
+ return lan78xx_write_reg(dev, reg, buf);
}
static int lan78xx_read_stats(struct lan78xx_net *dev,
@@ -812,8 +804,158 @@ static void lan78xx_update_stats(struct lan78xx_net *dev)
usb_autopm_put_interface(dev->intf);
}
+static int lan78xx_start_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enable)
+{
+ return lan78xx_update_reg(dev, reg, hw_enable, hw_enable);
+}
+
+static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled,
+ u32 hw_disabled)
+{
+ unsigned long timeout;
+ bool stopped = true;
+ int ret;
+ u32 buf;
+
+ /* Stop the h/w block (if not already stopped) */
+
+ ret = lan78xx_read_reg(dev, reg, &buf);
+ if (ret < 0)
+ return ret;
+
+ if (buf & hw_enabled) {
+ buf &= ~hw_enabled;
+
+ ret = lan78xx_write_reg(dev, reg, buf);
+ if (ret < 0)
+ return ret;
+
+ stopped = false;
+ timeout = jiffies + HW_DISABLE_TIMEOUT;
+ do {
+ ret = lan78xx_read_reg(dev, reg, &buf);
+ if (ret < 0)
+ return ret;
+
+ if (buf & hw_disabled)
+ stopped = true;
+ else
+ msleep(HW_DISABLE_DELAY_MS);
+ } while (!stopped && !time_after(jiffies, timeout));
+ }
+
+ ret = stopped ? 0 : -ETIME;
+
+ return ret;
+}
+
+static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush)
+{
+ return lan78xx_update_reg(dev, reg, fifo_flush, fifo_flush);
+}
+
+static int lan78xx_start_tx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "start tx path");
+
+ /* Start the MAC transmitter */
+
+ ret = lan78xx_start_hw(dev, MAC_TX, MAC_TX_TXEN_);
+ if (ret < 0)
+ return ret;
+
+ /* Start the Tx FIFO */
+
+ ret = lan78xx_start_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int lan78xx_stop_tx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "stop tx path");
+
+ /* Stop the Tx FIFO */
+
+ ret = lan78xx_stop_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_, FCT_TX_CTL_DIS_);
+ if (ret < 0)
+ return ret;
+
+ /* Stop the MAC transmitter */
+
+ ret = lan78xx_stop_hw(dev, MAC_TX, MAC_TX_TXEN_, MAC_TX_TXD_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/* The caller must ensure the Tx path is stopped before calling
+ * lan78xx_flush_tx_fifo().
+ */
+static int lan78xx_flush_tx_fifo(struct lan78xx_net *dev)
+{
+ return lan78xx_flush_fifo(dev, FCT_TX_CTL, FCT_TX_CTL_RST_);
+}
+
+static int lan78xx_start_rx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "start rx path");
+
+ /* Start the Rx FIFO */
+
+ ret = lan78xx_start_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_);
+ if (ret < 0)
+ return ret;
+
+ /* Start the MAC receiver */
+
+ ret = lan78xx_start_hw(dev, MAC_RX, MAC_RX_RXEN_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int lan78xx_stop_rx_path(struct lan78xx_net *dev)
+{
+ int ret;
+
+ netif_dbg(dev, drv, dev->net, "stop rx path");
+
+ /* Stop the MAC receiver */
+
+ ret = lan78xx_stop_hw(dev, MAC_RX, MAC_RX_RXEN_, MAC_RX_RXD_);
+ if (ret < 0)
+ return ret;
+
+ /* Stop the Rx FIFO */
+
+ ret = lan78xx_stop_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_, FCT_RX_CTL_DIS_);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/* The caller must ensure the Rx path is stopped before calling
+ * lan78xx_flush_rx_fifo().
+ */
+static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev)
+{
+ return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_);
+}
+
/* Loop until the read is completed with timeout called with phy_mutex held */
-static int lan78xx_phy_wait_not_busy(struct lan78xx_net *dev)
+static int lan78xx_mdiobus_wait_not_busy(struct lan78xx_net *dev)
{
unsigned long start_time = jiffies;
u32 val;
@@ -821,14 +963,14 @@ static int lan78xx_phy_wait_not_busy(struct lan78xx_net *dev)
do {
ret = lan78xx_read_reg(dev, MII_ACC, &val);
- if (unlikely(ret < 0))
- return -EIO;
+ if (ret < 0)
+ return ret;
if (!(val & MII_ACC_MII_BUSY_))
return 0;
} while (!time_after(jiffies, start_time + HZ));
- return -EIO;
+ return -ETIMEDOUT;
}
static inline u32 mii_access(int id, int index, int read)
@@ -854,8 +996,8 @@ static int lan78xx_wait_eeprom(struct lan78xx_net *dev)
do {
ret = lan78xx_read_reg(dev, E2P_CMD, &val);
- if (unlikely(ret < 0))
- return -EIO;
+ if (ret < 0)
+ return ret;
if (!(val & E2P_CMD_EPC_BUSY_) ||
(val & E2P_CMD_EPC_TIMEOUT_))
@@ -865,7 +1007,7 @@ static int lan78xx_wait_eeprom(struct lan78xx_net *dev)
if (val & (E2P_CMD_EPC_TIMEOUT_ | E2P_CMD_EPC_BUSY_)) {
netdev_warn(dev->net, "EEPROM read operation timeout");
- return -EIO;
+ return -ETIMEDOUT;
}
return 0;
@@ -879,8 +1021,8 @@ static int lan78xx_eeprom_confirm_not_busy(struct lan78xx_net *dev)
do {
ret = lan78xx_read_reg(dev, E2P_CMD, &val);
- if (unlikely(ret < 0))
- return -EIO;
+ if (ret < 0)
+ return ret;
if (!(val & E2P_CMD_EPC_BUSY_))
return 0;
@@ -889,75 +1031,81 @@ static int lan78xx_eeprom_confirm_not_busy(struct lan78xx_net *dev)
} while (!time_after(jiffies, start_time + HZ));
netdev_warn(dev->net, "EEPROM is busy");
- return -EIO;
+ return -ETIMEDOUT;
}
static int lan78xx_read_raw_eeprom(struct lan78xx_net *dev, u32 offset,
u32 length, u8 *data)
{
- u32 val;
- u32 saved;
+ u32 val, saved;
int i, ret;
- int retval;
/* depends on chip, some EEPROM pins are muxed with LED function.
* disable & restore LED function to access EEPROM.
*/
ret = lan78xx_read_reg(dev, HW_CFG, &val);
+ if (ret < 0)
+ return ret;
+
saved = val;
if (dev->chipid == ID_REV_CHIP_ID_7800_) {
val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
ret = lan78xx_write_reg(dev, HW_CFG, val);
+ if (ret < 0)
+ return ret;
}
- retval = lan78xx_eeprom_confirm_not_busy(dev);
- if (retval)
- return retval;
+ ret = lan78xx_eeprom_confirm_not_busy(dev);
+ if (ret == -ETIMEDOUT)
+ goto read_raw_eeprom_done;
+ /* If USB fails, there is nothing to do */
+ if (ret < 0)
+ return ret;
for (i = 0; i < length; i++) {
val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_READ_;
val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
ret = lan78xx_write_reg(dev, E2P_CMD, val);
- if (unlikely(ret < 0)) {
- retval = -EIO;
- goto exit;
- }
+ if (ret < 0)
+ return ret;
- retval = lan78xx_wait_eeprom(dev);
- if (retval < 0)
- goto exit;
+ ret = lan78xx_wait_eeprom(dev);
+ /* Not a USB-specific error, so try to recover */
+ if (ret == -ETIMEDOUT)
+ goto read_raw_eeprom_done;
+ /* If USB fails, there is nothing to do */
+ if (ret < 0)
+ return ret;
ret = lan78xx_read_reg(dev, E2P_DATA, &val);
- if (unlikely(ret < 0)) {
- retval = -EIO;
- goto exit;
- }
+ if (ret < 0)
+ return ret;
data[i] = val & 0xFF;
offset++;
}
- retval = 0;
-exit:
+read_raw_eeprom_done:
if (dev->chipid == ID_REV_CHIP_ID_7800_)
- ret = lan78xx_write_reg(dev, HW_CFG, saved);
+ return lan78xx_write_reg(dev, HW_CFG, saved);
- return retval;
+ return 0;
}
static int lan78xx_read_eeprom(struct lan78xx_net *dev, u32 offset,
u32 length, u8 *data)
{
- u8 sig;
int ret;
+ u8 sig;
ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig);
- if ((ret == 0) && (sig == EEPROM_INDICATOR))
- ret = lan78xx_read_raw_eeprom(dev, offset, length, data);
- else
- ret = -EINVAL;
+ if (ret < 0)
+ return ret;
- return ret;
+ if (sig != EEPROM_INDICATOR)
+ return -ENODATA;
+
+ return lan78xx_read_raw_eeprom(dev, offset, length, data);
}
static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset,
@@ -966,113 +1114,144 @@ static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset,
u32 val;
u32 saved;
int i, ret;
- int retval;
/* depends on chip, some EEPROM pins are muxed with LED function.
* disable & restore LED function to access EEPROM.
*/
ret = lan78xx_read_reg(dev, HW_CFG, &val);
+ if (ret < 0)
+ return ret;
+
saved = val;
if (dev->chipid == ID_REV_CHIP_ID_7800_) {
val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_);
ret = lan78xx_write_reg(dev, HW_CFG, val);
+ if (ret < 0)
+ return ret;
}
- retval = lan78xx_eeprom_confirm_not_busy(dev);
- if (retval)
- goto exit;
+ ret = lan78xx_eeprom_confirm_not_busy(dev);
+ /* Looks like not USB specific error, try to recover */
+ if (ret == -ETIMEDOUT)
+ goto write_raw_eeprom_done;
+ /* If USB fails, there is nothing to do */
+ if (ret < 0)
+ return ret;
/* Issue write/erase enable command */
val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_EWEN_;
ret = lan78xx_write_reg(dev, E2P_CMD, val);
- if (unlikely(ret < 0)) {
- retval = -EIO;
- goto exit;
- }
+ if (ret < 0)
+ return ret;
- retval = lan78xx_wait_eeprom(dev);
- if (retval < 0)
- goto exit;
+ ret = lan78xx_wait_eeprom(dev);
+ /* Looks like not USB specific error, try to recover */
+ if (ret == -ETIMEDOUT)
+ goto write_raw_eeprom_done;
+ /* If USB fails, there is nothing to do */
+ if (ret < 0)
+ return ret;
for (i = 0; i < length; i++) {
/* Fill data register */
val = data[i];
ret = lan78xx_write_reg(dev, E2P_DATA, val);
- if (ret < 0) {
- retval = -EIO;
- goto exit;
- }
+ if (ret < 0)
+ return ret;
/* Send "write" command */
val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_WRITE_;
val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
ret = lan78xx_write_reg(dev, E2P_CMD, val);
- if (ret < 0) {
- retval = -EIO;
- goto exit;
- }
+ if (ret < 0)
+ return ret;
- retval = lan78xx_wait_eeprom(dev);
- if (retval < 0)
- goto exit;
+ ret = lan78xx_wait_eeprom(dev);
+ /* Looks like not USB specific error, try to recover */
+ if (ret == -ETIMEDOUT)
+ goto write_raw_eeprom_done;
+ /* If USB fails, there is nothing to do */
+ if (ret < 0)
+ return ret;
offset++;
}
- retval = 0;
-exit:
+write_raw_eeprom_done:
if (dev->chipid == ID_REV_CHIP_ID_7800_)
- ret = lan78xx_write_reg(dev, HW_CFG, saved);
+ return lan78xx_write_reg(dev, HW_CFG, saved);
- return retval;
+ return 0;
}
static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
u32 length, u8 *data)
{
- int i;
- u32 buf;
unsigned long timeout;
+ int ret, i;
+ u32 buf;
- lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ if (ret < 0)
+ return ret;
if (buf & OTP_PWR_DN_PWRDN_N_) {
/* clear it and wait to be cleared */
- lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+ ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+ if (ret < 0)
+ return ret;
timeout = jiffies + HZ;
do {
usleep_range(1, 10);
- lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ if (ret < 0)
+ return ret;
+
if (time_after(jiffies, timeout)) {
netdev_warn(dev->net,
"timeout on OTP_PWR_DN");
- return -EIO;
+ return -ETIMEDOUT;
}
} while (buf & OTP_PWR_DN_PWRDN_N_);
}
for (i = 0; i < length; i++) {
- lan78xx_write_reg(dev, OTP_ADDR1,
- ((offset + i) >> 8) & OTP_ADDR1_15_11);
- lan78xx_write_reg(dev, OTP_ADDR2,
- ((offset + i) & OTP_ADDR2_10_3));
+ ret = lan78xx_write_reg(dev, OTP_ADDR1,
+ ((offset + i) >> 8) & OTP_ADDR1_15_11);
+ if (ret < 0)
+ return ret;
- lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
- lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+ ret = lan78xx_write_reg(dev, OTP_ADDR2,
+ ((offset + i) & OTP_ADDR2_10_3));
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+ if (ret < 0)
+ return ret;
timeout = jiffies + HZ;
do {
udelay(1);
- lan78xx_read_reg(dev, OTP_STATUS, &buf);
+ ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
+ if (ret < 0)
+ return ret;
+
if (time_after(jiffies, timeout)) {
netdev_warn(dev->net,
"timeout on OTP_STATUS");
- return -EIO;
+ return -ETIMEDOUT;
}
} while (buf & OTP_STATUS_BUSY_);
- lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
+ ret = lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
+ if (ret < 0)
+ return ret;
data[i] = (u8)(buf & 0xFF);
}
@@ -1086,45 +1265,72 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
int i;
u32 buf;
unsigned long timeout;
+ int ret;
- lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ if (ret < 0)
+ return ret;
if (buf & OTP_PWR_DN_PWRDN_N_) {
/* clear it and wait to be cleared */
- lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+ ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+ if (ret < 0)
+ return ret;
timeout = jiffies + HZ;
do {
udelay(1);
- lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ if (ret < 0)
+ return ret;
+
if (time_after(jiffies, timeout)) {
netdev_warn(dev->net,
"timeout on OTP_PWR_DN completion");
- return -EIO;
+ return -ETIMEDOUT;
}
} while (buf & OTP_PWR_DN_PWRDN_N_);
}
/* set to BYTE program mode */
- lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
+ ret = lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
+ if (ret < 0)
+ return ret;
for (i = 0; i < length; i++) {
- lan78xx_write_reg(dev, OTP_ADDR1,
- ((offset + i) >> 8) & OTP_ADDR1_15_11);
- lan78xx_write_reg(dev, OTP_ADDR2,
- ((offset + i) & OTP_ADDR2_10_3));
- lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
- lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
- lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+ ret = lan78xx_write_reg(dev, OTP_ADDR1,
+ ((offset + i) >> 8) & OTP_ADDR1_15_11);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, OTP_ADDR2,
+ ((offset + i) & OTP_ADDR2_10_3));
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+ if (ret < 0)
+ return ret;
timeout = jiffies + HZ;
do {
udelay(1);
- lan78xx_read_reg(dev, OTP_STATUS, &buf);
+ ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
+ if (ret < 0)
+ return ret;
+
if (time_after(jiffies, timeout)) {
netdev_warn(dev->net,
"Timeout on OTP_STATUS completion");
- return -EIO;
+ return -ETIMEDOUT;
}
} while (buf & OTP_STATUS_BUSY_);
}
@@ -1161,7 +1367,7 @@ static int lan78xx_dataport_wait_not_busy(struct lan78xx_net *dev)
ret = lan78xx_read_reg(dev, DP_SEL, &dp_sel);
if (unlikely(ret < 0))
- return -EIO;
+ return ret;
if (dp_sel & DP_SEL_DPRDY_)
return 0;
@@ -1171,44 +1377,51 @@ static int lan78xx_dataport_wait_not_busy(struct lan78xx_net *dev)
netdev_warn(dev->net, "%s timed out", __func__);
- return -EIO;
+ return -ETIMEDOUT;
}
static int lan78xx_dataport_write(struct lan78xx_net *dev, u32 ram_select,
u32 addr, u32 length, u32 *buf)
{
struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
- u32 dp_sel;
int i, ret;
- if (usb_autopm_get_interface(dev->intf) < 0)
- return 0;
+ ret = usb_autopm_get_interface(dev->intf);
+ if (ret < 0)
+ return ret;
mutex_lock(&pdata->dataport_mutex);
ret = lan78xx_dataport_wait_not_busy(dev);
if (ret < 0)
- goto done;
-
- ret = lan78xx_read_reg(dev, DP_SEL, &dp_sel);
+ goto dataport_write;
- dp_sel &= ~DP_SEL_RSEL_MASK_;
- dp_sel |= ram_select;
- ret = lan78xx_write_reg(dev, DP_SEL, dp_sel);
+ ret = lan78xx_update_reg(dev, DP_SEL, DP_SEL_RSEL_MASK_, ram_select);
+ if (ret < 0)
+ goto dataport_write;
for (i = 0; i < length; i++) {
ret = lan78xx_write_reg(dev, DP_ADDR, addr + i);
+ if (ret < 0)
+ goto dataport_write;
ret = lan78xx_write_reg(dev, DP_DATA, buf[i]);
+ if (ret < 0)
+ goto dataport_write;
ret = lan78xx_write_reg(dev, DP_CMD, DP_CMD_WRITE_);
+ if (ret < 0)
+ goto dataport_write;
ret = lan78xx_dataport_wait_not_busy(dev);
if (ret < 0)
- goto done;
+ goto dataport_write;
}
-done:
+dataport_write:
+ if (ret < 0)
+ netdev_warn(dev->net, "dataport write failed %pe", ERR_PTR(ret));
+
mutex_unlock(&pdata->dataport_mutex);
usb_autopm_put_interface(dev->intf);
@@ -1244,23 +1457,39 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
struct lan78xx_priv *pdata =
container_of(param, struct lan78xx_priv, set_multicast);
struct lan78xx_net *dev = pdata->dev;
- int i;
+ int i, ret;
netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
pdata->rfe_ctl);
- lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, DP_SEL_VHF_VLAN_LEN,
- DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+ ret = lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_,
+ DP_SEL_VHF_VLAN_LEN,
+ DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+ if (ret < 0)
+ goto multicast_write_done;
for (i = 1; i < NUM_OF_MAF; i++) {
- lan78xx_write_reg(dev, MAF_HI(i), 0);
- lan78xx_write_reg(dev, MAF_LO(i),
- pdata->pfilter_table[i][1]);
- lan78xx_write_reg(dev, MAF_HI(i),
- pdata->pfilter_table[i][0]);
+ ret = lan78xx_write_reg(dev, MAF_HI(i), 0);
+ if (ret < 0)
+ goto multicast_write_done;
+
+ ret = lan78xx_write_reg(dev, MAF_LO(i),
+ pdata->pfilter_table[i][1]);
+ if (ret < 0)
+ goto multicast_write_done;
+
+ ret = lan78xx_write_reg(dev, MAF_HI(i),
+ pdata->pfilter_table[i][0]);
+ if (ret < 0)
+ goto multicast_write_done;
}
- lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+ ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+
+multicast_write_done:
+ if (ret < 0)
+ netdev_warn(dev->net, "multicast write failed %pe", ERR_PTR(ret));
+ return;
}
static void lan78xx_set_multicast(struct net_device *netdev)
@@ -1375,7 +1604,7 @@ static int lan78xx_mac_reset(struct lan78xx_net *dev)
* bus can result in the MAC interface locking up and not
* completing register access transactions.
*/
- ret = lan78xx_phy_wait_not_busy(dev);
+ ret = lan78xx_mdiobus_wait_not_busy(dev);
if (ret < 0)
goto done;
@@ -1920,13 +2149,19 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
.get_regs = lan78xx_get_regs,
};
-static void lan78xx_init_mac_address(struct lan78xx_net *dev)
+static int lan78xx_init_mac_address(struct lan78xx_net *dev)
{
u32 addr_lo, addr_hi;
u8 addr[6];
+ int ret;
- lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
- lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
+ ret = lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
+ if (ret < 0)
+ return ret;
addr[0] = addr_lo & 0xFF;
addr[1] = (addr_lo >> 8) & 0xFF;
@@ -1959,14 +2194,26 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
(addr[2] << 16) | (addr[3] << 24);
addr_hi = addr[4] | (addr[5] << 8);
- lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
- lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+ ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+ if (ret < 0)
+ return ret;
}
- lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
- lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+ ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+ if (ret < 0)
+ return ret;
eth_hw_addr_set(dev->net, addr);
+
+ return 0;
}
/* MDIO read and write wrappers for phylib */
@@ -1983,19 +2230,23 @@ static int lan78xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx)
mutex_lock(&dev->phy_mutex);
/* confirm MII not busy */
- ret = lan78xx_phy_wait_not_busy(dev);
+ ret = lan78xx_mdiobus_wait_not_busy(dev);
if (ret < 0)
goto done;
/* set the address, index & direction (read from PHY) */
addr = mii_access(phy_id, idx, MII_READ);
ret = lan78xx_write_reg(dev, MII_ACC, addr);
+ if (ret < 0)
+ goto done;
- ret = lan78xx_phy_wait_not_busy(dev);
+ ret = lan78xx_mdiobus_wait_not_busy(dev);
if (ret < 0)
goto done;
ret = lan78xx_read_reg(dev, MII_DATA, &val);
+ if (ret < 0)
+ goto done;
ret = (int)(val & 0xFFFF);
@@ -2020,25 +2271,29 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx,
mutex_lock(&dev->phy_mutex);
/* confirm MII not busy */
- ret = lan78xx_phy_wait_not_busy(dev);
+ ret = lan78xx_mdiobus_wait_not_busy(dev);
if (ret < 0)
goto done;
val = (u32)regval;
ret = lan78xx_write_reg(dev, MII_DATA, val);
+ if (ret < 0)
+ goto done;
/* set the address, index & direction (write to PHY) */
addr = mii_access(phy_id, idx, MII_WRITE);
ret = lan78xx_write_reg(dev, MII_ACC, addr);
+ if (ret < 0)
+ goto done;
- ret = lan78xx_phy_wait_not_busy(dev);
+ ret = lan78xx_mdiobus_wait_not_busy(dev);
if (ret < 0)
goto done;
done:
mutex_unlock(&dev->phy_mutex);
usb_autopm_put_interface(dev->intf);
- return 0;
+ return ret;
}
static int lan78xx_mdio_init(struct lan78xx_net *dev)
@@ -2164,13 +2419,22 @@ static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd)
struct lan78xx_net *dev =
container_of(data, struct lan78xx_net, domain_data);
u32 buf;
+ int ret;
/* call register access here because irq_bus_lock & irq_bus_sync_unlock
* are only two callbacks executed in non-atomic contex.
*/
- lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+ ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+ if (ret < 0)
+ goto irq_bus_sync_unlock;
+
if (buf != data->irqenable)
- lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
+ ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
+
+irq_bus_sync_unlock:
+ if (ret < 0)
+ netdev_err(dev->net, "Failed to sync IRQ enable register: %pe\n",
+ ERR_PTR(ret));
mutex_unlock(&data->irq_lock);
}
@@ -2195,7 +2459,10 @@ static int lan78xx_setup_irq_domain(struct lan78xx_net *dev)
mutex_init(&dev->domain_data.irq_lock);
- lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+ ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+ if (ret < 0)
+ return ret;
+
dev->domain_data.irqenable = buf;
dev->domain_data.irqchip = &lan78xx_irqchip;
@@ -2234,46 +2501,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev)
dev->domain_data.irqdomain = NULL;
}
-static int lan8835_fixup(struct phy_device *phydev)
-{
- int buf;
- struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
-
- /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */
- buf = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x8010);
- buf &= ~0x1800;
- buf |= 0x0800;
- phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf);
-
- /* RGMII MAC TXC Delay Enable */
- lan78xx_write_reg(dev, MAC_RGMII_ID,
- MAC_RGMII_ID_TXC_DELAY_EN_);
-
- /* RGMII TX DLL Tune Adjust */
- lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
-
- dev->interface = PHY_INTERFACE_MODE_RGMII_TXID;
-
- return 1;
-}
-
-static int ksz9031rnx_fixup(struct phy_device *phydev)
-{
- struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
-
- /* Micrel9301RNX PHY configuration */
- /* RGMII Control Signal Pad Skew */
- phy_write_mmd(phydev, MDIO_MMD_WIS, 4, 0x0077);
- /* RGMII RX Data Pad Skew */
- phy_write_mmd(phydev, MDIO_MMD_WIS, 5, 0x7777);
- /* RGMII RX Clock Pad Skew */
- phy_write_mmd(phydev, MDIO_MMD_WIS, 8, 0x1FF);
-
- dev->interface = PHY_INTERFACE_MODE_RGMII_RXID;
-
- return 1;
-}
-
static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev)
{
u32 buf;
@@ -2307,22 +2534,11 @@ static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev)
netdev_err(dev->net, "no PHY driver found\n");
return NULL;
}
- dev->interface = PHY_INTERFACE_MODE_RGMII;
- /* external PHY fixup for KSZ9031RNX */
- ret = phy_register_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0,
- ksz9031rnx_fixup);
- if (ret < 0) {
- netdev_err(dev->net, "Failed to register fixup for PHY_KSZ9031RNX\n");
- return NULL;
- }
- /* external PHY fixup for LAN8835 */
- ret = phy_register_fixup_for_uid(PHY_LAN8835, 0xfffffff0,
- lan8835_fixup);
- if (ret < 0) {
- netdev_err(dev->net, "Failed to register fixup for PHY_LAN8835\n");
- return NULL;
- }
- /* add more external PHY fixup here if needed */
+ dev->interface = PHY_INTERFACE_MODE_RGMII_ID;
+ /* The PHY driver is responsible for configuring proper RGMII
+ * interface delays. Disable RGMII delays on MAC side.
+ */
+ lan78xx_write_reg(dev, MAC_RGMII_ID, 0);
phydev->is_internal = false;
}
@@ -2381,11 +2597,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
if (phy_is_pseudo_fixed_link(phydev)) {
fixed_phy_unregister(phydev);
phy_device_free(phydev);
- } else {
- phy_unregister_fixup_for_uid(PHY_KSZ9031RNX,
- 0xfffffff0);
- phy_unregister_fixup_for_uid(PHY_LAN8835,
- 0xfffffff0);
}
}
return -EIO;
@@ -2437,27 +2648,36 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
{
- u32 buf;
bool rxenabled;
+ u32 buf;
+ int ret;
- lan78xx_read_reg(dev, MAC_RX, &buf);
+ ret = lan78xx_read_reg(dev, MAC_RX, &buf);
+ if (ret < 0)
+ return ret;
rxenabled = ((buf & MAC_RX_RXEN_) != 0);
if (rxenabled) {
buf &= ~MAC_RX_RXEN_;
- lan78xx_write_reg(dev, MAC_RX, buf);
+ ret = lan78xx_write_reg(dev, MAC_RX, buf);
+ if (ret < 0)
+ return ret;
}
/* add 4 to size for FCS */
buf &= ~MAC_RX_MAX_SIZE_MASK_;
buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_);
- lan78xx_write_reg(dev, MAC_RX, buf);
+ ret = lan78xx_write_reg(dev, MAC_RX, buf);
+ if (ret < 0)
+ return ret;
if (rxenabled) {
buf |= MAC_RX_RXEN_;
- lan78xx_write_reg(dev, MAC_RX, buf);
+ ret = lan78xx_write_reg(dev, MAC_RX, buf);
+ if (ret < 0)
+ return ret;
}
return 0;
@@ -2523,7 +2743,10 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
return ret;
ret = lan78xx_set_rx_max_frame_length(dev, max_frame_len);
- if (!ret)
+ if (ret < 0)
+ netdev_err(dev->net, "MTU changed to %d from %d failed with %pe\n",
+ new_mtu, netdev->mtu, ERR_PTR(ret));
+ else
WRITE_ONCE(netdev->mtu, new_mtu);
usb_autopm_put_interface(dev->intf);
@@ -2536,6 +2759,7 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
struct lan78xx_net *dev = netdev_priv(netdev);
struct sockaddr *addr = p;
u32 addr_lo, addr_hi;
+ int ret;
if (netif_running(netdev))
return -EBUSY;
@@ -2552,14 +2776,20 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
addr_hi = netdev->dev_addr[4] |
netdev->dev_addr[5] << 8;
- lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
- lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+ ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+ if (ret < 0)
+ return ret;
+
+ ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+ if (ret < 0)
+ return ret;
/* Added to support MAC address changes */
- lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
- lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+ ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+ if (ret < 0)
+ return ret;
- return 0;
+ return lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
}
/* Enable or disable Rx checksum offload engine */
@@ -2592,9 +2822,7 @@ static int lan78xx_set_features(struct net_device *netdev,
spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags);
- lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
-
- return 0;
+ return lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
}
static void lan78xx_deferred_vlan_write(struct work_struct *param)
@@ -2645,13 +2873,16 @@ static int lan78xx_vlan_rx_kill_vid(struct net_device *netdev,
return 0;
}
-static void lan78xx_init_ltm(struct lan78xx_net *dev)
+static int lan78xx_init_ltm(struct lan78xx_net *dev)
{
+ u32 regs[6] = { 0 };
int ret;
u32 buf;
- u32 regs[6] = { 0 };
ret = lan78xx_read_reg(dev, USB_CFG1, &buf);
+ if (ret < 0)
+ goto init_ltm_failed;
+
if (buf & USB_CFG1_LTM_ENABLE_) {
u8 temp[2];
/* Get values from EEPROM first */
@@ -2662,7 +2893,7 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev)
24,
(u8 *)regs);
if (ret < 0)
- return;
+ return ret;
}
} else if (lan78xx_read_otp(dev, 0x3F, 2, temp) == 0) {
if (temp[0] == 24) {
@@ -2671,17 +2902,40 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev)
24,
(u8 *)regs);
if (ret < 0)
- return;
+ return ret;
}
}
}
- lan78xx_write_reg(dev, LTM_BELT_IDLE0, regs[0]);
- lan78xx_write_reg(dev, LTM_BELT_IDLE1, regs[1]);
- lan78xx_write_reg(dev, LTM_BELT_ACT0, regs[2]);
- lan78xx_write_reg(dev, LTM_BELT_ACT1, regs[3]);
- lan78xx_write_reg(dev, LTM_INACTIVE0, regs[4]);
- lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]);
+ ret = lan78xx_write_reg(dev, LTM_BELT_IDLE0, regs[0]);
+ if (ret < 0)
+ goto init_ltm_failed;
+
+ ret = lan78xx_write_reg(dev, LTM_BELT_IDLE1, regs[1]);
+ if (ret < 0)
+ goto init_ltm_failed;
+
+ ret = lan78xx_write_reg(dev, LTM_BELT_ACT0, regs[2]);
+ if (ret < 0)
+ goto init_ltm_failed;
+
+ ret = lan78xx_write_reg(dev, LTM_BELT_ACT1, regs[3]);
+ if (ret < 0)
+ goto init_ltm_failed;
+
+ ret = lan78xx_write_reg(dev, LTM_INACTIVE0, regs[4]);
+ if (ret < 0)
+ goto init_ltm_failed;
+
+ ret = lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]);
+ if (ret < 0)
+ goto init_ltm_failed;
+
+ return 0;
+
+init_ltm_failed:
+ netdev_err(dev->net, "Failed to init LTM with error %pe\n", ERR_PTR(ret));
+ return ret;
}
static int lan78xx_urb_config_init(struct lan78xx_net *dev)
@@ -2722,156 +2976,6 @@ static int lan78xx_urb_config_init(struct lan78xx_net *dev)
return result;
}
-static int lan78xx_start_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enable)
-{
- return lan78xx_update_reg(dev, reg, hw_enable, hw_enable);
-}
-
-static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled,
- u32 hw_disabled)
-{
- unsigned long timeout;
- bool stopped = true;
- int ret;
- u32 buf;
-
- /* Stop the h/w block (if not already stopped) */
-
- ret = lan78xx_read_reg(dev, reg, &buf);
- if (ret < 0)
- return ret;
-
- if (buf & hw_enabled) {
- buf &= ~hw_enabled;
-
- ret = lan78xx_write_reg(dev, reg, buf);
- if (ret < 0)
- return ret;
-
- stopped = false;
- timeout = jiffies + HW_DISABLE_TIMEOUT;
- do {
- ret = lan78xx_read_reg(dev, reg, &buf);
- if (ret < 0)
- return ret;
-
- if (buf & hw_disabled)
- stopped = true;
- else
- msleep(HW_DISABLE_DELAY_MS);
- } while (!stopped && !time_after(jiffies, timeout));
- }
-
- ret = stopped ? 0 : -ETIME;
-
- return ret;
-}
-
-static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush)
-{
- return lan78xx_update_reg(dev, reg, fifo_flush, fifo_flush);
-}
-
-static int lan78xx_start_tx_path(struct lan78xx_net *dev)
-{
- int ret;
-
- netif_dbg(dev, drv, dev->net, "start tx path");
-
- /* Start the MAC transmitter */
-
- ret = lan78xx_start_hw(dev, MAC_TX, MAC_TX_TXEN_);
- if (ret < 0)
- return ret;
-
- /* Start the Tx FIFO */
-
- ret = lan78xx_start_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_);
- if (ret < 0)
- return ret;
-
- return 0;
-}
-
-static int lan78xx_stop_tx_path(struct lan78xx_net *dev)
-{
- int ret;
-
- netif_dbg(dev, drv, dev->net, "stop tx path");
-
- /* Stop the Tx FIFO */
-
- ret = lan78xx_stop_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_, FCT_TX_CTL_DIS_);
- if (ret < 0)
- return ret;
-
- /* Stop the MAC transmitter */
-
- ret = lan78xx_stop_hw(dev, MAC_TX, MAC_TX_TXEN_, MAC_TX_TXD_);
- if (ret < 0)
- return ret;
-
- return 0;
-}
-
-/* The caller must ensure the Tx path is stopped before calling
- * lan78xx_flush_tx_fifo().
- */
-static int lan78xx_flush_tx_fifo(struct lan78xx_net *dev)
-{
- return lan78xx_flush_fifo(dev, FCT_TX_CTL, FCT_TX_CTL_RST_);
-}
-
-static int lan78xx_start_rx_path(struct lan78xx_net *dev)
-{
- int ret;
-
- netif_dbg(dev, drv, dev->net, "start rx path");
-
- /* Start the Rx FIFO */
-
- ret = lan78xx_start_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_);
- if (ret < 0)
- return ret;
-
- /* Start the MAC receiver*/
-
- ret = lan78xx_start_hw(dev, MAC_RX, MAC_RX_RXEN_);
- if (ret < 0)
- return ret;
-
- return 0;
-}
-
-static int lan78xx_stop_rx_path(struct lan78xx_net *dev)
-{
- int ret;
-
- netif_dbg(dev, drv, dev->net, "stop rx path");
-
- /* Stop the MAC receiver */
-
- ret = lan78xx_stop_hw(dev, MAC_RX, MAC_RX_RXEN_, MAC_RX_RXD_);
- if (ret < 0)
- return ret;
-
- /* Stop the Rx FIFO */
-
- ret = lan78xx_stop_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_, FCT_RX_CTL_DIS_);
- if (ret < 0)
- return ret;
-
- return 0;
-}
-
-/* The caller must ensure the Rx path is stopped before calling
- * lan78xx_flush_rx_fifo().
- */
-static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev)
-{
- return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_);
-}
-
static int lan78xx_reset(struct lan78xx_net *dev)
{
struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
@@ -2905,7 +3009,9 @@ static int lan78xx_reset(struct lan78xx_net *dev)
}
} while (buf & HW_CFG_LRST_);
- lan78xx_init_mac_address(dev);
+ ret = lan78xx_init_mac_address(dev);
+ if (ret < 0)
+ return ret;
/* save DEVID for later usage */
ret = lan78xx_read_reg(dev, ID_REV, &buf);
@@ -2927,7 +3033,9 @@ static int lan78xx_reset(struct lan78xx_net *dev)
return ret;
/* Init LTM */
- lan78xx_init_ltm(dev);
+ ret = lan78xx_init_ltm(dev);
+ if (ret < 0)
+ return ret;
ret = lan78xx_write_reg(dev, BURST_CAP, dev->burst_cap);
if (ret < 0)
@@ -4242,9 +4350,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
phydev = net->phydev;
- phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
- phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
-
phy_disconnect(net->phydev);
if (phy_is_pseudo_fixed_link(phydev)) {
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 67d25f4f94ef..ca81b212a246 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -122,16 +122,6 @@ struct net_vrf {
int ifindex;
};
-static void vrf_rx_stats(struct net_device *dev, int len)
-{
- struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
-
- u64_stats_update_begin(&dstats->syncp);
- u64_stats_inc(&dstats->rx_packets);
- u64_stats_add(&dstats->rx_bytes, len);
- u64_stats_update_end(&dstats->syncp);
-}
-
static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
{
vrf_dev->stats.tx_errors++;
@@ -369,7 +359,7 @@ static bool qdisc_tx_is_default(const struct net_device *dev)
static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
struct dst_entry *dst)
{
- int len = skb->len;
+ unsigned int len = skb->len;
skb_orphan(skb);
@@ -382,15 +372,10 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
skb->protocol = eth_type_trans(skb, dev);
- if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
- vrf_rx_stats(dev, len);
- } else {
- struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
-
- u64_stats_update_begin(&dstats->syncp);
- u64_stats_inc(&dstats->rx_drops);
- u64_stats_update_end(&dstats->syncp);
- }
+ if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
+ dev_dstats_rx_add(dev, len);
+ else
+ dev_dstats_rx_dropped(dev);
return NETDEV_TX_OK;
}
@@ -578,20 +563,14 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
-
- int len = skb->len;
- netdev_tx_t ret = is_ip_tx_frame(skb, dev);
-
- u64_stats_update_begin(&dstats->syncp);
- if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
+ unsigned int len = skb->len;
+ netdev_tx_t ret;
- u64_stats_inc(&dstats->tx_packets);
- u64_stats_add(&dstats->tx_bytes, len);
- } else {
- u64_stats_inc(&dstats->tx_drops);
- }
- u64_stats_update_end(&dstats->syncp);
+ ret = is_ip_tx_frame(skb, dev);
+ if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN))
+ dev_dstats_tx_add(dev, len);
+ else
+ dev_dstats_tx_dropped(dev);
return ret;
}
@@ -1364,7 +1343,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
if (!is_ndisc) {
struct net_device *orig_dev = skb->dev;
- vrf_rx_stats(vrf_dev, skb->len);
+ dev_dstats_rx_add(vrf_dev, skb->len);
skb->dev = vrf_dev;
skb->skb_iif = vrf_dev->ifindex;
@@ -1420,7 +1399,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
goto out;
}
- vrf_rx_stats(vrf_dev, skb->len);
+ dev_dstats_rx_add(vrf_dev, skb->len);
if (!list_empty(&vrf_dev->ptype_all)) {
int err;
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 9ea63059d52d..0c356e0a61ef 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -622,9 +622,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
return 1;
}
-static bool vxlan_parse_gpe_proto(struct vxlanhdr *hdr, __be16 *protocol)
+static bool vxlan_parse_gpe_proto(const struct vxlanhdr *hdr, __be16 *protocol)
{
- struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)hdr;
+ const struct vxlanhdr_gpe *gpe = (const struct vxlanhdr_gpe *)hdr;
/* Need to have Next Protocol set for interfaces in GPE mode. */
if (!gpe->np_applied)
@@ -1352,6 +1352,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int *idx)
{
+ struct ndo_fdb_dump_context *ctx = (void *)cb->ctx;
struct vxlan_dev *vxlan = netdev_priv(dev);
unsigned int h;
int err = 0;
@@ -1364,7 +1365,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct vxlan_rdst *rd;
if (rcu_access_pointer(f->nh)) {
- if (*idx < cb->args[2])
+ if (*idx < ctx->fdb_idx)
goto skip_nh;
err = vxlan_fdb_info(skb, vxlan, f,
NETLINK_CB(cb->skb).portid,
@@ -1381,7 +1382,7 @@ skip_nh:
}
list_for_each_entry_rcu(rd, &f->remotes, list) {
- if (*idx < cb->args[2])
+ if (*idx < ctx->fdb_idx)
goto skip;
err = vxlan_fdb_info(skb, vxlan, f,
@@ -1554,18 +1555,17 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
#endif
}
-static enum skb_drop_reason vxlan_remcsum(struct vxlanhdr *unparsed,
- struct sk_buff *skb,
- u32 vxflags)
+static enum skb_drop_reason vxlan_remcsum(struct sk_buff *skb, u32 vxflags)
{
+ const struct vxlanhdr *vh = vxlan_hdr(skb);
enum skb_drop_reason reason;
size_t start, offset;
- if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
- goto out;
+ if (!(vh->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
+ return SKB_NOT_DROPPED_YET;
- start = vxlan_rco_start(unparsed->vx_vni);
- offset = start + vxlan_rco_offset(unparsed->vx_vni);
+ start = vxlan_rco_start(vh->vx_vni);
+ offset = start + vxlan_rco_offset(vh->vx_vni);
reason = pskb_may_pull_reason(skb, offset + sizeof(u16));
if (reason)
@@ -1573,22 +1573,20 @@ static enum skb_drop_reason vxlan_remcsum(struct vxlanhdr *unparsed,
skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
!!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
-out:
- unparsed->vx_flags &= ~VXLAN_HF_RCO;
- unparsed->vx_vni &= VXLAN_VNI_MASK;
-
return SKB_NOT_DROPPED_YET;
}
-static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
- struct sk_buff *skb, u32 vxflags,
+static void vxlan_parse_gbp_hdr(struct sk_buff *skb, u32 vxflags,
struct vxlan_metadata *md)
{
- struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
+ const struct vxlanhdr *vh = vxlan_hdr(skb);
+ const struct vxlanhdr_gbp *gbp;
struct metadata_dst *tun_dst;
- if (!(unparsed->vx_flags & VXLAN_HF_GBP))
- goto out;
+ gbp = (const struct vxlanhdr_gbp *)vh;
+
+ if (!(vh->vx_flags & VXLAN_HF_GBP))
+ return;
md->gbp = ntohs(gbp->policy_id);
@@ -1607,8 +1605,6 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
/* In flow-based mode, GBP is carried in dst_metadata */
if (!(vxflags & VXLAN_F_COLLECT_METADATA))
skb->mark = md->gbp;
-out:
- unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
static enum skb_drop_reason vxlan_set_mac(struct vxlan_dev *vxlan,
@@ -1672,9 +1668,9 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
{
struct vxlan_vni_node *vninode = NULL;
+ const struct vxlanhdr *vh;
struct vxlan_dev *vxlan;
struct vxlan_sock *vs;
- struct vxlanhdr unparsed;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 protocol = htons(ETH_P_TEB);
@@ -1689,24 +1685,21 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (reason)
goto drop;
- unparsed = *vxlan_hdr(skb);
+ vh = vxlan_hdr(skb);
/* VNI flag always required to be set */
- if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
+ if (!(vh->vx_flags & VXLAN_HF_VNI)) {
netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
- ntohl(vxlan_hdr(skb)->vx_flags),
- ntohl(vxlan_hdr(skb)->vx_vni));
+ ntohl(vh->vx_flags), ntohl(vh->vx_vni));
reason = SKB_DROP_REASON_VXLAN_INVALID_HDR;
/* Return non vxlan pkt */
goto drop;
}
- unparsed.vx_flags &= ~VXLAN_HF_VNI;
- unparsed.vx_vni &= ~VXLAN_VNI_MASK;
vs = rcu_dereference_sk_user_data(sk);
if (!vs)
goto drop;
- vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
+ vni = vxlan_vni(vh->vx_vni);
vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, &vninode);
if (!vxlan) {
@@ -1714,13 +1707,27 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
goto drop;
}
- /* For backwards compatibility, only allow reserved fields to be
- * used by VXLAN extensions if explicitly requested.
- */
- if (vs->flags & VXLAN_F_GPE) {
- if (!vxlan_parse_gpe_proto(&unparsed, &protocol))
+ if (vh->vx_flags & vxlan->cfg.reserved_bits.vx_flags ||
+ vh->vx_vni & vxlan->cfg.reserved_bits.vx_vni) {
+ /* If the header uses bits besides those enabled by the
+ * netdevice configuration, treat this as a malformed packet.
+ * This behavior diverges from VXLAN RFC (RFC7348) which
+ * stipulates that bits in reserved fields are to be
+ * ignored. The approach here maintains compatibility with
+ * previous stack code, and also is more robust and provides a
+ * little more security in adding extensions to VXLAN.
+ */
+ reason = SKB_DROP_REASON_VXLAN_INVALID_HDR;
+ DEV_STATS_INC(vxlan->dev, rx_frame_errors);
+ DEV_STATS_INC(vxlan->dev, rx_errors);
+ vxlan_vnifilter_count(vxlan, vni, vninode,
+ VXLAN_VNI_STATS_RX_ERRORS, 0);
+ goto drop;
+ }
+
+ if (vxlan->cfg.flags & VXLAN_F_GPE) {
+ if (!vxlan_parse_gpe_proto(vh, &protocol))
goto drop;
- unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS;
raw_proto = true;
}
@@ -1730,8 +1737,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (vs->flags & VXLAN_F_REMCSUM_RX) {
- reason = vxlan_remcsum(&unparsed, skb, vs->flags);
+ if (vxlan->cfg.flags & VXLAN_F_REMCSUM_RX) {
+ reason = vxlan_remcsum(skb, vxlan->cfg.flags);
if (unlikely(reason))
goto drop;
}
@@ -1756,25 +1763,12 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
memset(md, 0, sizeof(*md));
}
- if (vs->flags & VXLAN_F_GBP)
- vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
+ if (vxlan->cfg.flags & VXLAN_F_GBP)
+ vxlan_parse_gbp_hdr(skb, vxlan->cfg.flags, md);
/* Note that GBP and GPE can never be active together. This is
* ensured in vxlan_dev_configure.
*/
- if (unparsed.vx_flags || unparsed.vx_vni) {
- /* If there are any unprocessed flags remaining treat
- * this as a malformed packet. This behavior diverges from
- * VXLAN RFC (RFC7348) which stipulates that bits in reserved
- * in reserved fields are to be ignored. The approach here
- * maintains compatibility with previous stack code, and also
- * is more robust and provides a little more security in
- * adding extensions to VXLAN.
- */
- reason = SKB_DROP_REASON_VXLAN_INVALID_HDR;
- goto drop;
- }
-
if (!raw_proto) {
reason = vxlan_set_mac(vxlan, vs, skb, vni);
if (reason)
@@ -1818,14 +1812,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
rcu_read_unlock();
- dev_core_stats_rx_dropped_inc(vxlan->dev);
+ dev_dstats_rx_dropped(vxlan->dev);
vxlan_vnifilter_count(vxlan, vni, vninode,
VXLAN_VNI_STATS_RX_DROPS, 0);
reason = SKB_DROP_REASON_DEV_READY;
goto drop;
}
- dev_sw_netstats_rx_add(vxlan->dev, skb->len);
+ dev_dstats_rx_add(vxlan->dev, skb->len);
vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX, skb->len);
gro_cells_receive(&vxlan->gro_cells, skb);
@@ -1880,7 +1874,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
goto out;
if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
- dev_core_stats_tx_dropped_inc(dev);
+ dev_dstats_tx_dropped(dev);
vxlan_vnifilter_count(vxlan, vni, NULL,
VXLAN_VNI_STATS_TX_DROPS, 0);
goto out;
@@ -1938,7 +1932,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
reply->pkt_type = PACKET_HOST;
if (netif_rx(reply) == NET_RX_DROP) {
- dev_core_stats_rx_dropped_inc(dev);
+ dev_dstats_rx_dropped(dev);
vxlan_vnifilter_count(vxlan, vni, NULL,
VXLAN_VNI_STATS_RX_DROPS, 0);
}
@@ -2097,7 +2091,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
goto out;
if (netif_rx(reply) == NET_RX_DROP) {
- dev_core_stats_rx_dropped_inc(dev);
+ dev_dstats_rx_dropped(dev);
vxlan_vnifilter_count(vxlan, vni, NULL,
VXLAN_VNI_STATS_RX_DROPS, 0);
}
@@ -2271,8 +2265,8 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
{
union vxlan_addr loopback;
union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
+ unsigned int len = skb->len;
struct net_device *dev;
- int len = skb->len;
skb->pkt_type = PACKET_HOST;
skb->encapsulation = 0;
@@ -2299,16 +2293,16 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
- dev_sw_netstats_tx_add(src_vxlan->dev, 1, len);
+ dev_dstats_tx_add(src_vxlan->dev, len);
vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len);
if (__netif_rx(skb) == NET_RX_SUCCESS) {
- dev_sw_netstats_rx_add(dst_vxlan->dev, len);
+ dev_dstats_rx_add(dst_vxlan->dev, len);
vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX,
len);
} else {
drop:
- dev_core_stats_rx_dropped_inc(dev);
+ dev_dstats_rx_dropped(dev);
vxlan_vnifilter_count(dst_vxlan, vni, NULL,
VXLAN_VNI_STATS_RX_DROPS, 0);
}
@@ -2621,7 +2615,7 @@ out_unlock:
return;
drop:
- dev_core_stats_tx_dropped_inc(dev);
+ dev_dstats_tx_dropped(dev);
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0);
kfree_skb_reason(skb, reason);
return;
@@ -2666,7 +2660,7 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
return;
drop:
- dev_core_stats_tx_dropped_inc(dev);
+ dev_dstats_tx_dropped(dev);
vxlan_vnifilter_count(netdev_priv(dev), vni, NULL,
VXLAN_VNI_STATS_TX_DROPS, 0);
dev_kfree_skb(skb);
@@ -2704,7 +2698,7 @@ static netdev_tx_t vxlan_xmit_nhid(struct sk_buff *skb, struct net_device *dev,
return NETDEV_TX_OK;
drop:
- dev_core_stats_tx_dropped_inc(dev);
+ dev_dstats_tx_dropped(dev);
vxlan_vnifilter_count(netdev_priv(dev), vni, NULL,
VXLAN_VNI_STATS_TX_DROPS, 0);
dev_kfree_skb(skb);
@@ -2801,7 +2795,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
!is_multicast_ether_addr(eth->h_dest))
vxlan_fdb_miss(vxlan, eth->h_dest);
- dev_core_stats_tx_dropped_inc(dev);
+ dev_dstats_tx_dropped(dev);
vxlan_vnifilter_count(vxlan, vni, NULL,
VXLAN_VNI_STATS_TX_DROPS, 0);
kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE);
@@ -3371,7 +3365,7 @@ static void vxlan_setup(struct net_device *dev)
dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = ETH_MAX_MTU;
- dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
INIT_LIST_HEAD(&vxlan->next);
timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
@@ -3435,6 +3429,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
[IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
[IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
+ [IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)),
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4070,6 +4065,10 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
struct net_device *dev, struct vxlan_config *conf,
bool changelink, struct netlink_ext_ack *extack)
{
+ struct vxlanhdr used_bits = {
+ .vx_flags = VXLAN_HF_VNI,
+ .vx_vni = VXLAN_VNI_MASK,
+ };
struct vxlan_dev *vxlan = netdev_priv(dev);
int err = 0;
@@ -4296,6 +4295,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
extack);
if (err)
return err;
+ used_bits.vx_flags |= VXLAN_HF_RCO;
+ used_bits.vx_vni |= ~VXLAN_VNI_MASK;
}
if (data[IFLA_VXLAN_GBP]) {
@@ -4303,6 +4304,7 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
VXLAN_F_GBP, changelink, false, extack);
if (err)
return err;
+ used_bits.vx_flags |= VXLAN_GBP_USED_BITS;
}
if (data[IFLA_VXLAN_GPE]) {
@@ -4311,6 +4313,46 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
extack);
if (err)
return err;
+
+ used_bits.vx_flags |= VXLAN_GPE_USED_BITS;
+ }
+
+ if (data[IFLA_VXLAN_RESERVED_BITS]) {
+ struct vxlanhdr reserved_bits;
+
+ if (changelink) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ data[IFLA_VXLAN_RESERVED_BITS],
+ "Cannot change reserved_bits");
+ return -EOPNOTSUPP;
+ }
+
+ nla_memcpy(&reserved_bits, data[IFLA_VXLAN_RESERVED_BITS],
+ sizeof(reserved_bits));
+ if (used_bits.vx_flags & reserved_bits.vx_flags ||
+ used_bits.vx_vni & reserved_bits.vx_vni) {
+ __be64 ub_be64, rb_be64;
+
+ memcpy(&ub_be64, &used_bits, sizeof(ub_be64));
+ memcpy(&rb_be64, &reserved_bits, sizeof(rb_be64));
+
+ NL_SET_ERR_MSG_ATTR_FMT(extack,
+ data[IFLA_VXLAN_RESERVED_BITS],
+ "Used bits %#018llx cannot overlap reserved bits %#018llx",
+ be64_to_cpu(ub_be64),
+ be64_to_cpu(rb_be64));
+ return -EINVAL;
+ }
+
+ conf->reserved_bits = reserved_bits;
+ } else {
+ /* For backwards compatibility, only allow reserved fields to be
+ * used by VXLAN extensions if explicitly requested.
+ */
+ conf->reserved_bits = (struct vxlanhdr) {
+ .vx_flags = ~used_bits.vx_flags,
+ .vx_vni = ~used_bits.vx_vni,
+ };
}
if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
@@ -4497,6 +4539,8 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(0) + /* IFLA_VXLAN_GPE */
nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */
+ /* IFLA_VXLAN_RESERVED_BITS */
+ nla_total_size(sizeof(struct vxlanhdr)) +
0;
}
@@ -4599,6 +4643,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
!!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)))
goto nla_put_failure;
+ if (nla_put(skb, IFLA_VXLAN_RESERVED_BITS,
+ sizeof(vxlan->cfg.reserved_bits),
+ &vxlan->cfg.reserved_bits))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
index 8381b0dc7acb..02f2ec7cf4ce 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.c
+++ b/drivers/net/wwan/t7xx/t7xx_pci.c
@@ -43,6 +43,8 @@
#include "t7xx_state_monitor.h"
#include "t7xx_port_proxy.h"
+#define DRIVER_NAME "mtk_t7xx"
+
#define T7XX_PCI_IREG_BASE 0
#define T7XX_PCI_EREG_BASE 2
@@ -833,6 +835,7 @@ static void t7xx_pci_infracfg_ao_calc(struct t7xx_pci_dev *t7xx_dev)
static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct t7xx_pci_dev *t7xx_dev;
+ void __iomem *iomem;
int ret;
t7xx_dev = devm_kzalloc(&pdev->dev, sizeof(*t7xx_dev), GFP_KERNEL);
@@ -848,12 +851,21 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_master(pdev);
- ret = pcim_iomap_regions(pdev, BIT(T7XX_PCI_IREG_BASE) | BIT(T7XX_PCI_EREG_BASE),
- pci_name(pdev));
+ iomem = pcim_iomap_region(pdev, T7XX_PCI_IREG_BASE, DRIVER_NAME);
+ ret = PTR_ERR_OR_ZERO(iomem);
+ if (ret) {
+ dev_err(&pdev->dev, "Could not request IREG BAR: %d\n", ret);
+ return -ENOMEM;
+ }
+ IREG_BASE(t7xx_dev) = iomem;
+
+ iomem = pcim_iomap_region(pdev, T7XX_PCI_EREG_BASE, DRIVER_NAME);
+ ret = PTR_ERR_OR_ZERO(iomem);
if (ret) {
- dev_err(&pdev->dev, "Could not request BARs: %d\n", ret);
+ dev_err(&pdev->dev, "Could not request EREG BAR: %d\n", ret);
return -ENOMEM;
}
+ t7xx_dev->base_addr.pcie_ext_reg_base = iomem;
ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
if (ret) {
@@ -867,9 +879,6 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return ret;
}
- IREG_BASE(t7xx_dev) = pcim_iomap_table(pdev)[T7XX_PCI_IREG_BASE];
- t7xx_dev->base_addr.pcie_ext_reg_base = pcim_iomap_table(pdev)[T7XX_PCI_EREG_BASE];
-
ret = t7xx_pci_pm_init(t7xx_dev);
if (ret)
return ret;
@@ -937,7 +946,7 @@ static const struct pci_device_id t7xx_pci_table[] = {
MODULE_DEVICE_TABLE(pci, t7xx_pci_table);
static struct pci_driver t7xx_pci_driver = {
- .name = "mtk_t7xx",
+ .name = DRIVER_NAME,
.id_table = t7xx_pci_table,
.probe = t7xx_pci_probe,
.remove = t7xx_pci_remove,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index eaee2a819f4c..ec3acb16359e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2591,10 +2591,10 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
- struct bpf_prog *xdp_prog);
+ const struct bpf_prog *xdp_prog);
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog, struct bpf_map *map,
- bool exclude_ingress);
+ const struct bpf_prog *xdp_prog,
+ struct bpf_map *map, bool exclude_ingress);
void __cpu_map_flush(struct list_head *flush_list);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
@@ -2864,15 +2864,15 @@ struct sk_buff;
static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
return 0;
}
static inline
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog, struct bpf_map *map,
- bool exclude_ingress)
+ const struct bpf_prog *xdp_prog,
+ struct bpf_map *map, bool exclude_ingress)
{
return 0;
}
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index ecf203f01034..9a1eacf35d37 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -81,7 +81,7 @@ static const u8 eth_ipv6_mcast_addr_base[ETH_ALEN] __aligned(2) =
* is_link_local_ether_addr - Determine if given Ethernet address is link-local
* @addr: Pointer to a six-byte array containing the Ethernet address
*
- * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per
+ * Return: true if address is link local reserved addr (01:80:c2:00:00:0X) per
* IEEE 802.1Q 8.6.3 Frame filtering.
*
* Please note: addr must be aligned to u16.
@@ -104,7 +104,7 @@ static inline bool is_link_local_ether_addr(const u8 *addr)
* is_zero_ether_addr - Determine if give Ethernet address is all zeros.
* @addr: Pointer to a six-byte array containing the Ethernet address
*
- * Return true if the address is all zeroes.
+ * Return: true if the address is all zeroes.
*
* Please note: addr must be aligned to u16.
*/
@@ -123,7 +123,7 @@ static inline bool is_zero_ether_addr(const u8 *addr)
* is_multicast_ether_addr - Determine if the Ethernet address is a multicast.
* @addr: Pointer to a six-byte array containing the Ethernet address
*
- * Return true if the address is a multicast address.
+ * Return: true if the address is a multicast address.
* By definition the broadcast address is also a multicast address.
*/
static inline bool is_multicast_ether_addr(const u8 *addr)
@@ -157,7 +157,7 @@ static inline bool is_multicast_ether_addr_64bits(const u8 *addr)
* is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802).
* @addr: Pointer to a six-byte array containing the Ethernet address
*
- * Return true if the address is a local address.
+ * Return: true if the address is a local address.
*/
static inline bool is_local_ether_addr(const u8 *addr)
{
@@ -168,7 +168,7 @@ static inline bool is_local_ether_addr(const u8 *addr)
* is_broadcast_ether_addr - Determine if the Ethernet address is broadcast
* @addr: Pointer to a six-byte array containing the Ethernet address
*
- * Return true if the address is the broadcast address.
+ * Return: true if the address is the broadcast address.
*
* Please note: addr must be aligned to u16.
*/
@@ -183,7 +183,7 @@ static inline bool is_broadcast_ether_addr(const u8 *addr)
* is_unicast_ether_addr - Determine if the Ethernet address is unicast
* @addr: Pointer to a six-byte array containing the Ethernet address
*
- * Return true if the address is a unicast address.
+ * Return: true if the address is a unicast address.
*/
static inline bool is_unicast_ether_addr(const u8 *addr)
{
@@ -197,7 +197,7 @@ static inline bool is_unicast_ether_addr(const u8 *addr)
* Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not
* a multicast address, and is not FF:FF:FF:FF:FF:FF.
*
- * Return true if the address is valid.
+ * Return: true if the address is valid.
*
* Please note: addr must be aligned to u16.
*/
@@ -214,7 +214,7 @@ static inline bool is_valid_ether_addr(const u8 *addr)
*
* Check that the value from the Ethertype/length field is a valid Ethertype.
*
- * Return true if the valid is an 802.3 supported Ethertype.
+ * Return: true if the value is an 802.3 supported Ethertype.
*/
static inline bool eth_proto_is_802_3(__be16 proto)
{
@@ -458,7 +458,7 @@ static inline bool ether_addr_is_ip_mcast(const u8 *addr)
* ether_addr_to_u64 - Convert an Ethernet address into a u64 value.
* @addr: Pointer to a six-byte array containing the Ethernet address
*
- * Return a u64 value of the address
+ * Return: a u64 value of the address
*/
static inline u64 ether_addr_to_u64(const u8 *addr)
{
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b8b935b52603..e217c6321ed0 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -257,7 +257,7 @@ struct ethtool_link_ksettings {
* @mode : one of the ETHTOOL_LINK_MODE_*_BIT
* (not atomic, no bound checking)
*
- * Returns true/false.
+ * Returns: true/false.
*/
#define ethtool_link_ksettings_test_link_mode(ptr, name, mode) \
test_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name)
@@ -1199,7 +1199,7 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings,
* @dev: pointer to net_device structure
* @vclock_index: pointer to pointer of vclock index
*
- * Return number of phc vclocks
+ * Return: number of phc vclocks
*/
int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index);
@@ -1253,7 +1253,7 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add,
* ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer.
* @dev: pointer to net_device structure
* @info: buffer to hold the result
- * Returns zero on success, non-zero otherwise.
+ * Returns: zero on success, non-zero otherwise.
*/
int ethtool_get_ts_info_by_layer(struct net_device *dev,
struct kernel_ethtool_ts_info *info);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3a21947f2fd4..9a5d23ae3855 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1179,17 +1179,18 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
* This does not appear to be a real limitation for existing software.
*/
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct xdp_buff *xdp, struct bpf_prog *prog);
+ struct xdp_buff *xdp, const struct bpf_prog *prog);
int xdp_do_redirect(struct net_device *dev,
struct xdp_buff *xdp,
- struct bpf_prog *prog);
+ const struct bpf_prog *prog);
int xdp_do_redirect_frame(struct net_device *dev,
struct xdp_buff *xdp,
struct xdp_frame *xdpf,
- struct bpf_prog *prog);
+ const struct bpf_prog *prog);
void xdp_do_flush(void);
-void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act);
+void bpf_warn_invalid_xdp_action(const struct net_device *dev,
+ const struct bpf_prog *prog, u32 act);
#ifdef CONFIG_INET
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index c1645c86eed9..d6326b53e336 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -310,7 +310,7 @@ static inline bool vlan_uses_dev(const struct net_device *dev)
* eth_type_vlan - check for valid vlan ether type.
* @ethertype: ether type to check
*
- * Returns true if the ether type is a vlan ether type.
+ * Returns: true if the ether type is a vlan ether type.
*/
static inline bool eth_type_vlan(__be16 ethertype)
{
@@ -341,9 +341,9 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features,
* @mac_len: MAC header length including outer vlan headers
*
* Inserts the VLAN tag into @skb as part of the payload at offset mac_len
- * Returns error if skb_cow_head fails.
- *
* Does not change skb->protocol so this function can be used during receive.
+ *
+ * Returns: error if skb_cow_head fails.
*/
static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
__be16 vlan_proto, u16 vlan_tci,
@@ -390,9 +390,9 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
* @vlan_tci: VLAN TCI to insert
*
* Inserts the VLAN tag into @skb as part of the payload
- * Returns error if skb_cow_head fails.
- *
* Does not change skb->protocol so this function can be used during receive.
+ *
+ * Returns: error if skb_cow_head fails.
*/
static inline int __vlan_insert_tag(struct sk_buff *skb,
__be16 vlan_proto, u16 vlan_tci)
@@ -533,7 +533,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb,
* @skb: skbuff to query
* @vlan_tci: buffer to store value
*
- * Returns error if the skb is not of VLAN type
+ * Returns: error if the skb is not of VLAN type
*/
static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
{
@@ -551,7 +551,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
* @skb: skbuff to query
* @vlan_tci: buffer to store value
*
- * Returns error if @skb->vlan_tci is not set correctly
+ * Returns: error if @skb->vlan_tci is not set correctly
*/
static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
u16 *vlan_tci)
@@ -570,7 +570,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
* @skb: skbuff to query
* @vlan_tci: buffer to store value
*
- * Returns error if the skb is not VLAN tagged
+ * Returns: error if the skb is not VLAN tagged
*/
static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
{
@@ -587,7 +587,7 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
* @type: first vlan protocol
* @depth: buffer to store length of eth and vlan tags in bytes
*
- * Returns the EtherType of the packet, regardless of whether it is
+ * Returns: the EtherType of the packet, regardless of whether it is
* vlan encapsulated (normal or hardware accelerated) or not.
*/
static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type,
@@ -629,7 +629,7 @@ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type,
* vlan_get_protocol - get protocol EtherType.
* @skb: skbuff to query
*
- * Returns the EtherType of the packet, regardless of whether it is
+ * Returns: the EtherType of the packet, regardless of whether it is
* vlan encapsulated (normal or hardware accelerated) or not.
*/
static inline __be16 vlan_get_protocol(const struct sk_buff *skb)
@@ -710,7 +710,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
* Expects the skb to contain a VLAN tag in the payload, and to have skb->data
* pointing at the MAC header.
*
- * Returns a new pointer to skb->data, or NULL on failure to pull.
+ * Returns: a new pointer to skb->data, or NULL on failure to pull.
*/
static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
{
@@ -727,7 +727,7 @@ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
* skb_vlan_tagged - check if skb is vlan tagged.
* @skb: skbuff to query
*
- * Returns true if the skb is tagged, regardless of whether it is hardware
+ * Returns: true if the skb is tagged, regardless of whether it is hardware
* accelerated or not.
*/
static inline bool skb_vlan_tagged(const struct sk_buff *skb)
@@ -743,7 +743,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb)
* skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers.
* @skb: skbuff to query
*
- * Returns true if the skb is tagged with multiple vlan headers, regardless
+ * Returns: true if the skb is tagged with multiple vlan headers, regardless
* of whether it is hardware accelerated or not.
*/
static inline bool skb_vlan_tagged_multi(struct sk_buff *skb)
@@ -774,7 +774,7 @@ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb)
* @skb: skbuff to query
* @features: features to be checked
*
- * Returns features without unsafe ones if the skb has multiple tags.
+ * Returns: features without unsafe ones if the skb has multiple tags.
*/
static inline netdev_features_t vlan_features_check(struct sk_buff *skb,
netdev_features_t features)
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 3a4e723eae0f..383ed9985802 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -222,6 +222,11 @@ static inline ktime_t ns_to_ktime(u64 ns)
return ns;
}
+static inline ktime_t us_to_ktime(u64 us)
+{
+ return us * NSEC_PER_USEC;
+}
+
static inline ktime_t ms_to_ktime(u64 ms)
{
return ms * NSEC_PER_MSEC;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ecc686409161..d917949bba03 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -509,7 +509,7 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n)
* is scheduled for example in the context of delayed timer
* that can be skipped if a NAPI is already scheduled.
*
- * Return True if NAPI is scheduled, False otherwise.
+ * Return: True if NAPI is scheduled, False otherwise.
*/
static inline bool napi_is_scheduled(struct napi_struct *n)
{
@@ -524,7 +524,7 @@ bool napi_schedule_prep(struct napi_struct *n);
*
* Schedule NAPI poll routine to be called if it is not already
* running.
- * Return true if we schedule a NAPI or false if not.
+ * Return: true if we schedule a NAPI or false if not.
* Refer to napi_schedule_prep() for additional reason on why
* a NAPI might not be scheduled.
*/
@@ -558,7 +558,7 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
* Mark NAPI processing as complete. Should only be called if poll budget
* has not been completely consumed.
* Prefer over napi_complete().
- * Return false if device should avoid rearming interrupts.
+ * Return: false if device should avoid rearming interrupts.
*/
bool napi_complete_done(struct napi_struct *n, int work_done);
@@ -2854,6 +2854,46 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len)
u64_stats_update_end(&lstats->syncp);
}
+static inline void dev_dstats_rx_add(struct net_device *dev,
+ unsigned int len)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->rx_packets);
+ u64_stats_add(&dstats->rx_bytes, len);
+ u64_stats_update_end(&dstats->syncp);
+}
+
+static inline void dev_dstats_rx_dropped(struct net_device *dev)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->rx_drops);
+ u64_stats_update_end(&dstats->syncp);
+}
+
+static inline void dev_dstats_tx_add(struct net_device *dev,
+ unsigned int len)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->tx_packets);
+ u64_stats_add(&dstats->tx_bytes, len);
+ u64_stats_update_end(&dstats->syncp);
+}
+
+static inline void dev_dstats_tx_dropped(struct net_device *dev)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->tx_drops);
+ u64_stats_update_end(&dstats->syncp);
+}
+
#define __netdev_alloc_pcpu_stats(type, gfp) \
({ \
typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
@@ -3322,6 +3362,7 @@ struct softnet_data {
};
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+DECLARE_PER_CPU(struct page_pool *, system_page_pool);
#ifndef CONFIG_PREEMPT_RT
static inline int dev_recursion_level(void)
@@ -3810,7 +3851,7 @@ static inline bool netif_attr_test_mask(unsigned long j,
* @online_mask: bitmask for CPUs/Rx queues that are online
* @nr_bits: number of bits in the bitmask
*
- * Returns true if a CPU/Rx queue is online.
+ * Returns: true if a CPU/Rx queue is online.
*/
static inline bool netif_attr_test_online(unsigned long j,
const unsigned long *online_mask,
@@ -3830,7 +3871,8 @@ static inline bool netif_attr_test_online(unsigned long j,
* @srcp: the cpumask/Rx queue mask pointer
* @nr_bits: number of bits in the bitmask
*
- * Returns >= nr_bits if no further CPUs/Rx queues set.
+ * Returns: next (after n) CPU/Rx queue index in the mask;
+ * >= nr_bits if no further CPUs/Rx queues set.
*/
static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
unsigned int nr_bits)
@@ -3852,7 +3894,8 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
* @src2p: the second CPUs/Rx queues mask pointer
* @nr_bits: number of bits in the bitmask
*
- * Returns >= nr_bits if no further CPUs/Rx queues set in both.
+ * Returns: next (after n) CPU/Rx queue index set in both masks;
+ * >= nr_bits if no further CPUs/Rx queues set in both.
*/
static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
const unsigned long *src2p,
@@ -3958,9 +4001,9 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
}
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog);
-void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
+ const struct bpf_prog *xdp_prog);
+void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog);
+int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb);
int netif_rx(struct sk_buff *skb);
int __netif_rx(struct sk_buff *skb);
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 5897f3dbaf7c..f39f688d7285 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -357,7 +357,7 @@ extern struct static_key xt_tee_enabled;
* Begin packet processing : all readers must wait the end
* 1) Must be called with preemption disabled
* 2) softirqs must be disabled too (or we should use this_cpu_add())
- * Returns :
+ * Returns:
* 1 if no recursion on this cpu
* 0 if recursion detected
*/
diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
index 8676316547cc..3175073a66ba 100644
--- a/include/linux/netfilter_netdev.h
+++ b/include/linux/netfilter_netdev.h
@@ -66,7 +66,6 @@ static inline bool nf_hook_egress_active(void)
* @rc: result code which shall be returned by __dev_queue_xmit() on failure
* @dev: netdev whose egress hooks shall be applied to @skb
*
- * Returns @skb on success or %NULL if the packet was consumed or filtered.
* Caller must hold rcu_read_lock.
*
* On ingress, packets are classified first by tc, then by netfilter.
@@ -81,6 +80,8 @@ static inline bool nf_hook_egress_active(void)
* called recursively by tunnel drivers such as vxlan, the flag is reverted to
* false after sch_handle_egress(). This ensures that netfilter is applied
* both on the overlay and underlying network.
+ *
+ * Returns: @skb on success or %NULL if the packet was consumed or filtered.
*/
static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
struct net_device *dev)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index b34301650c47..f91e50a76efd 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -57,7 +57,7 @@ static inline void netpoll_poll_disable(struct net_device *dev) { return; }
static inline void netpoll_poll_enable(struct net_device *dev) { return; }
#endif
-void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
+int netpoll_send_udp(struct netpoll *np, const char *msg, int len);
void netpoll_print_options(struct netpoll *np);
int netpoll_parse_options(struct netpoll *np, char *opt);
int __netpoll_setup(struct netpoll *np, struct net_device *ndev);
diff --git a/include/linux/packing.h b/include/linux/packing.h
index 5d36dcd06f60..0589d70bbe04 100644
--- a/include/linux/packing.h
+++ b/include/linux/packing.h
@@ -8,6 +8,83 @@
#include <linux/types.h>
#include <linux/bitops.h>
+#define GEN_PACKED_FIELD_STRUCT(__type) \
+ struct packed_field_ ## __type { \
+ __type startbit; \
+ __type endbit; \
+ __type offset; \
+ __type size; \
+ }
+
+/* struct packed_field_u8. Use with bit offsets < 256, buffers < 32B and
+ * unpacked structures < 256B.
+ */
+GEN_PACKED_FIELD_STRUCT(u8);
+
+/* struct packed_field_u16. Use with bit offsets < 65536, buffers < 8KB and
+ * unpacked structures < 64KB.
+ */
+GEN_PACKED_FIELD_STRUCT(u16);
+
+#define PACKED_FIELD(start, end, struct_name, struct_field) \
+{ \
+ (start), \
+ (end), \
+ offsetof(struct_name, struct_field), \
+ sizeof_field(struct_name, struct_field), \
+}
+
+#define CHECK_PACKED_FIELD_OVERLAP(fields, index1, index2) ({ \
+ typeof(&(fields)[0]) __f = (fields); \
+ typeof(__f[0]) _f1 = __f[index1]; typeof(__f[0]) _f2 = __f[index2]; \
+ const bool _ascending = __f[0].startbit < __f[1].startbit; \
+ BUILD_BUG_ON_MSG(_ascending && _f1.startbit >= _f2.startbit, \
+ __stringify(fields) " field " __stringify(index2) \
+ " breaks ascending order"); \
+ BUILD_BUG_ON_MSG(!_ascending && _f1.startbit <= _f2.startbit, \
+ __stringify(fields) " field " __stringify(index2) \
+ " breaks descending order"); \
+ BUILD_BUG_ON_MSG(max(_f1.endbit, _f2.endbit) <= \
+ min(_f1.startbit, _f2.startbit), \
+ __stringify(fields) " field " __stringify(index2) \
+ " overlaps with previous field"); \
+})
+
+#define CHECK_PACKED_FIELD(fields, index) ({ \
+ typeof(&(fields)[0]) _f = (fields); \
+ typeof(_f[0]) __f = _f[index]; \
+ BUILD_BUG_ON_MSG(__f.startbit < __f.endbit, \
+ __stringify(fields) " field " __stringify(index) \
+ " start bit must not be smaller than end bit"); \
+ BUILD_BUG_ON_MSG(__f.size != 1 && __f.size != 2 && \
+ __f.size != 4 && __f.size != 8, \
+ __stringify(fields) " field " __stringify(index) \
+ " has unsupported unpacked storage size"); \
+ BUILD_BUG_ON_MSG(__f.startbit - __f.endbit >= BITS_PER_BYTE * __f.size, \
+ __stringify(fields) " field " __stringify(index) \
+ " exceeds unpacked storage size"); \
+ __builtin_choose_expr(index != 0, \
+ CHECK_PACKED_FIELD_OVERLAP(fields, index - 1, index), \
+ 1); \
+})
+
+/* Note that the packed fields may be either in ascending or descending order.
+ * Thus, we must check that both the first and last field fit within the
+ * packed buffer size.
+ */
+#define CHECK_PACKED_FIELDS_SIZE(fields, pbuflen) ({ \
+ typeof(&(fields)[0]) _f = (fields); \
+ typeof(pbuflen) _len = (pbuflen); \
+ const size_t num_fields = ARRAY_SIZE(fields); \
+ BUILD_BUG_ON_MSG(!__builtin_constant_p(_len), \
+ __stringify(fields) " pbuflen " __stringify(pbuflen) \
+ " must be a compile time constant"); \
+ BUILD_BUG_ON_MSG(_f[0].startbit >= BITS_PER_BYTE * _len, \
+ __stringify(fields) " first field exceeds packed buffer size"); \
+ BUILD_BUG_ON_MSG(_f[num_fields - 1].startbit >= BITS_PER_BYTE * _len, \
+ __stringify(fields) " last field exceeds packed buffer size"); \
+})
+
#define QUIRK_MSB_ON_THE_RIGHT BIT(0)
#define QUIRK_LITTLE_ENDIAN BIT(1)
#define QUIRK_LSW32_IS_FIRST BIT(2)
@@ -26,4 +103,352 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen,
int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
size_t pbuflen, u8 quirks);
+void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct,
+ const struct packed_field_u8 *fields, size_t num_fields,
+ u8 quirks);
+
+void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct,
+ const struct packed_field_u16 *fields, size_t num_fields,
+ u8 quirks);
+
+void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct,
+ const struct packed_field_u8 *fields, size_t num_fields,
+ u8 quirks);
+
+void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct,
+ const struct packed_field_u16 *fields, size_t num_fields,
+ u8 quirks);
+
+/* Do not hand-edit the following packed field check macros!
+ *
+ * They are generated using scripts/gen_packed_field_checks.c, which may be
+ * built via "make scripts_gen_packed_field_checks". If larger macro sizes are
+ * needed in the future, please use this program to re-generate the macros and
+ * insert them here.
+ */
+
+#define CHECK_PACKED_FIELDS_1(fields) \
+ CHECK_PACKED_FIELD(fields, 0)
+
+#define CHECK_PACKED_FIELDS_2(fields) do { \
+ CHECK_PACKED_FIELDS_1(fields); \
+ CHECK_PACKED_FIELD(fields, 1); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_3(fields) do { \
+ CHECK_PACKED_FIELDS_2(fields); \
+ CHECK_PACKED_FIELD(fields, 2); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_4(fields) do { \
+ CHECK_PACKED_FIELDS_3(fields); \
+ CHECK_PACKED_FIELD(fields, 3); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_5(fields) do { \
+ CHECK_PACKED_FIELDS_4(fields); \
+ CHECK_PACKED_FIELD(fields, 4); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_6(fields) do { \
+ CHECK_PACKED_FIELDS_5(fields); \
+ CHECK_PACKED_FIELD(fields, 5); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_7(fields) do { \
+ CHECK_PACKED_FIELDS_6(fields); \
+ CHECK_PACKED_FIELD(fields, 6); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_8(fields) do { \
+ CHECK_PACKED_FIELDS_7(fields); \
+ CHECK_PACKED_FIELD(fields, 7); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_9(fields) do { \
+ CHECK_PACKED_FIELDS_8(fields); \
+ CHECK_PACKED_FIELD(fields, 8); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_10(fields) do { \
+ CHECK_PACKED_FIELDS_9(fields); \
+ CHECK_PACKED_FIELD(fields, 9); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_11(fields) do { \
+ CHECK_PACKED_FIELDS_10(fields); \
+ CHECK_PACKED_FIELD(fields, 10); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_12(fields) do { \
+ CHECK_PACKED_FIELDS_11(fields); \
+ CHECK_PACKED_FIELD(fields, 11); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_13(fields) do { \
+ CHECK_PACKED_FIELDS_12(fields); \
+ CHECK_PACKED_FIELD(fields, 12); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_14(fields) do { \
+ CHECK_PACKED_FIELDS_13(fields); \
+ CHECK_PACKED_FIELD(fields, 13); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_15(fields) do { \
+ CHECK_PACKED_FIELDS_14(fields); \
+ CHECK_PACKED_FIELD(fields, 14); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_16(fields) do { \
+ CHECK_PACKED_FIELDS_15(fields); \
+ CHECK_PACKED_FIELD(fields, 15); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_17(fields) do { \
+ CHECK_PACKED_FIELDS_16(fields); \
+ CHECK_PACKED_FIELD(fields, 16); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_18(fields) do { \
+ CHECK_PACKED_FIELDS_17(fields); \
+ CHECK_PACKED_FIELD(fields, 17); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_19(fields) do { \
+ CHECK_PACKED_FIELDS_18(fields); \
+ CHECK_PACKED_FIELD(fields, 18); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_20(fields) do { \
+ CHECK_PACKED_FIELDS_19(fields); \
+ CHECK_PACKED_FIELD(fields, 19); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_21(fields) do { \
+ CHECK_PACKED_FIELDS_20(fields); \
+ CHECK_PACKED_FIELD(fields, 20); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_22(fields) do { \
+ CHECK_PACKED_FIELDS_21(fields); \
+ CHECK_PACKED_FIELD(fields, 21); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_23(fields) do { \
+ CHECK_PACKED_FIELDS_22(fields); \
+ CHECK_PACKED_FIELD(fields, 22); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_24(fields) do { \
+ CHECK_PACKED_FIELDS_23(fields); \
+ CHECK_PACKED_FIELD(fields, 23); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_25(fields) do { \
+ CHECK_PACKED_FIELDS_24(fields); \
+ CHECK_PACKED_FIELD(fields, 24); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_26(fields) do { \
+ CHECK_PACKED_FIELDS_25(fields); \
+ CHECK_PACKED_FIELD(fields, 25); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_27(fields) do { \
+ CHECK_PACKED_FIELDS_26(fields); \
+ CHECK_PACKED_FIELD(fields, 26); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_28(fields) do { \
+ CHECK_PACKED_FIELDS_27(fields); \
+ CHECK_PACKED_FIELD(fields, 27); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_29(fields) do { \
+ CHECK_PACKED_FIELDS_28(fields); \
+ CHECK_PACKED_FIELD(fields, 28); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_30(fields) do { \
+ CHECK_PACKED_FIELDS_29(fields); \
+ CHECK_PACKED_FIELD(fields, 29); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_31(fields) do { \
+ CHECK_PACKED_FIELDS_30(fields); \
+ CHECK_PACKED_FIELD(fields, 30); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_32(fields) do { \
+ CHECK_PACKED_FIELDS_31(fields); \
+ CHECK_PACKED_FIELD(fields, 31); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_33(fields) do { \
+ CHECK_PACKED_FIELDS_32(fields); \
+ CHECK_PACKED_FIELD(fields, 32); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_34(fields) do { \
+ CHECK_PACKED_FIELDS_33(fields); \
+ CHECK_PACKED_FIELD(fields, 33); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_35(fields) do { \
+ CHECK_PACKED_FIELDS_34(fields); \
+ CHECK_PACKED_FIELD(fields, 34); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_36(fields) do { \
+ CHECK_PACKED_FIELDS_35(fields); \
+ CHECK_PACKED_FIELD(fields, 35); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_37(fields) do { \
+ CHECK_PACKED_FIELDS_36(fields); \
+ CHECK_PACKED_FIELD(fields, 36); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_38(fields) do { \
+ CHECK_PACKED_FIELDS_37(fields); \
+ CHECK_PACKED_FIELD(fields, 37); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_39(fields) do { \
+ CHECK_PACKED_FIELDS_38(fields); \
+ CHECK_PACKED_FIELD(fields, 38); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_40(fields) do { \
+ CHECK_PACKED_FIELDS_39(fields); \
+ CHECK_PACKED_FIELD(fields, 39); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_41(fields) do { \
+ CHECK_PACKED_FIELDS_40(fields); \
+ CHECK_PACKED_FIELD(fields, 40); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_42(fields) do { \
+ CHECK_PACKED_FIELDS_41(fields); \
+ CHECK_PACKED_FIELD(fields, 41); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_43(fields) do { \
+ CHECK_PACKED_FIELDS_42(fields); \
+ CHECK_PACKED_FIELD(fields, 42); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_44(fields) do { \
+ CHECK_PACKED_FIELDS_43(fields); \
+ CHECK_PACKED_FIELD(fields, 43); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_45(fields) do { \
+ CHECK_PACKED_FIELDS_44(fields); \
+ CHECK_PACKED_FIELD(fields, 44); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_46(fields) do { \
+ CHECK_PACKED_FIELDS_45(fields); \
+ CHECK_PACKED_FIELD(fields, 45); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_47(fields) do { \
+ CHECK_PACKED_FIELDS_46(fields); \
+ CHECK_PACKED_FIELD(fields, 46); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_48(fields) do { \
+ CHECK_PACKED_FIELDS_47(fields); \
+ CHECK_PACKED_FIELD(fields, 47); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_49(fields) do { \
+ CHECK_PACKED_FIELDS_48(fields); \
+ CHECK_PACKED_FIELD(fields, 48); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS_50(fields) do { \
+ CHECK_PACKED_FIELDS_49(fields); \
+ CHECK_PACKED_FIELD(fields, 49); \
+} while (0)
+
+#define CHECK_PACKED_FIELDS(fields) \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 1, ({ CHECK_PACKED_FIELDS_1(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 2, ({ CHECK_PACKED_FIELDS_2(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 3, ({ CHECK_PACKED_FIELDS_3(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 4, ({ CHECK_PACKED_FIELDS_4(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 5, ({ CHECK_PACKED_FIELDS_5(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 6, ({ CHECK_PACKED_FIELDS_6(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 7, ({ CHECK_PACKED_FIELDS_7(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 8, ({ CHECK_PACKED_FIELDS_8(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 9, ({ CHECK_PACKED_FIELDS_9(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 10, ({ CHECK_PACKED_FIELDS_10(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 11, ({ CHECK_PACKED_FIELDS_11(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 12, ({ CHECK_PACKED_FIELDS_12(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 13, ({ CHECK_PACKED_FIELDS_13(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 14, ({ CHECK_PACKED_FIELDS_14(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 15, ({ CHECK_PACKED_FIELDS_15(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 16, ({ CHECK_PACKED_FIELDS_16(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 17, ({ CHECK_PACKED_FIELDS_17(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 18, ({ CHECK_PACKED_FIELDS_18(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 19, ({ CHECK_PACKED_FIELDS_19(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 20, ({ CHECK_PACKED_FIELDS_20(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 21, ({ CHECK_PACKED_FIELDS_21(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 22, ({ CHECK_PACKED_FIELDS_22(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 23, ({ CHECK_PACKED_FIELDS_23(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 24, ({ CHECK_PACKED_FIELDS_24(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 25, ({ CHECK_PACKED_FIELDS_25(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 26, ({ CHECK_PACKED_FIELDS_26(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 27, ({ CHECK_PACKED_FIELDS_27(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 28, ({ CHECK_PACKED_FIELDS_28(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 29, ({ CHECK_PACKED_FIELDS_29(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 30, ({ CHECK_PACKED_FIELDS_30(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 31, ({ CHECK_PACKED_FIELDS_31(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 32, ({ CHECK_PACKED_FIELDS_32(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 33, ({ CHECK_PACKED_FIELDS_33(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 34, ({ CHECK_PACKED_FIELDS_34(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 35, ({ CHECK_PACKED_FIELDS_35(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 36, ({ CHECK_PACKED_FIELDS_36(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 37, ({ CHECK_PACKED_FIELDS_37(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 38, ({ CHECK_PACKED_FIELDS_38(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 39, ({ CHECK_PACKED_FIELDS_39(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 40, ({ CHECK_PACKED_FIELDS_40(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 41, ({ CHECK_PACKED_FIELDS_41(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 42, ({ CHECK_PACKED_FIELDS_42(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 43, ({ CHECK_PACKED_FIELDS_43(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 44, ({ CHECK_PACKED_FIELDS_44(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 45, ({ CHECK_PACKED_FIELDS_45(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 46, ({ CHECK_PACKED_FIELDS_46(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 47, ({ CHECK_PACKED_FIELDS_47(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 48, ({ CHECK_PACKED_FIELDS_48(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 49, ({ CHECK_PACKED_FIELDS_49(fields); }), \
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 50, ({ CHECK_PACKED_FIELDS_50(fields); }), \
+ ({ BUILD_BUG_ON_MSG(1, "CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than 50."); }) \
+))))))))))))))))))))))))))))))))))))))))))))))))))
+
+/* End of generated content */
+
+#define pack_fields(pbuf, pbuflen, ustruct, fields, quirks) \
+ ({ \
+ CHECK_PACKED_FIELDS(fields); \
+ CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \
+ _Generic((fields), \
+ const struct packed_field_u8 * : pack_fields_u8, \
+ const struct packed_field_u16 * : pack_fields_u16 \
+ )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \
+ })
+
+#define unpack_fields(pbuf, pbuflen, ustruct, fields, quirks) \
+ ({ \
+ CHECK_PACKED_FIELDS(fields); \
+ CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \
+ _Generic((fields), \
+ const struct packed_field_u8 * : unpack_fields_u8, \
+ const struct packed_field_u16 * : unpack_fields_u16 \
+ )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \
+ })
+
#endif
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 563c46205685..e597a32cc787 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -298,6 +298,29 @@ static inline const char *phy_modes(phy_interface_t interface)
}
}
+/**
+ * rgmii_clock - map link speed to the clock rate
+ * @speed: link speed value
+ *
+ * Description: maps RGMII supported link speeds
+ * into the clock rates.
+ *
+ * Returns: clock rate or negative errno
+ */
+static inline long rgmii_clock(int speed)
+{
+ switch (speed) {
+ case SPEED_10:
+ return 2500000;
+ case SPEED_100:
+ return 25000000;
+ case SPEED_1000:
+ return 125000000;
+ default:
+ return -EINVAL;
+ }
+}
+
#define PHY_INIT_TIMEOUT 100000
#define PHY_FORCE_TIMEOUT 10
@@ -818,6 +841,24 @@ struct phy_tdr_config {
#define PHY_PAIR_ALL -1
/**
+ * enum link_inband_signalling - in-band signalling modes that are supported
+ *
+ * @LINK_INBAND_DISABLE: in-band signalling can be disabled
+ * @LINK_INBAND_ENABLE: in-band signalling can be enabled without bypass
+ * @LINK_INBAND_BYPASS: in-band signalling can be enabled with bypass
+ *
+ * The possible and required bits can only be used if the valid bit is set.
+ * If possible is clear, that means inband signalling can not be used.
+ * Required is only valid when possible is set, and means that inband
+ * signalling must be used.
+ */
+enum link_inband_signalling {
+ LINK_INBAND_DISABLE = BIT(0),
+ LINK_INBAND_ENABLE = BIT(1),
+ LINK_INBAND_BYPASS = BIT(2),
+};
+
+/**
* struct phy_plca_cfg - Configuration of the PLCA (Physical Layer Collision
* Avoidance) Reconciliation Sublayer.
*
@@ -957,6 +998,19 @@ struct phy_driver {
int (*get_features)(struct phy_device *phydev);
/**
+ * @inband_caps: query whether in-band is supported for the given PHY
+ * interface mode. Returns a bitmask of bits defined by enum
+ * link_inband_signalling.
+ */
+ unsigned int (*inband_caps)(struct phy_device *phydev,
+ phy_interface_t interface);
+
+ /**
+ * @config_inband: configure in-band mode for the PHY
+ */
+ int (*config_inband)(struct phy_device *phydev, unsigned int modes);
+
+ /**
* @get_rate_matching: Get the supported type of rate matching for a
* particular phy interface. This is used by phy consumers to determine
* whether to advertise lower-speed modes for that interface. It is
@@ -1818,6 +1872,9 @@ int phy_config_aneg(struct phy_device *phydev);
int _phy_start_aneg(struct phy_device *phydev);
int phy_start_aneg(struct phy_device *phydev);
int phy_aneg_done(struct phy_device *phydev);
+unsigned int phy_inband_caps(struct phy_device *phydev,
+ phy_interface_t interface);
+int phy_config_inband(struct phy_device *phydev, unsigned int modes);
int phy_speed_down(struct phy_device *phydev, bool sync);
int phy_speed_up(struct phy_device *phydev);
bool phy_check_valid(int speed, int duplex, unsigned long *features);
@@ -1957,7 +2014,7 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev,
int genphy_c45_plca_get_status(struct phy_device *phydev,
struct phy_plca_status *plca_st);
int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
- unsigned long *lp, bool *is_enabled);
+ unsigned long *lp);
int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
struct ethtool_keee *data);
int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 5c01048860c4..5462cc6a37dc 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -419,6 +419,7 @@ struct phylink_pcs {
/**
* struct phylink_pcs_ops - MAC PCS operations structure.
* @pcs_validate: validate the link configuration.
+ * @pcs_inband_caps: query inband support for interface mode.
* @pcs_enable: enable the PCS.
* @pcs_disable: disable the PCS.
* @pcs_pre_config: pre-mac_config method (for errata)
@@ -434,6 +435,8 @@ struct phylink_pcs {
struct phylink_pcs_ops {
int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported,
const struct phylink_link_state *state);
+ unsigned int (*pcs_inband_caps)(struct phylink_pcs *pcs,
+ phy_interface_t interface);
int (*pcs_enable)(struct phylink_pcs *pcs);
void (*pcs_disable)(struct phylink_pcs *pcs);
void (*pcs_pre_config)(struct phylink_pcs *pcs,
@@ -471,6 +474,20 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
const struct phylink_link_state *state);
/**
+ * pcs_inband_caps - query PCS in-band capabilities for interface mode.
+ * @pcs: a pointer to a &struct phylink_pcs.
+ * @interface: interface mode to be queried
+ *
+ * Returns zero if it is unknown what in-band signalling is supported by the
+ * PCS (e.g. because the PCS driver doesn't implement the method.) Otherwise,
+ * returns a bit mask of the LINK_INBAND_* values from
+ * &enum link_inband_signalling to describe which inband modes are supported
+ * for this interface mode.
+ */
+unsigned int pcs_inband_caps(struct phylink_pcs *pcs,
+ phy_interface_t interface);
+
+/**
* pcs_enable() - enable the PCS.
* @pcs: a pointer to a &struct phylink_pcs.
*/
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index c892d22ce0a7..0d68d09bedd1 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -307,7 +307,7 @@ static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm)
* @info: Structure describing the new clock.
* @parent: Pointer to the parent device of the new clock.
*
- * Returns a valid pointer on success or PTR_ERR on failure. If PHC
+ * Returns: a valid pointer on success or PTR_ERR on failure. If PHC
* support is missing at the configuration level, this function
* returns NULL, and drivers are expected to gracefully handle that
* case separately.
@@ -445,7 +445,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index);
* @hwtstamp: timestamp
* @vclock_index: phc index of ptp vclock.
*
- * Returns converted timestamp, or 0 on error.
+ * Returns: converted timestamp, or 0 on error.
*/
ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index);
#else
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 997b34197385..6816e4c5f3f0 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -241,7 +241,7 @@ bool rfkill_soft_blocked(struct rfkill *rfkill);
* rfkill_find_type - Helper for finding rfkill type by name
* @name: the name of the type
*
- * Returns enum rfkill_type that corresponds to the name.
+ * Returns: enum rfkill_type that corresponds to the name.
*/
enum rfkill_type rfkill_find_type(const char *name);
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 14b88f551920..3b9d132cbc9e 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -78,7 +78,7 @@ static inline bool lockdep_rtnl_is_held(void)
* rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
* @p: The pointer to read, prior to dereferencing
*
- * Return the value of the specified RCU-protected pointer, but omit
+ * Return: the value of the specified RCU-protected pointer, but omit
* the READ_ONCE(), because caller holds RTNL.
*/
#define rtnl_dereference(p) \
@@ -178,6 +178,12 @@ void rtnetlink_init(void);
void __rtnl_unlock(void);
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail);
+/* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */
+struct ndo_fdb_dump_context {
+ unsigned long ifindex;
+ unsigned long fdb_idx;
+};
+
extern int ndo_dflt_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 58009fa66102..69624b394cd9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1134,7 +1134,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb)
* skb_dst - returns skb dst_entry
* @skb: buffer
*
- * Returns skb dst_entry, regardless of reference taken or not.
+ * Returns: skb dst_entry, regardless of reference taken or not.
*/
static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
{
@@ -1222,7 +1222,7 @@ static inline bool skb_wifi_acked_valid(const struct sk_buff *skb)
* skb_unref - decrement the skb's reference count
* @skb: buffer
*
- * Returns true if we can free the skb.
+ * Returns: true if we can free the skb.
*/
static inline bool skb_unref(struct sk_buff *skb)
{
@@ -1344,7 +1344,7 @@ struct sk_buff_fclones {
* @sk: socket
* @skb: buffer
*
- * Returns true if skb is a fast clone, and its clone is not freed.
+ * Returns: true if skb is a fast clone, and its clone is not freed.
* Some drivers call skb_orphan() in their ndo_start_xmit(),
* so we also check that didn't happen.
*/
@@ -3516,7 +3516,7 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask)
* A page shouldn't be considered for reusing/recycling if it was allocated
* under memory pressure or at a distant memory node.
*
- * Returns false if this page should be returned to page allocator, true
+ * Returns: false if this page should be returned to page allocator, true
* otherwise.
*/
static inline bool dev_page_is_reusable(const struct page *page)
@@ -3627,13 +3627,13 @@ static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag)
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom);
int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
- struct bpf_prog *prog);
+ const struct bpf_prog *prog);
/**
* skb_frag_address - gets the address of the data contained in a paged fragment
* @frag: the paged fragment buffer
*
- * Returns the address of the data within @frag. The page must already
+ * Returns: the address of the data within @frag. The page must already
* be mapped.
*/
static inline void *skb_frag_address(const skb_frag_t *frag)
@@ -3648,7 +3648,7 @@ static inline void *skb_frag_address(const skb_frag_t *frag)
* skb_frag_address_safe - gets the address of the data contained in a paged fragment
* @frag: the paged fragment buffer
*
- * Returns the address of the data within @frag. Checks that the page
+ * Returns: the address of the data within @frag. Checks that the page
* is mapped and returns %NULL otherwise.
*/
static inline void *skb_frag_address_safe(const skb_frag_t *frag)
@@ -3890,7 +3890,7 @@ static inline int skb_linearize(struct sk_buff *skb)
* skb_has_shared_frag - can any frag be overwritten
* @skb: buffer to test
*
- * Return true if the skb has at least one frag that might be modified
+ * Return: true if the skb has at least one frag that might be modified
* by an external entity (as in vmsplice()/sendfile())
*/
static inline bool skb_has_shared_frag(const struct sk_buff *skb)
@@ -4612,7 +4612,7 @@ static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb)
/* Check if we need to perform checksum complete validation.
*
- * Returns true if checksum complete is needed, false otherwise
+ * Returns: true if checksum complete is needed, false otherwise
* (either checksum is unnecessary or zero checksum is allowed).
*/
static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index d79ff252cfdc..c9878a612e53 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -33,7 +33,9 @@
#define STMMAC_CSR_20_35M 0x2 /* MDC = clk_scr_i/16 */
#define STMMAC_CSR_35_60M 0x3 /* MDC = clk_scr_i/26 */
#define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */
-#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/122 */
+#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/124 */
+#define STMMAC_CSR_300_500M 0x6 /* MDC = clk_scr_i/204 */
+#define STMMAC_CSR_500_800M 0x7 /* MDC = clk_scr_i/324 */
/* MTL algorithms identifiers */
#define MTL_TX_ALGORITHM_WRR 0x0
@@ -250,8 +252,8 @@ struct plat_stmmacenet_data {
struct clk *stmmac_clk;
struct clk *pclk;
struct clk *clk_ptp_ref;
- unsigned int clk_ptp_rate;
- unsigned int clk_ref_rate;
+ unsigned long clk_ptp_rate;
+ unsigned long clk_ref_rate;
unsigned int mult_fact_100ns;
s32 ptp_max_adj;
u32 cdc_error_adj;
@@ -263,7 +265,7 @@ struct plat_stmmacenet_data {
int mac_port_sel_speed;
int has_xgmac;
u8 vlan_fail_q;
- unsigned int eee_usecs_rate;
+ unsigned long eee_usecs_rate;
struct pci_dev *pdev;
int int_snapshot_num;
int msi_mac_vec;
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 79c781875c09..a4d6cc0c9f68 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -97,7 +97,7 @@ struct wwan_port_caps {
*
* This function must be balanced with a call to wwan_remove_port().
*
- * Returns a valid pointer to wwan_port on success or PTR_ERR on failure
+ * Returns: a valid pointer to wwan_port on success or PTR_ERR on failure
*/
struct wwan_port *wwan_create_port(struct device *parent,
enum wwan_port_type type,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 27acf1292a5c..182f7965048f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5957,7 +5957,7 @@ int wiphy_register(struct wiphy *wiphy);
* @wiphy: the wiphy to check the locking on
* @p: The pointer to read, prior to dereferencing
*
- * Return the value of the specified RCU-protected pointer, but omit the
+ * Return: the value of the specified RCU-protected pointer, but omit the
* READ_ONCE(), because caller holds the wiphy mutex used for updates.
*/
#define wiphy_dereference(wiphy, p) \
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 6c5a1ea209a2..c29282fabae6 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -58,6 +58,9 @@
FN(TC_EGRESS) \
FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
+ FN(FQ_BAND_LIMIT) \
+ FN(FQ_HORIZON_LIMIT) \
+ FN(FQ_FLOW_LIMIT) \
FN(CPU_BACKLOG) \
FN(XDP) \
FN(TC_INGRESS) \
@@ -312,6 +315,21 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_QDISC_DROP,
/**
+ * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band
+ * limit is reached.
+ */
+ SKB_DROP_REASON_FQ_BAND_LIMIT,
+ /**
+ * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet
+ * timestamp is too far in the future.
+ */
+ SKB_DROP_REASON_FQ_HORIZON_LIMIT,
+ /**
+ * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow
+ * exceeds its limits.
+ */
+ SKB_DROP_REASON_FQ_FLOW_LIMIT,
+ /**
* @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
* backlog queue. This can be caused by backlog queue full (see
* netdev_max_backlog in net.rst) or RPS flow limit
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 72ae65e7246a..4aeedb296d67 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -988,6 +988,7 @@ struct dsa_switch_ops {
/*
* Port's MAC EEE settings
*/
+ bool (*support_eee)(struct dsa_switch *ds, int port);
int (*set_mac_eee)(struct dsa_switch *ds, int port,
struct ethtool_keee *e);
int (*get_mac_eee)(struct dsa_switch *ds, int port,
@@ -1383,5 +1384,6 @@ static inline bool dsa_user_dev_check(const struct net_device *dev)
netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
+bool dsa_supports_eee(struct dsa_switch *ds, int port);
#endif
diff --git a/include/net/dst.h b/include/net/dst.h
index 0f303cc60252..78c78cdce0e9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -307,7 +307,7 @@ static inline bool dst_hold_safe(struct dst_entry *dst)
* @skb: buffer
*
* If dst is not yet refcounted and not destroyed, grab a ref on it.
- * Returns true if dst is refcounted.
+ * Returns: true if dst is refcounted.
*/
static inline bool skb_dst_force(struct sk_buff *skb)
{
@@ -440,6 +440,15 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
dst->expires = expires;
}
+static inline unsigned int dst_dev_overhead(struct dst_entry *dst,
+ struct sk_buff *skb)
+{
+ if (likely(dst))
+ return LL_RESERVED_SPACE(dst->dev);
+
+ return skb->mac_len;
+}
+
INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index d096cc6352de..a03d56765832 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -354,7 +354,7 @@ __genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags)
* such requests) or a struct initialized by genl_info_init_ntf()
* when constructing notifications.
*
- * Returns pointer to new genetlink header.
+ * Returns: pointer to new genetlink header.
*/
static inline void *
genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
@@ -366,7 +366,7 @@ genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
* genlmsg_nlhdr - Obtain netlink header from user specified header
* @user_hdr: user header as returned from genlmsg_put()
*
- * Returns pointer to netlink header.
+ * Returns: pointer to netlink header.
*/
static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr)
{
@@ -435,7 +435,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb,
* @flags: netlink message flags
* @cmd: generic netlink command
*
- * Returns pointer to user specific header
+ * Returns: pointer to user specific header
*/
static inline void *genlmsg_put_reply(struct sk_buff *skb,
struct genl_info *info,
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 62c0a7e65d6b..67a313575780 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -74,6 +74,10 @@ struct inet_timewait_sock {
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
+ /**
+ * @tw_entry_stamp: Time of entry into %TCP_TIME_WAIT state in msec.
+ */
+ u32 tw_entry_stamp;
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
struct inet_bind2_bucket *tw_tb2;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 248bfb26e2af..f5c43ad1565e 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -471,7 +471,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
/* This helper is specialized for BIG TCP needs.
* It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
* It assumes headers are already in skb->head.
- * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there.
+ * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there.
*/
static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
{
diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
index dd9e93c12260..9804fa5d9c67 100644
--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -202,7 +202,7 @@ struct iucv_handler {
*
* Registers a driver with IUCV.
*
- * Returns 0 on success, -ENOMEM if the memory allocation for the pathid
+ * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid
* table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus.
*/
int iucv_register(struct iucv_handler *handler, int smp);
@@ -224,7 +224,7 @@ void iucv_unregister(struct iucv_handler *handle, int smp);
*
* Allocate a new path structure for use with iucv_connect.
*
- * Returns NULL if the memory allocation failed or a pointer to the
+ * Returns: NULL if the memory allocation failed or a pointer to the
* path structure.
*/
static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp)
@@ -260,7 +260,7 @@ static inline void iucv_path_free(struct iucv_path *path)
* This function is issued after the user received a connection pending
* external interrupt and now wishes to complete the IUCV communication path.
*
- * Returns the result of the CP IUCV call.
+ * Returns: the result of the CP IUCV call.
*/
int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
u8 *userdata, void *private);
@@ -278,7 +278,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
* successfully, you are not able to use the path until you receive an IUCV
* Connection Complete external interrupt.
*
- * Returns the result of the CP IUCV call.
+ * Returns: the result of the CP IUCV call.
*/
int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
u8 *userid, u8 *system, u8 *userdata,
@@ -292,7 +292,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
* This function temporarily suspends incoming messages on an IUCV path.
* You can later reactivate the path by invoking the iucv_resume function.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_quiesce(struct iucv_path *path, u8 *userdata);
@@ -304,7 +304,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata);
* This function resumes incoming messages on an IUCV path that has
* been stopped with iucv_path_quiesce.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_resume(struct iucv_path *path, u8 *userdata);
@@ -315,7 +315,7 @@ int iucv_path_resume(struct iucv_path *path, u8 *userdata);
*
* This function terminates an IUCV path.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_sever(struct iucv_path *path, u8 *userdata);
@@ -327,7 +327,7 @@ int iucv_path_sever(struct iucv_path *path, u8 *userdata);
*
* Cancels a message you have sent.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
u32 srccls);
@@ -347,7 +347,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: local_bh_enable/local_bh_disable
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size, size_t *residual);
@@ -367,7 +367,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: no locking.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size,
@@ -382,7 +382,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
* are notified of a message and the time that you complete the message,
* the message may be rejected.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
@@ -399,7 +399,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
* pathid, msgid, and trgcls. Prmmsg signifies the data is moved into
* the parameter list.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *reply, size_t size);
@@ -419,7 +419,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: local_bh_enable/local_bh_disable
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
@@ -439,7 +439,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: no locking.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
@@ -461,7 +461,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
* reply to the message and a buffer is provided into which IUCV moves
* the reply to this message.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size,
diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 5adf6fda11e8..06985530517b 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -49,7 +49,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
*
* nf_tproxy_handle_time_wait4() consumes the socket reference passed in.
*
- * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * Returns: the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
@@ -108,7 +108,7 @@ nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
*
* nf_tproxy_handle_time_wait6() consumes the socket reference passed in.
*
- * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * Returns: the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 39eaa6be6ca8..e015ffbed819 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -649,7 +649,7 @@ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining)
* @nlh: netlink message header
* @remaining: number of bytes remaining in message stream
*
- * Returns the next netlink message in the message stream and
+ * Returns: the next netlink message in the message stream and
* decrements remaining by the size of the current message.
*/
static inline struct nlmsghdr *
@@ -676,7 +676,7 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining)
* exceeding maxtype will be rejected, policy must be specified, attributes
* will be validated in the strictest way possible.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse(struct nlattr **tb, int maxtype,
const struct nlattr *head, int len,
@@ -701,7 +701,7 @@ static inline int nla_parse(struct nlattr **tb, int maxtype,
* exceeding maxtype will be ignored and attributes from the policy are not
* always strictly validated (only for new attributes).
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
const struct nlattr *head, int len,
@@ -726,7 +726,7 @@ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
* exceeding maxtype will be rejected as well as trailing data, but the
* policy is not completely strictly validated (only for new attributes).
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype,
const struct nlattr *head,
@@ -833,7 +833,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @attrtype: type of attribute to look for
*
- * Returns the first attribute which matches the specified type.
+ * Returns: the first attribute which matches the specified type.
*/
static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
int hdrlen, int attrtype)
@@ -854,7 +854,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
* specified policy. Validation is done in liberal mode.
* See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_validate_deprecated(const struct nlattr *head, int len,
int maxtype,
@@ -877,7 +877,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len,
* specified policy. Validation is done in strict mode.
* See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_validate(const struct nlattr *head, int len, int maxtype,
const struct nla_policy *policy,
@@ -914,7 +914,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
* nlmsg_report - need to report back to application?
* @nlh: netlink message header
*
- * Returns 1 if a report back to the application is requested.
+ * Returns: 1 if a report back to the application is requested.
*/
static inline int nlmsg_report(const struct nlmsghdr *nlh)
{
@@ -925,7 +925,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh)
* nlmsg_seq - return the seq number of netlink message
* @nlh: netlink message header
*
- * Returns 0 if netlink message is NULL
+ * Returns: 0 if netlink message is NULL
*/
static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
{
@@ -952,7 +952,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
* @payload: length of message payload
* @flags: message flags
*
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
@@ -971,7 +971,7 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se
*
* Append data to an existing nlmsg, used when constructing a message
* with multiple fixed-format headers (which is rare).
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the extra payload.
*/
static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
@@ -993,7 +993,7 @@ static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
* @payload: length of message payload
* @flags: message flags
*
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
@@ -1050,7 +1050,7 @@ static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
* nlmsg_get_pos - return current position in netlink message
* @skb: socket buffer the message is stored in
*
- * Returns a pointer to the current tail of the message.
+ * Returns: a pointer to the current tail of the message.
*/
static inline void *nlmsg_get_pos(struct sk_buff *skb)
{
@@ -1276,7 +1276,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining)
* @nla: netlink attribute
* @remaining: number of bytes remaining in attribute stream
*
- * Returns the next netlink attribute in the attribute stream and
+ * Returns: the next netlink attribute in the attribute stream and
* decrements remaining by the size of the current attribute.
*/
static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
@@ -1292,7 +1292,7 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
* @nla: attribute containing the nested attributes
* @attrtype: type of attribute to look for
*
- * Returns the first attribute which matches the specified type.
+ * Returns: the first attribute which matches the specified type.
*/
static inline struct nlattr *
nla_find_nested(const struct nlattr *nla, int attrtype)
@@ -2091,7 +2091,7 @@ static inline int nla_get_flag(const struct nlattr *nla)
* nla_get_msecs - return payload of msecs attribute
* @nla: msecs netlink attribute
*
- * Returns the number of milliseconds in jiffies.
+ * Returns: the number of milliseconds in jiffies.
*/
static inline unsigned long nla_get_msecs(const struct nlattr *nla)
{
@@ -2183,7 +2183,7 @@ static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp)
* marked their nest attributes with NLA_F_NESTED flag. New APIs should use
* nla_nest_start() which sets the flag.
*
- * Returns the container attribute or NULL on error
+ * Returns: the container attribute or NULL on error
*/
static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb,
int attrtype)
@@ -2204,7 +2204,7 @@ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb,
* Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED
* flag. This is the preferred function to use in new code.
*
- * Returns the container attribute or NULL on error
+ * Returns: the container attribute or NULL on error
*/
static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
{
@@ -2219,7 +2219,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
* Corrects the container attribute header to include the all
* appended attributes.
*
- * Returns the total data length of the skb.
+ * Returns: the total data length of the skb.
*/
static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
{
@@ -2252,7 +2252,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
* specified policy. Attributes with a type exceeding maxtype will be
* ignored. See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int __nla_validate_nested(const struct nlattr *start, int maxtype,
const struct nla_policy *policy,
@@ -2285,7 +2285,7 @@ nla_validate_nested_deprecated(const struct nlattr *start, int maxtype,
* nla_need_padding_for_64bit - test 64-bit alignment of the next attribute
* @skb: socket buffer the message is stored in
*
- * Return true if padding is needed to align the next attribute (nla_data()) to
+ * Return: true if padding is needed to align the next attribute (nla_data()) to
* a 64-bit aligned area.
*/
static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
@@ -2312,7 +2312,7 @@ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
* This will only be done in architectures which do not have
* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined.
*
- * Returns zero on success or a negative error code.
+ * Returns: zero on success or a negative error code.
*/
static inline int nla_align_64bit(struct sk_buff *skb, int padattr)
{
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 8a6e20be4b9d..1b58faa4f20f 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -72,6 +72,22 @@ static inline bool netmem_is_net_iov(const netmem_ref netmem)
return (__force unsigned long)netmem & NET_IOV;
}
+/**
+ * __netmem_to_page - unsafely get pointer to the &page backing @netmem
+ * @netmem: netmem reference to convert
+ *
+ * Unsafe version of netmem_to_page(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (no check for the LSB, no WARN). When @netmem points to IOV,
+ * provokes undefined behaviour.
+ *
+ * Return: pointer to the &page (garbage if @netmem is not page-backed).
+ */
+static inline struct page *__netmem_to_page(netmem_ref netmem)
+{
+ return (__force struct page *)netmem;
+}
+
/* This conversion fails (returns NULL) if the netmem_ref is not struct page
* backed.
*/
@@ -80,7 +96,7 @@ static inline struct page *netmem_to_page(netmem_ref netmem)
if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
return NULL;
- return (__force struct page *)netmem;
+ return __netmem_to_page(netmem);
}
static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem)
@@ -103,6 +119,17 @@ static inline netmem_ref page_to_netmem(struct page *page)
return (__force netmem_ref)page;
}
+/**
+ * virt_to_netmem - convert virtual memory pointer to a netmem reference
+ * @data: host memory pointer to convert
+ *
+ * Return: netmem reference to the &page backing this virtual address.
+ */
+static inline netmem_ref virt_to_netmem(const void *data)
+{
+ return page_to_netmem(virt_to_page(data));
+}
+
static inline int netmem_ref_count(netmem_ref netmem)
{
/* The non-pp refcount of net_iov is always 1. On net_iov, we only
@@ -127,6 +154,22 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
}
+/**
+ * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
+ * @netmem: netmem reference to get the pointer from
+ *
+ * Unsafe version of netmem_get_pp(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (avoids clearing the LSB). When @netmem points to IOV,
+ * provokes invalid memory access.
+ *
+ * Return: pointer to the &page_pool (garbage if @netmem is not page-backed).
+ */
+static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
+{
+ return __netmem_to_page(netmem)->pp;
+}
+
static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
{
return __netmem_clear_lsb(netmem)->pp;
@@ -158,12 +201,43 @@ static inline netmem_ref netmem_compound_head(netmem_ref netmem)
return page_to_netmem(compound_head(netmem_to_page(netmem)));
}
+/**
+ * __netmem_address - unsafely get pointer to the memory backing @netmem
+ * @netmem: netmem reference to get the pointer for
+ *
+ * Unsafe version of netmem_address(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (no check for the LSB). When @netmem points to IOV, provokes
+ * undefined behaviour.
+ *
+ * Return: pointer to the memory (garbage if @netmem is not page-backed).
+ */
+static inline void *__netmem_address(netmem_ref netmem)
+{
+ return page_address(__netmem_to_page(netmem));
+}
+
static inline void *netmem_address(netmem_ref netmem)
{
if (netmem_is_net_iov(netmem))
return NULL;
- return page_address(netmem_to_page(netmem));
+ return __netmem_address(netmem);
+}
+
+/**
+ * netmem_is_pfmemalloc - check if @netmem was allocated under memory pressure
+ * @netmem: netmem reference to check
+ *
+ * Return: true if @netmem is page-backed and the page was allocated under
+ * memory pressure, false otherwise.
+ */
+static inline bool netmem_is_pfmemalloc(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return false;
+
+ return page_is_pfmemalloc(netmem_to_page(netmem));
}
static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 3c014170e001..46452da35206 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -175,6 +175,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_retries2;
u8 sysctl_tcp_orphan_retries;
u8 sysctl_tcp_tw_reuse;
+ unsigned int sysctl_tcp_tw_reuse_delay;
int sysctl_tcp_fin_timeout;
u8 sysctl_tcp_sack;
u8 sysctl_tcp_window_scaling;
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 793e6fd78bc5..26caa2c20912 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -104,8 +104,7 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
*
* Get a page fragment from the page allocator or page_pool caches.
*
- * Return:
- * Return allocated page fragment, otherwise return NULL.
+ * Return: allocated page fragment, otherwise return NULL.
*/
static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
unsigned int *offset,
@@ -155,8 +154,7 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
* depending on the requested size in order to allocate memory with least memory
* utilization and performance penalty.
*
- * Return:
- * Return allocated page or page fragment, otherwise return NULL.
+ * Return: allocated page or page fragment, otherwise return NULL.
*/
static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
unsigned int *offset,
@@ -190,8 +188,7 @@ static inline void *page_pool_alloc_va(struct page_pool *pool,
* This is just a thin wrapper around the page_pool_alloc() API, and
* it returns va of the allocated page or page fragment.
*
- * Return:
- * Return the va for the allocated page or page fragment, otherwise return NULL.
+ * Return: the va for the allocated page or page fragment, otherwise return NULL.
*/
static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
unsigned int *size)
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index c022c410abe3..1ea16b0e9c79 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -259,8 +259,8 @@ void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
const struct xdp_mem_info *mem);
-void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count);
+void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data,
+ u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
@@ -272,8 +272,8 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool,
{
}
-static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count)
+static inline void page_pool_put_netmem_bulk(struct page_pool *pool,
+ netmem_ref *data, u32 count)
{
}
#endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index cf199af85c52..22c5ab4269d7 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -319,7 +319,7 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts,
* tcf_exts_has_actions - check if at least one action is present
* @exts: tc filter extensions handle
*
- * Returns true if at least one action is present.
+ * Returns: true if at least one action is present.
*/
static inline bool tcf_exts_has_actions(struct tcf_exts *exts)
{
@@ -501,7 +501,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
* through all ematches respecting their logic relations returning
* as soon as the result is obvious.
*
- * Returns 1 if the ematch tree as-one matches, no ematches are configured
+ * Returns: 1 if the ematch tree as-one matches, no ematches are configured
* or ematch is not enabled in the kernel, otherwise 0 is returned.
*/
static inline int tcf_em_tree_match(struct sk_buff *skb,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5d74fa7e694c..8074322dd636 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1245,6 +1245,14 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_DROP;
}
+static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free,
+ enum skb_drop_reason reason)
+{
+ tcf_set_drop_reason(skb, reason);
+ return qdisc_drop(skb, sch, to_free);
+}
+
static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e9b37b76e894..5b2b04835688 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1817,7 +1817,7 @@ int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
* @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
* @c: returned tcp_sigpool for usage (uninitialized on failure)
*
- * Returns 0 on success, error otherwise.
+ * Returns: 0 on success, error otherwise.
*/
int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
/**
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 33ba6fc151cf..2dd23ee2bacd 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -227,6 +227,7 @@ struct vxlan_config {
unsigned int addrmax;
bool no_share;
enum ifla_vxlan_df df;
+ struct vxlanhdr reserved_bits;
};
enum {
diff --git a/include/net/xdp.h b/include/net/xdp.h
index e6770dd40c91..f4020b29122f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -88,7 +88,7 @@ struct xdp_buff {
u32 flags; /* supported values defined in xdp_buff_flags */
};
-static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
}
@@ -103,7 +103,8 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
}
-static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+static __always_inline bool
+xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}
@@ -144,15 +145,16 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
static inline struct skb_shared_info *
-xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
+xdp_get_shared_info_from_buff(const struct xdp_buff *xdp)
{
return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}
-static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+static __always_inline unsigned int
+xdp_get_buff_len(const struct xdp_buff *xdp)
{
unsigned int len = xdp->data_end - xdp->data;
- struct skb_shared_info *sinfo;
+ const struct skb_shared_info *sinfo;
if (likely(!xdp_buff_has_frags(xdp)))
goto out;
@@ -177,12 +179,13 @@ struct xdp_frame {
u32 flags; /* supported values defined in xdp_buff_flags */
};
-static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame)
{
return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
}
-static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+static __always_inline bool
+xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame)
{
return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}
@@ -191,7 +194,7 @@ static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame
struct xdp_frame_bulk {
int count;
void *xa;
- void *q[XDP_BULK_QUEUE_SIZE];
+ netmem_ref q[XDP_BULK_QUEUE_SIZE];
};
static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
@@ -201,7 +204,7 @@ static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
}
static inline struct skb_shared_info *
-xdp_get_shared_info_from_frame(struct xdp_frame *frame)
+xdp_get_shared_info_from_frame(const struct xdp_frame *frame)
{
void *data_hard_start = frame->data - frame->headroom - sizeof(*frame);
@@ -249,7 +252,8 @@ int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
static inline
-void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
+void xdp_convert_frame_to_buff(const struct xdp_frame *frame,
+ struct xdp_buff *xdp)
{
xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame);
xdp->data = frame->data;
@@ -260,7 +264,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
}
static inline
-int xdp_update_frame_from_buff(struct xdp_buff *xdp,
+int xdp_update_frame_from_buff(const struct xdp_buff *xdp,
struct xdp_frame *xdp_frame)
{
int metasize, headroom;
@@ -317,9 +321,10 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
-static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
+static __always_inline unsigned int
+xdp_get_frame_len(const struct xdp_frame *xdpf)
{
- struct skb_shared_info *sinfo;
+ const struct skb_shared_info *sinfo;
unsigned int len = xdpf->len;
if (likely(!xdp_frame_has_frags(xdpf)))
@@ -351,6 +356,38 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
int xdp_reg_mem_model(struct xdp_mem_info *mem,
enum xdp_mem_type type, void *allocator);
void xdp_unreg_mem_model(struct xdp_mem_info *mem);
+int xdp_reg_page_pool(struct page_pool *pool);
+void xdp_unreg_page_pool(const struct page_pool *pool);
+void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
+ const struct page_pool *pool);
+
+/**
+ * xdp_rxq_info_attach_mem_model - attach registered mem info to RxQ info
+ * @xdp_rxq: XDP RxQ info to attach the memory info to
+ * @mem: already registered memory info
+ *
+ * If the driver registers its memory providers manually, it must use this
+ * function instead of xdp_rxq_info_reg_mem_model().
+ */
+static inline void
+xdp_rxq_info_attach_mem_model(struct xdp_rxq_info *xdp_rxq,
+ const struct xdp_mem_info *mem)
+{
+ xdp_rxq->mem = *mem;
+}
+
+/**
+ * xdp_rxq_info_detach_mem_model - detach registered mem info from RxQ info
+ * @xdp_rxq: XDP RxQ info to detach the memory info from
+ *
+ * If the driver registers its memory providers manually and then attaches it
+ * via xdp_rxq_info_attach_mem_model(), it must call this function before
+ * xdp_rxq_info_unreg().
+ */
+static inline void xdp_rxq_info_detach_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+ xdp_rxq->mem = (struct xdp_mem_info){ };
+}
/* Drivers not supporting XDP metadata can use this helper, which
* rejects any room expansion for metadata as a result.
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 40085afd9160..f3175a5d28f7 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -101,7 +101,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return xp_alloc(pool);
}
-static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+static inline bool xsk_is_eop_desc(const struct xdp_desc *desc)
{
return !xp_mb_desc(desc);
}
@@ -143,7 +143,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
list_add_tail(&frag->list_node, &frag->pool->xskb_list);
}
-static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
{
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
struct xdp_buff *ret = NULL;
@@ -200,7 +200,8 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
XDP_TXMD_FLAGS_CHECKSUM | \
0)
-static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+static inline bool
+xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta)
{
return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
}
@@ -337,7 +338,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return NULL;
}
-static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+static inline bool xsk_is_eop_desc(const struct xdp_desc *desc)
{
return false;
}
@@ -360,7 +361,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
{
}
-static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
{
return NULL;
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index bb03cee716b3..50779406bc2d 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,7 +29,7 @@ struct xdp_buff_xsk {
dma_addr_t frame_dma;
struct xsk_buff_pool *pool;
struct list_head list_node;
-};
+} __aligned_largest;
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t))
@@ -183,7 +183,7 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
!(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
}
-static inline bool xp_mb_desc(struct xdp_desc *desc)
+static inline bool xp_mb_desc(const struct xdp_desc *desc)
{
return desc->options & XDP_PKT_CONTD;
}
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index d03e0bd8c028..2f119d18a061 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -117,8 +117,10 @@
#define rxrpc_call_poke_traces \
EM(rxrpc_call_poke_abort, "Abort") \
EM(rxrpc_call_poke_complete, "Compl") \
+ EM(rxrpc_call_poke_conn_abort, "Conn-abort") \
EM(rxrpc_call_poke_error, "Error") \
EM(rxrpc_call_poke_idle, "Idle") \
+ EM(rxrpc_call_poke_rx_packet, "Rx-packet") \
EM(rxrpc_call_poke_set_timeout, "Set-timo") \
EM(rxrpc_call_poke_start, "Start") \
EM(rxrpc_call_poke_timer, "Timer") \
@@ -127,9 +129,9 @@
#define rxrpc_skb_traces \
EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \
EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \
+ EM(rxrpc_skb_get_call_rx, "GET call-rx ") \
EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \
EM(rxrpc_skb_get_conn_work, "GET conn-work") \
- EM(rxrpc_skb_get_last_nack, "GET last-nack") \
EM(rxrpc_skb_get_local_work, "GET locl-work") \
EM(rxrpc_skb_get_reject_work, "GET rej-work ") \
EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \
@@ -138,12 +140,12 @@
EM(rxrpc_skb_new_error_report, "NEW error-rpt") \
EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \
EM(rxrpc_skb_new_unshared, "NEW unshared ") \
+ EM(rxrpc_skb_put_call_rx, "PUT call-rx ") \
EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \
EM(rxrpc_skb_put_conn_work, "PUT conn-work") \
EM(rxrpc_skb_put_error_report, "PUT error-rep") \
EM(rxrpc_skb_put_input, "PUT input ") \
EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \
- EM(rxrpc_skb_put_last_nack, "PUT last-nack") \
EM(rxrpc_skb_put_purge, "PUT purge ") \
EM(rxrpc_skb_put_rotate, "PUT rotate ") \
EM(rxrpc_skb_put_unknown, "PUT unknown ") \
@@ -282,6 +284,7 @@
EM(rxrpc_call_see_activate_client, "SEE act-clnt") \
EM(rxrpc_call_see_connect_failed, "SEE con-fail") \
EM(rxrpc_call_see_connected, "SEE connect ") \
+ EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \
EM(rxrpc_call_see_disconnected, "SEE disconn ") \
EM(rxrpc_call_see_distribute_error, "SEE dist-err") \
EM(rxrpc_call_see_input, "SEE input ") \
@@ -292,7 +295,6 @@
#define rxrpc_txqueue_traces \
EM(rxrpc_txqueue_await_reply, "AWR") \
- EM(rxrpc_txqueue_dequeue, "DEQ") \
EM(rxrpc_txqueue_end, "END") \
EM(rxrpc_txqueue_queue, "QUE") \
EM(rxrpc_txqueue_queue_last, "QLS") \
@@ -300,6 +302,13 @@
EM(rxrpc_txqueue_rotate_last, "RLS") \
E_(rxrpc_txqueue_wait, "WAI")
+#define rxrpc_txdata_traces \
+ EM(rxrpc_txdata_inject_loss, " *INJ-LOSS*") \
+ EM(rxrpc_txdata_new_data, " ") \
+ EM(rxrpc_txdata_retransmit, " *RETRANS*") \
+ EM(rxrpc_txdata_tlp_new_data, " *TLP-NEW*") \
+ E_(rxrpc_txdata_tlp_retransmit, " *TLP-RETRANS*")
+
#define rxrpc_receive_traces \
EM(rxrpc_receive_end, "END") \
EM(rxrpc_receive_front, "FRN") \
@@ -335,11 +344,10 @@
E_(rxrpc_rtt_tx_ping, "PING")
#define rxrpc_rtt_rx_traces \
- EM(rxrpc_rtt_rx_other_ack, "OACK") \
+ EM(rxrpc_rtt_rx_data_ack, "DACK") \
EM(rxrpc_rtt_rx_obsolete, "OBSL") \
EM(rxrpc_rtt_rx_lost, "LOST") \
- EM(rxrpc_rtt_rx_ping_response, "PONG") \
- E_(rxrpc_rtt_rx_requested_ack, "RACK")
+ E_(rxrpc_rtt_rx_ping_response, "PONG")
#define rxrpc_timer_traces \
EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \
@@ -347,11 +355,12 @@
EM(rxrpc_timer_trace_hard, "HardLimit") \
EM(rxrpc_timer_trace_idle, "IdleLimit") \
EM(rxrpc_timer_trace_keepalive, "KeepAlive") \
- EM(rxrpc_timer_trace_lost_ack, "LostAck ") \
EM(rxrpc_timer_trace_ping, "DelayPing") \
- EM(rxrpc_timer_trace_resend, "Resend ") \
- EM(rxrpc_timer_trace_resend_reset, "ResendRst") \
- E_(rxrpc_timer_trace_resend_tx, "ResendTx ")
+ EM(rxrpc_timer_trace_rack_off, "RACK-OFF ") \
+ EM(rxrpc_timer_trace_rack_zwp, "RACK-ZWP ") \
+ EM(rxrpc_timer_trace_rack_reo, "RACK-Reo ") \
+ EM(rxrpc_timer_trace_rack_tlp_pto, "TLP-PTO ") \
+ E_(rxrpc_timer_trace_rack_rto, "RTO ")
#define rxrpc_propose_ack_traces \
EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \
@@ -362,22 +371,24 @@
EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \
EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
EM(rxrpc_propose_ack_ping_for_0_retrans, "0-Retrn") \
+ EM(rxrpc_propose_ack_ping_for_mtu_probe, "MTUProb") \
EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \
EM(rxrpc_propose_ack_ping_for_params, "Params ") \
EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \
EM(rxrpc_propose_ack_processing_op, "ProcOp ") \
EM(rxrpc_propose_ack_respond_to_ack, "Rsp2Ack") \
EM(rxrpc_propose_ack_respond_to_ping, "Rsp2Png") \
+ EM(rxrpc_propose_ack_retransmit, "Retrans") \
EM(rxrpc_propose_ack_retry_tx, "RetryTx") \
EM(rxrpc_propose_ack_rotate_rx, "RxAck ") \
EM(rxrpc_propose_ack_rx_idle, "RxIdle ") \
E_(rxrpc_propose_ack_terminal_ack, "ClTerm ")
-#define rxrpc_congest_modes \
- EM(RXRPC_CALL_CONGEST_AVOIDANCE, "CongAvoid") \
- EM(RXRPC_CALL_FAST_RETRANSMIT, "FastReTx ") \
- EM(RXRPC_CALL_PACKET_LOSS, "PktLoss ") \
- E_(RXRPC_CALL_SLOW_START, "SlowStart")
+#define rxrpc_ca_states \
+ EM(RXRPC_CA_CONGEST_AVOIDANCE, "CongAvoid") \
+ EM(RXRPC_CA_FAST_RETRANSMIT, "FastReTx ") \
+ EM(RXRPC_CA_PACKET_LOSS, "PktLoss ") \
+ E_(RXRPC_CA_SLOW_START, "SlowStart")
#define rxrpc_congest_changes \
EM(rxrpc_cong_begin_retransmission, " Retrans") \
@@ -450,7 +461,7 @@
#define rxrpc_req_ack_traces \
EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \
- EM(rxrpc_reqack_already_on, "ALREADY-ON") \
+ EM(rxrpc_reqack_app_stall, "APP-STALL ") \
EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \
EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \
EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \
@@ -460,21 +471,60 @@
/* ---- Must update size of stat_why_req_ack[] if more are added! */
#define rxrpc_txbuf_traces \
- EM(rxrpc_txbuf_alloc_ack, "ALLOC ACK ") \
EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \
EM(rxrpc_txbuf_free, "FREE ") \
EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \
EM(rxrpc_txbuf_get_trans, "GET TRANS ") \
EM(rxrpc_txbuf_get_retrans, "GET RETRANS") \
- EM(rxrpc_txbuf_put_ack_tx, "PUT ACK TX ") \
EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \
EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \
EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \
EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \
EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \
+ EM(rxrpc_txbuf_see_lost, "SEE LOST ") \
EM(rxrpc_txbuf_see_out_of_step, "OUT-OF-STEP") \
- EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \
- E_(rxrpc_txbuf_see_unacked, "SEE UNACKED")
+ E_(rxrpc_txbuf_see_send_more, "SEE SEND+ ")
+
+#define rxrpc_tq_traces \
+ EM(rxrpc_tq_alloc, "ALLOC") \
+ EM(rxrpc_tq_cleaned, "CLEAN") \
+ EM(rxrpc_tq_decant, "DCNT ") \
+ EM(rxrpc_tq_decant_advance, "DCNT>") \
+ EM(rxrpc_tq_queue, "QUEUE") \
+ EM(rxrpc_tq_queue_dup, "QUE!!") \
+ EM(rxrpc_tq_rotate, "ROT ") \
+ EM(rxrpc_tq_rotate_and_free, "ROT-F") \
+ EM(rxrpc_tq_rotate_and_keep, "ROT-K") \
+ EM(rxrpc_tq_transmit, "XMIT ") \
+ E_(rxrpc_tq_transmit_advance, "XMIT>")
+
+#define rxrpc_pmtud_reduce_traces \
+ EM(rxrpc_pmtud_reduce_ack, "Ack ") \
+ EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \
+ E_(rxrpc_pmtud_reduce_route, "Route")
+
+#define rxrpc_rotate_traces \
+ EM(rxrpc_rotate_trace_hack, "hard-ack") \
+ EM(rxrpc_rotate_trace_sack, "soft-ack") \
+ E_(rxrpc_rotate_trace_snak, "soft-nack")
+
+#define rxrpc_rack_timer_modes \
+ EM(RXRPC_CALL_RACKTIMER_OFF, "---") \
+ EM(RXRPC_CALL_RACKTIMER_RACK_REORDER, "REO") \
+ EM(RXRPC_CALL_RACKTIMER_TLP_PTO, "TLP") \
+ E_(RXRPC_CALL_RACKTIMER_RTO, "RTO")
+
+#define rxrpc_tlp_probe_traces \
+ EM(rxrpc_tlp_probe_trace_busy, "busy") \
+ EM(rxrpc_tlp_probe_trace_transmit_new, "transmit-new") \
+ E_(rxrpc_tlp_probe_trace_retransmit, "retransmit")
+
+#define rxrpc_tlp_ack_traces \
+ EM(rxrpc_tlp_ack_trace_acked, "acked") \
+ EM(rxrpc_tlp_ack_trace_dup_acked, "dup-acked") \
+ EM(rxrpc_tlp_ack_trace_hard_beyond, "hard-beyond") \
+ EM(rxrpc_tlp_ack_trace_incomplete, "incomplete") \
+ E_(rxrpc_tlp_ack_trace_new_data, "new-data")
/*
* Generate enums for tracing information.
@@ -496,18 +546,24 @@ enum rxrpc_congest_change { rxrpc_congest_changes } __mode(byte);
enum rxrpc_conn_trace { rxrpc_conn_traces } __mode(byte);
enum rxrpc_local_trace { rxrpc_local_traces } __mode(byte);
enum rxrpc_peer_trace { rxrpc_peer_traces } __mode(byte);
+enum rxrpc_pmtud_reduce_trace { rxrpc_pmtud_reduce_traces } __mode(byte);
enum rxrpc_propose_ack_outcome { rxrpc_propose_ack_outcomes } __mode(byte);
enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte);
enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte);
enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte);
enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte);
+enum rxrpc_rotate_trace { rxrpc_rotate_traces } __mode(byte);
enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte);
enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte);
enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte);
enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte);
enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte);
+enum rxrpc_tlp_ack_trace { rxrpc_tlp_ack_traces } __mode(byte);
+enum rxrpc_tlp_probe_trace { rxrpc_tlp_probe_traces } __mode(byte);
+enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte);
enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte);
enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte);
+enum rxrpc_txdata_trace { rxrpc_txdata_traces } __mode(byte);
enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte);
#endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */
@@ -525,24 +581,31 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte);
rxrpc_abort_reasons;
rxrpc_bundle_traces;
+rxrpc_ca_states;
rxrpc_call_poke_traces;
rxrpc_call_traces;
rxrpc_client_traces;
rxrpc_congest_changes;
-rxrpc_congest_modes;
rxrpc_conn_traces;
rxrpc_local_traces;
+rxrpc_pmtud_reduce_traces;
rxrpc_propose_ack_traces;
+rxrpc_rack_timer_modes;
rxrpc_receive_traces;
rxrpc_recvmsg_traces;
rxrpc_req_ack_traces;
+rxrpc_rotate_traces;
rxrpc_rtt_rx_traces;
rxrpc_rtt_tx_traces;
rxrpc_sack_traces;
rxrpc_skb_traces;
rxrpc_timer_traces;
+rxrpc_tlp_ack_traces;
+rxrpc_tlp_probe_traces;
+rxrpc_tq_traces;
rxrpc_tx_points;
rxrpc_txbuf_traces;
+rxrpc_txdata_traces;
rxrpc_txqueue_traces;
/*
@@ -581,6 +644,20 @@ TRACE_EVENT(rxrpc_local,
__entry->usage)
);
+TRACE_EVENT(rxrpc_iothread_rx,
+ TP_PROTO(struct rxrpc_local *local, unsigned int nr_rx),
+ TP_ARGS(local, nr_rx),
+ TP_STRUCT__entry(
+ __field(unsigned int, local)
+ __field(unsigned int, nr_rx)
+ ),
+ TP_fast_assign(
+ __entry->local = local->debug_id;
+ __entry->nr_rx = nr_rx;
+ ),
+ TP_printk("L=%08x nrx=%u", __entry->local, __entry->nr_rx)
+ );
+
TRACE_EVENT(rxrpc_peer,
TP_PROTO(unsigned int peer_debug_id, int ref, enum rxrpc_peer_trace why),
@@ -865,34 +942,101 @@ TRACE_EVENT(rxrpc_txqueue,
TP_STRUCT__entry(
__field(unsigned int, call)
__field(enum rxrpc_txqueue_trace, why)
- __field(rxrpc_seq_t, acks_hard_ack)
__field(rxrpc_seq_t, tx_bottom)
+ __field(rxrpc_seq_t, acks_hard_ack)
__field(rxrpc_seq_t, tx_top)
- __field(rxrpc_seq_t, tx_prepared)
+ __field(rxrpc_seq_t, send_top)
__field(int, tx_winsize)
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->why = why;
- __entry->acks_hard_ack = call->acks_hard_ack;
__entry->tx_bottom = call->tx_bottom;
+ __entry->acks_hard_ack = call->acks_hard_ack;
__entry->tx_top = call->tx_top;
- __entry->tx_prepared = call->tx_prepared;
+ __entry->send_top = call->send_top;
__entry->tx_winsize = call->tx_winsize;
),
- TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u/%u",
+ TP_printk("c=%08x %s b=%08x h=%08x n=%u/%u/%u/%u",
__entry->call,
__print_symbolic(__entry->why, rxrpc_txqueue_traces),
__entry->tx_bottom,
__entry->acks_hard_ack,
- __entry->tx_top - __entry->tx_bottom,
+ __entry->acks_hard_ack - __entry->tx_bottom,
__entry->tx_top - __entry->acks_hard_ack,
- __entry->tx_prepared - __entry->tx_bottom,
+ __entry->send_top - __entry->tx_top,
__entry->tx_winsize)
);
+TRACE_EVENT(rxrpc_transmit,
+ TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t send_top, int space),
+
+ TP_ARGS(call, send_top, space),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, seq)
+ __field(u16, space)
+ __field(u16, tx_winsize)
+ __field(u16, cong_cwnd)
+ __field(u16, cong_extra)
+ __field(u16, in_flight)
+ __field(u16, prepared)
+ __field(u16, pmtud_jumbo)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->seq = call->tx_top + 1;
+ __entry->space = space;
+ __entry->tx_winsize = call->tx_winsize;
+ __entry->cong_cwnd = call->cong_cwnd;
+ __entry->cong_extra = call->cong_extra;
+ __entry->prepared = send_top - call->tx_bottom;
+ __entry->in_flight = call->tx_top - call->tx_bottom;
+ __entry->pmtud_jumbo = call->peer->pmtud_jumbo;
+ ),
+
+ TP_printk("c=%08x q=%08x sp=%u tw=%u cw=%u+%u pr=%u if=%u pj=%u",
+ __entry->call,
+ __entry->seq,
+ __entry->space,
+ __entry->tx_winsize,
+ __entry->cong_cwnd,
+ __entry->cong_extra,
+ __entry->prepared,
+ __entry->in_flight,
+ __entry->pmtud_jumbo)
+ );
+
+TRACE_EVENT(rxrpc_tx_rotate,
+ TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, rxrpc_seq_t to),
+
+ TP_ARGS(call, seq, to),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, seq)
+ __field(rxrpc_seq_t, to)
+ __field(rxrpc_seq_t, top)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->seq = seq;
+ __entry->to = to;
+ __entry->top = call->tx_top;
+ ),
+
+ TP_printk("c=%08x q=%08x-%08x-%08x",
+ __entry->call,
+ __entry->seq,
+ __entry->to,
+ __entry->top)
+ );
+
TRACE_EVENT(rxrpc_rx_data,
TP_PROTO(unsigned int call, rxrpc_seq_t seq,
rxrpc_serial_t serial, u8 flags),
@@ -921,11 +1065,9 @@ TRACE_EVENT(rxrpc_rx_data,
);
TRACE_EVENT(rxrpc_rx_ack,
- TP_PROTO(struct rxrpc_call *call,
- rxrpc_serial_t serial, rxrpc_serial_t ack_serial,
- rxrpc_seq_t first, rxrpc_seq_t prev, u8 reason, u8 n_acks),
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_skb_priv *sp),
- TP_ARGS(call, serial, ack_serial, first, prev, reason, n_acks),
+ TP_ARGS(call, sp),
TP_STRUCT__entry(
__field(unsigned int, call)
@@ -935,23 +1077,26 @@ TRACE_EVENT(rxrpc_rx_ack,
__field(rxrpc_seq_t, prev)
__field(u8, reason)
__field(u8, n_acks)
+ __field(u8, user_status)
),
TP_fast_assign(
- __entry->call = call->debug_id;
- __entry->serial = serial;
- __entry->ack_serial = ack_serial;
- __entry->first = first;
- __entry->prev = prev;
- __entry->reason = reason;
- __entry->n_acks = n_acks;
+ __entry->call = call->debug_id;
+ __entry->serial = sp->hdr.serial;
+ __entry->user_status = sp->hdr.userStatus;
+ __entry->ack_serial = sp->ack.acked_serial;
+ __entry->first = sp->ack.first_ack;
+ __entry->prev = sp->ack.prev_ack;
+ __entry->reason = sp->ack.reason;
+ __entry->n_acks = sp->ack.nr_acks;
),
- TP_printk("c=%08x %08x %s r=%08x f=%08x p=%08x n=%u",
+ TP_printk("c=%08x %08x %s r=%08x us=%02x f=%08x p=%08x n=%u",
__entry->call,
__entry->serial,
__print_symbolic(__entry->reason, rxrpc_ack_names),
__entry->ack_serial,
+ __entry->user_status,
__entry->first,
__entry->prev,
__entry->n_acks)
@@ -981,6 +1126,29 @@ TRACE_EVENT(rxrpc_rx_abort,
__entry->abort_code)
);
+TRACE_EVENT(rxrpc_rx_conn_abort,
+ TP_PROTO(const struct rxrpc_connection *conn, const struct sk_buff *skb),
+
+ TP_ARGS(conn, skb),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, conn)
+ __field(rxrpc_serial_t, serial)
+ __field(u32, abort_code)
+ ),
+
+ TP_fast_assign(
+ __entry->conn = conn->debug_id;
+ __entry->serial = rxrpc_skb(skb)->hdr.serial;
+ __entry->abort_code = skb->priority;
+ ),
+
+ TP_printk("C=%08x ABORT %08x ac=%d",
+ __entry->conn,
+ __entry->serial,
+ __entry->abort_code)
+ );
+
TRACE_EVENT(rxrpc_rx_challenge,
TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial,
u32 version, u32 nonce, u32 min_level),
@@ -1102,9 +1270,10 @@ TRACE_EVENT(rxrpc_tx_packet,
TRACE_EVENT(rxrpc_tx_data,
TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
- rxrpc_serial_t serial, unsigned int flags, bool lose),
+ rxrpc_serial_t serial, unsigned int flags,
+ enum rxrpc_txdata_trace trace),
- TP_ARGS(call, seq, serial, flags, lose),
+ TP_ARGS(call, seq, serial, flags, trace),
TP_STRUCT__entry(
__field(unsigned int, call)
@@ -1113,7 +1282,7 @@ TRACE_EVENT(rxrpc_tx_data,
__field(u32, cid)
__field(u32, call_id)
__field(u16, flags)
- __field(bool, lose)
+ __field(enum rxrpc_txdata_trace, trace)
),
TP_fast_assign(
@@ -1123,26 +1292,26 @@ TRACE_EVENT(rxrpc_tx_data,
__entry->seq = seq;
__entry->serial = serial;
__entry->flags = flags;
- __entry->lose = lose;
+ __entry->trace = trace;
),
- TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s%s",
+ TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s",
__entry->call,
__entry->cid,
__entry->call_id,
__entry->serial,
__entry->seq,
__entry->flags & RXRPC_TXBUF_WIRE_FLAGS,
- __entry->flags & RXRPC_TXBUF_RESENT ? " *RETRANS*" : "",
- __entry->lose ? " *LOSE*" : "")
+ __print_symbolic(__entry->trace, rxrpc_txdata_traces))
);
TRACE_EVENT(rxrpc_tx_ack,
TP_PROTO(unsigned int call, rxrpc_serial_t serial,
rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial,
- u8 reason, u8 n_acks, u16 rwind),
+ u8 reason, u8 n_acks, u16 rwind,
+ enum rxrpc_propose_ack_trace trace),
- TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks, rwind),
+ TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks, rwind, trace),
TP_STRUCT__entry(
__field(unsigned int, call)
@@ -1152,6 +1321,7 @@ TRACE_EVENT(rxrpc_tx_ack,
__field(u8, reason)
__field(u8, n_acks)
__field(u16, rwind)
+ __field(enum rxrpc_propose_ack_trace, trace)
),
TP_fast_assign(
@@ -1162,16 +1332,18 @@ TRACE_EVENT(rxrpc_tx_ack,
__entry->reason = reason;
__entry->n_acks = n_acks;
__entry->rwind = rwind;
+ __entry->trace = trace;
),
- TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u rw=%u",
+ TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u rw=%u %s",
__entry->call,
__entry->serial,
__print_symbolic(__entry->reason, rxrpc_ack_names),
__entry->ack_first,
__entry->ack_serial,
__entry->n_acks,
- __entry->rwind)
+ __entry->rwind,
+ __print_symbolic(__entry->trace, rxrpc_propose_ack_traces))
);
TRACE_EVENT(rxrpc_receive,
@@ -1296,9 +1468,9 @@ TRACE_EVENT(rxrpc_rtt_rx,
TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
int slot,
rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
- u32 rtt, u32 rto),
+ u32 rtt, u32 srtt, u32 rto),
- TP_ARGS(call, why, slot, send_serial, resp_serial, rtt, rto),
+ TP_ARGS(call, why, slot, send_serial, resp_serial, rtt, srtt, rto),
TP_STRUCT__entry(
__field(unsigned int, call)
@@ -1307,7 +1479,9 @@ TRACE_EVENT(rxrpc_rtt_rx,
__field(rxrpc_serial_t, send_serial)
__field(rxrpc_serial_t, resp_serial)
__field(u32, rtt)
+ __field(u32, srtt)
__field(u32, rto)
+ __field(u32, min_rtt)
),
TP_fast_assign(
@@ -1317,17 +1491,21 @@ TRACE_EVENT(rxrpc_rtt_rx,
__entry->send_serial = send_serial;
__entry->resp_serial = resp_serial;
__entry->rtt = rtt;
+ __entry->srtt = srtt;
__entry->rto = rto;
+ __entry->min_rtt = minmax_get(&call->min_rtt);
),
- TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u rto=%u",
+ TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u srtt=%u rto=%u min=%u",
__entry->call,
__entry->slot,
__print_symbolic(__entry->why, rxrpc_rtt_rx_traces),
__entry->send_serial,
__entry->resp_serial,
__entry->rtt,
- __entry->rto)
+ __entry->srtt / 8,
+ __entry->rto,
+ __entry->min_rtt)
);
TRACE_EVENT(rxrpc_timer_set,
@@ -1544,112 +1722,125 @@ TRACE_EVENT(rxrpc_drop_ack,
);
TRACE_EVENT(rxrpc_retransmit,
- TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
- rxrpc_serial_t serial, ktime_t expiry),
+ TP_PROTO(struct rxrpc_call *call,
+ struct rxrpc_send_data_req *req,
+ struct rxrpc_txbuf *txb),
- TP_ARGS(call, seq, serial, expiry),
+ TP_ARGS(call, req, txb),
TP_STRUCT__entry(
__field(unsigned int, call)
+ __field(unsigned int, qbase)
__field(rxrpc_seq_t, seq)
__field(rxrpc_serial_t, serial)
- __field(ktime_t, expiry)
),
TP_fast_assign(
__entry->call = call->debug_id;
- __entry->seq = seq;
- __entry->serial = serial;
- __entry->expiry = expiry;
+ __entry->qbase = req->tq->qbase;
+ __entry->seq = req->seq;
+ __entry->serial = txb->serial;
),
- TP_printk("c=%08x q=%x r=%x xp=%lld",
+ TP_printk("c=%08x tq=%x q=%x r=%x",
__entry->call,
+ __entry->qbase,
__entry->seq,
- __entry->serial,
- ktime_to_us(__entry->expiry))
+ __entry->serial)
);
TRACE_EVENT(rxrpc_congest,
- TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
- rxrpc_serial_t ack_serial, enum rxrpc_congest_change change),
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary),
- TP_ARGS(call, summary, ack_serial, change),
+ TP_ARGS(call, summary),
TP_STRUCT__entry(
__field(unsigned int, call)
- __field(enum rxrpc_congest_change, change)
+ __field(enum rxrpc_ca_state, ca_state)
__field(rxrpc_seq_t, hard_ack)
__field(rxrpc_seq_t, top)
__field(rxrpc_seq_t, lowest_nak)
- __field(rxrpc_serial_t, ack_serial)
+ __field(u16, nr_sacks)
+ __field(u16, nr_snacks)
+ __field(u16, cwnd)
+ __field(u16, ssthresh)
+ __field(u16, cumul_acks)
+ __field(u16, dup_acks)
__field_struct(struct rxrpc_ack_summary, sum)
),
TP_fast_assign(
__entry->call = call->debug_id;
- __entry->change = change;
+ __entry->ca_state = call->cong_ca_state;
__entry->hard_ack = call->acks_hard_ack;
__entry->top = call->tx_top;
__entry->lowest_nak = call->acks_lowest_nak;
- __entry->ack_serial = ack_serial;
+ __entry->nr_sacks = call->acks_nr_sacks;
+ __entry->nr_snacks = call->acks_nr_snacks;
+ __entry->cwnd = call->cong_cwnd;
+ __entry->ssthresh = call->cong_ssthresh;
+ __entry->cumul_acks = call->cong_cumul_acks;
+ __entry->dup_acks = call->cong_dup_acks;
memcpy(&__entry->sum, summary, sizeof(__entry->sum));
),
- TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s",
+ TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
__entry->call,
- __entry->ack_serial,
+ __entry->sum.acked_serial,
__print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
__entry->hard_ack,
- __print_symbolic(__entry->sum.mode, rxrpc_congest_modes),
- __entry->sum.cwnd,
- __entry->sum.ssthresh,
- __entry->sum.nr_acks, __entry->sum.nr_retained_nacks,
- __entry->sum.nr_new_acks,
- __entry->sum.nr_new_nacks,
+ __print_symbolic(__entry->ca_state, rxrpc_ca_states),
+ __entry->cwnd,
+ __entry->ssthresh,
+ __entry->nr_sacks, __entry->sum.nr_new_sacks,
+ __entry->nr_snacks, __entry->sum.nr_new_snacks,
+ __entry->sum.nr_new_hacks,
__entry->top - __entry->hard_ack,
- __entry->sum.cumulative_acks,
- __entry->sum.dup_acks,
- __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "",
- __print_symbolic(__entry->change, rxrpc_congest_changes),
+ __entry->cumul_acks,
+ __entry->dup_acks,
+ __entry->lowest_nak, __entry->sum.new_low_snack ? "!" : "",
+ __print_symbolic(__entry->sum.change, rxrpc_congest_changes),
__entry->sum.retrans_timeo ? " rTxTo" : "")
);
TRACE_EVENT(rxrpc_reset_cwnd,
- TP_PROTO(struct rxrpc_call *call, ktime_t now),
+ TP_PROTO(struct rxrpc_call *call, ktime_t since_last_tx, ktime_t rtt),
- TP_ARGS(call, now),
+ TP_ARGS(call, since_last_tx, rtt),
TP_STRUCT__entry(
__field(unsigned int, call)
- __field(enum rxrpc_congest_mode, mode)
+ __field(enum rxrpc_ca_state, ca_state)
__field(unsigned short, cwnd)
__field(unsigned short, extra)
__field(rxrpc_seq_t, hard_ack)
__field(rxrpc_seq_t, prepared)
__field(ktime_t, since_last_tx)
+ __field(ktime_t, rtt)
__field(bool, has_data)
),
TP_fast_assign(
__entry->call = call->debug_id;
- __entry->mode = call->cong_mode;
+ __entry->ca_state = call->cong_ca_state;
__entry->cwnd = call->cong_cwnd;
__entry->extra = call->cong_extra;
__entry->hard_ack = call->acks_hard_ack;
- __entry->prepared = call->tx_prepared - call->tx_bottom;
- __entry->since_last_tx = ktime_sub(now, call->tx_last_sent);
- __entry->has_data = !list_empty(&call->tx_sendmsg);
+ __entry->prepared = call->send_top - call->tx_bottom;
+ __entry->since_last_tx = since_last_tx;
+ __entry->rtt = rtt;
+ __entry->has_data = call->tx_bottom != call->tx_top;
),
- TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u",
+ TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu/%llu d=%u",
__entry->call,
__entry->hard_ack,
- __print_symbolic(__entry->mode, rxrpc_congest_modes),
+ __print_symbolic(__entry->ca_state, rxrpc_ca_states),
__entry->cwnd,
__entry->extra,
__entry->prepared,
- ktime_to_ns(__entry->since_last_tx),
+ ktime_to_us(__entry->since_last_tx),
+ ktime_to_us(__entry->rtt),
__entry->has_data)
);
@@ -1722,10 +1913,36 @@ TRACE_EVENT(rxrpc_connect_call,
&__entry->srx.transport)
);
+TRACE_EVENT(rxrpc_apply_acks,
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq),
+
+ TP_ARGS(call, tq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(unsigned int, nr_rep)
+ __field(rxrpc_seq_t, qbase)
+ __field(unsigned long, acks)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->qbase = tq->qbase;
+ __entry->acks = tq->segment_acked;
+ __entry->nr_rep = tq->nr_reported_acks;
+ ),
+
+ TP_printk("c=%08x tq=%x acks=%016lx rep=%u",
+ __entry->call,
+ __entry->qbase,
+ __entry->acks,
+ __entry->nr_rep)
+ );
+
TRACE_EVENT(rxrpc_resend,
- TP_PROTO(struct rxrpc_call *call, struct sk_buff *ack),
+ TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t ack_serial),
- TP_ARGS(call, ack),
+ TP_ARGS(call, ack_serial),
TP_STRUCT__entry(
__field(unsigned int, call)
@@ -1735,11 +1952,10 @@ TRACE_EVENT(rxrpc_resend,
),
TP_fast_assign(
- struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL;
__entry->call = call->debug_id;
__entry->seq = call->acks_hard_ack;
__entry->transmitted = call->tx_transmitted;
- __entry->ack_serial = sp ? sp->hdr.serial : 0;
+ __entry->ack_serial = ack_serial;
),
TP_printk("c=%08x r=%x q=%x tq=%x",
@@ -1749,6 +1965,63 @@ TRACE_EVENT(rxrpc_resend,
__entry->transmitted)
);
+TRACE_EVENT(rxrpc_resend_lost,
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, unsigned long lost),
+
+ TP_ARGS(call, tq, lost),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, qbase)
+ __field(u8, nr_rep)
+ __field(unsigned long, lost)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->qbase = tq->qbase;
+ __entry->nr_rep = tq->nr_reported_acks;
+ __entry->lost = lost;
+ ),
+
+ TP_printk("c=%08x tq=%x lost=%016lx nr=%u",
+ __entry->call,
+ __entry->qbase,
+ __entry->lost,
+ __entry->nr_rep)
+ );
+
+TRACE_EVENT(rxrpc_rotate,
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq,
+ struct rxrpc_ack_summary *summary, rxrpc_seq_t seq,
+ enum rxrpc_rotate_trace trace),
+
+ TP_ARGS(call, tq, summary, seq, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, qbase)
+ __field(rxrpc_seq_t, seq)
+ __field(unsigned int, nr_rep)
+ __field(enum rxrpc_rotate_trace, trace)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->qbase = tq->qbase;
+ __entry->seq = seq;
+ __entry->nr_rep = tq->nr_reported_acks;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("c=%08x tq=%x q=%x nr=%x %s",
+ __entry->call,
+ __entry->qbase,
+ __entry->seq,
+ __entry->nr_rep,
+ __print_symbolic(__entry->trace, rxrpc_rotate_traces))
+ );
+
TRACE_EVENT(rxrpc_rx_icmp,
TP_PROTO(struct rxrpc_peer *peer, struct sock_extended_err *ee,
struct sockaddr_rxrpc *srx),
@@ -1858,38 +2131,36 @@ TRACE_EVENT(rxrpc_notify_socket,
);
TRACE_EVENT(rxrpc_rx_discard_ack,
- TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial,
- rxrpc_seq_t first_soft_ack, rxrpc_seq_t call_ackr_first,
- rxrpc_seq_t prev_pkt, rxrpc_seq_t call_ackr_prev),
+ TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
+ rxrpc_seq_t hard_ack, rxrpc_seq_t prev_pkt),
- TP_ARGS(debug_id, serial, first_soft_ack, call_ackr_first,
- prev_pkt, call_ackr_prev),
+ TP_ARGS(call, serial, hard_ack, prev_pkt),
TP_STRUCT__entry(
__field(unsigned int, debug_id)
__field(rxrpc_serial_t, serial)
- __field(rxrpc_seq_t, first_soft_ack)
- __field(rxrpc_seq_t, call_ackr_first)
+ __field(rxrpc_seq_t, hard_ack)
__field(rxrpc_seq_t, prev_pkt)
- __field(rxrpc_seq_t, call_ackr_prev)
+ __field(rxrpc_seq_t, acks_hard_ack)
+ __field(rxrpc_seq_t, acks_prev_seq)
),
TP_fast_assign(
- __entry->debug_id = debug_id;
+ __entry->debug_id = call->debug_id;
__entry->serial = serial;
- __entry->first_soft_ack = first_soft_ack;
- __entry->call_ackr_first = call_ackr_first;
+ __entry->hard_ack = hard_ack;
__entry->prev_pkt = prev_pkt;
- __entry->call_ackr_prev = call_ackr_prev;
+ __entry->acks_hard_ack = call->acks_hard_ack;
+ __entry->acks_prev_seq = call->acks_prev_seq;
),
TP_printk("c=%08x r=%08x %08x<%08x %08x<%08x",
__entry->debug_id,
__entry->serial,
- __entry->first_soft_ack,
- __entry->call_ackr_first,
+ __entry->hard_ack,
+ __entry->acks_hard_ack,
__entry->prev_pkt,
- __entry->call_ackr_prev)
+ __entry->acks_prev_seq)
);
TRACE_EVENT(rxrpc_req_ack,
@@ -1947,6 +2218,33 @@ TRACE_EVENT(rxrpc_txbuf,
__entry->ref)
);
+TRACE_EVENT(rxrpc_tq, /* records a struct rxrpc_txqueue event against a call */
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq,
+ rxrpc_seq_t seq, enum rxrpc_tq_trace trace),
+
+ TP_ARGS(call, tq, seq, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call_debug_id) /* call->debug_id */
+ __field(rxrpc_seq_t, qbase) /* tq->qbase, or call->tx_qbase when tq is NULL */
+ __field(rxrpc_seq_t, seq) /* caller-supplied seq argument */
+ __field(enum rxrpc_tq_trace, trace) /* caller-supplied code, decoded via rxrpc_tq_traces */
+ ),
+
+ TP_fast_assign(
+ __entry->call_debug_id = call->debug_id;
+ __entry->qbase = tq ? tq->qbase : call->tx_qbase; /* tq may be NULL; fall back to the call's own base */
+ __entry->seq = seq;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("c=%08x bq=%08x q=%08x %s", /* c=call bq=qbase q=seq, then symbolic trace name */
+ __entry->call_debug_id,
+ __entry->qbase,
+ __entry->seq,
+ __print_symbolic(__entry->trace, rxrpc_tq_traces))
+ );
+
TRACE_EVENT(rxrpc_poke_call,
TP_PROTO(struct rxrpc_call *call, bool busy,
enum rxrpc_call_poke_trace what),
@@ -2015,6 +2313,360 @@ TRACE_EVENT(rxrpc_sack,
__entry->sack)
);
+TRACE_EVENT(rxrpc_pmtud_tx, /* snapshots the peer's pmtud_* values and the connection's probe serial for a call */
+ TP_PROTO(struct rxrpc_call *call),
+
+ TP_ARGS(call),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id) /* call->peer->debug_id */
+ __field(unsigned int, call_debug_id) /* call->debug_id */
+ __field(rxrpc_serial_t, ping_serial) /* call->conn->pmtud_probe */
+ __field(unsigned short, pmtud_trial) /* call->peer->pmtud_trial */
+ __field(unsigned short, pmtud_good) /* call->peer->pmtud_good */
+ __field(unsigned short, pmtud_bad) /* call->peer->pmtud_bad */
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = call->peer->debug_id;
+ __entry->call_debug_id = call->debug_id;
+ __entry->ping_serial = call->conn->pmtud_probe;
+ __entry->pmtud_trial = call->peer->pmtud_trial;
+ __entry->pmtud_good = call->peer->pmtud_good;
+ __entry->pmtud_bad = call->peer->pmtud_bad;
+ ),
+
+ TP_printk("P=%08x c=%08x pr=%08x %u-%u-%u", /* trailing triple is good-trial-bad, not the struct order */
+ __entry->peer_debug_id,
+ __entry->call_debug_id,
+ __entry->ping_serial,
+ __entry->pmtud_good,
+ __entry->pmtud_trial,
+ __entry->pmtud_bad)
+ );
+
+TRACE_EVENT(rxrpc_pmtud_rx, /* records a response serial against the connection's pmtud probe state */
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial),
+
+ TP_ARGS(conn, resp_serial),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id) /* conn->peer->debug_id */
+ __field(unsigned int, call_debug_id) /* conn->pmtud_call */
+ __field(rxrpc_serial_t, ping_serial) /* conn->pmtud_probe */
+ __field(rxrpc_serial_t, resp_serial) /* caller-supplied resp_serial argument */
+ __field(unsigned short, max_data) /* conn->peer->max_data */
+ __field(u8, jumbo_max) /* conn->peer->pmtud_jumbo */
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = conn->peer->debug_id;
+ __entry->call_debug_id = conn->pmtud_call;
+ __entry->ping_serial = conn->pmtud_probe;
+ __entry->resp_serial = resp_serial;
+ __entry->max_data = conn->peer->max_data;
+ __entry->jumbo_max = conn->peer->pmtud_jumbo;
+ ),
+
+ TP_printk("P=%08x c=%08x pr=%08x rr=%08x max=%u jm=%u", /* pr=ping_serial rr=resp_serial jm=jumbo_max */
+ __entry->peer_debug_id,
+ __entry->call_debug_id,
+ __entry->ping_serial,
+ __entry->resp_serial,
+ __entry->max_data,
+ __entry->jumbo_max)
+ );
+
+TRACE_EVENT(rxrpc_pmtud_lost, /* same identifying fields as rxrpc_pmtud_rx, without max_data/jumbo_max */
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial),
+
+ TP_ARGS(conn, resp_serial),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id) /* conn->peer->debug_id */
+ __field(unsigned int, call_debug_id) /* conn->pmtud_call */
+ __field(rxrpc_serial_t, ping_serial) /* conn->pmtud_probe */
+ __field(rxrpc_serial_t, resp_serial) /* caller-supplied resp_serial argument */
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = conn->peer->debug_id;
+ __entry->call_debug_id = conn->pmtud_call;
+ __entry->ping_serial = conn->pmtud_probe;
+ __entry->resp_serial = resp_serial;
+ ),
+
+ TP_printk("P=%08x c=%08x pr=%08x rr=%08x", /* pr=ping_serial rr=resp_serial */
+ __entry->peer_debug_id,
+ __entry->call_debug_id,
+ __entry->ping_serial,
+ __entry->resp_serial)
+ );
+
+TRACE_EVENT(rxrpc_pmtud_reduce, /* records a peer, a serial, a max_data value and a reason code */
+ TP_PROTO(struct rxrpc_peer *peer, rxrpc_serial_t serial,
+ unsigned int max_data, enum rxrpc_pmtud_reduce_trace reason),
+
+ TP_ARGS(peer, serial, max_data, reason),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id) /* peer->debug_id */
+ __field(rxrpc_serial_t, serial) /* caller-supplied serial argument */
+ __field(unsigned int, max_data) /* caller-supplied max_data argument (not read from peer) */
+ __field(enum rxrpc_pmtud_reduce_trace, reason) /* decoded via rxrpc_pmtud_reduce_traces */
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = peer->debug_id;
+ __entry->serial = serial;
+ __entry->max_data = max_data;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("P=%08x %s r=%08x m=%u", /* reason name is printed before r=serial and m=max_data */
+ __entry->peer_debug_id,
+ __print_symbolic(__entry->reason, rxrpc_pmtud_reduce_traces),
+ __entry->serial, __entry->max_data)
+ );
+
+TRACE_EVENT(rxrpc_rack, /* snapshots a call's rack_* state and Tx counters together with a timeout */
+ TP_PROTO(struct rxrpc_call *call, ktime_t timo),
+
+ TP_ARGS(call, timo),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(rxrpc_serial_t, ack_serial) /* call->rx_serial */
+ __field(rxrpc_seq_t, seq) /* call->rack_end_seq */
+ __field(enum rxrpc_rack_timer_mode, mode) /* call->rack_timer_mode */
+ __field(unsigned short, nr_sent) /* call->tx_nr_sent */
+ __field(unsigned short, nr_lost) /* call->tx_nr_lost */
+ __field(unsigned short, nr_resent) /* call->tx_nr_resent */
+ __field(unsigned short, nr_sacked) /* call->acks_nr_sacks */
+ __field(ktime_t, timo) /* caller-supplied timo argument */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->ack_serial = call->rx_serial;
+ __entry->seq = call->rack_end_seq;
+ __entry->mode = call->rack_timer_mode;
+ __entry->nr_sent = call->tx_nr_sent;
+ __entry->nr_lost = call->tx_nr_lost;
+ __entry->nr_resent = call->tx_nr_resent;
+ __entry->nr_sacked = call->acks_nr_sacks;
+ __entry->timo = timo;
+ ),
+
+ TP_printk("c=%08x r=%08x q=%08x %s slrs=%u,%u,%u,%u t=%lld", /* slrs = nr_sent,nr_lost,nr_resent,nr_sacked */
+ __entry->call, __entry->ack_serial, __entry->seq,
+ __print_symbolic(__entry->mode, rxrpc_rack_timer_modes),
+ __entry->nr_sent, __entry->nr_lost,
+ __entry->nr_resent, __entry->nr_sacked,
+ ktime_to_us(__entry->timo)) /* timo is stored as ktime_t and converted to microseconds only when printed */
+ );
+
+TRACE_EVENT(rxrpc_rack_update, /* records the gap between a call's acks_latest_ts and rack_xmit_ts */
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary), /* summary is accepted but not recorded */
+
+ TP_ARGS(call, summary),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(rxrpc_serial_t, ack_serial) /* call->rx_serial */
+ __field(rxrpc_seq_t, seq) /* call->rack_end_seq */
+ __field(ktime_t, xmit_ts) /* ktime_t, not int: holds a ktime_sub() result without truncation */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->ack_serial = call->rx_serial;
+ __entry->seq = call->rack_end_seq;
+ __entry->xmit_ts = ktime_sub(call->acks_latest_ts, call->rack_xmit_ts);
+ ),
+
+ TP_printk("c=%08x r=%08x q=%08x xt=%lld", /* xt = xmit_ts converted to microseconds */
+ __entry->call, __entry->ack_serial, __entry->seq,
+ ktime_to_us(__entry->xmit_ts))
+ );
+
+TRACE_EVENT(rxrpc_rack_scan_loss, /* records a call's rack_rtt and rack_reo_wnd */
+ TP_PROTO(struct rxrpc_call *call),
+
+ TP_ARGS(call),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(ktime_t, rack_rtt) /* call->rack_rtt */
+ __field(ktime_t, rack_reo_wnd) /* call->rack_reo_wnd */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->rack_rtt = call->rack_rtt;
+ __entry->rack_reo_wnd = call->rack_reo_wnd;
+ ),
+
+ TP_printk("c=%08x rtt=%lld reow=%lld", /* both times are converted to microseconds when printed */
+ __entry->call, ktime_to_us(__entry->rack_rtt),
+ ktime_to_us(__entry->rack_reo_wnd))
+ );
+
+TRACE_EVENT(rxrpc_rack_scan_loss_tq, /* records a txqueue's lost/retransmitted words plus a caller-supplied nacks word */
+ TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq,
+ unsigned long nacks),
+
+ TP_ARGS(call, tq, nacks),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(rxrpc_seq_t, qbase) /* tq->qbase; tq is dereferenced unconditionally here */
+ __field(unsigned long, nacks) /* caller-supplied nacks argument */
+ __field(unsigned long, lost) /* tq->segment_lost */
+ __field(unsigned long, retrans) /* tq->segment_retransmitted */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->qbase = tq->qbase;
+ __entry->nacks = nacks;
+ __entry->lost = tq->segment_lost;
+ __entry->retrans = tq->segment_retransmitted;
+ ),
+
+ TP_printk("c=%08x q=%08x n=%lx l=%lx r=%lx", /* n/l/r are printed in hex */
+ __entry->call, __entry->qbase,
+ __entry->nacks, __entry->lost, __entry->retrans)
+ );
+
+TRACE_EVENT(rxrpc_rack_detect_loss, /* records a caller-supplied sequence number against a call */
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, /* summary is accepted but not recorded */
+ rxrpc_seq_t seq),
+
+ TP_ARGS(call, summary, seq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(rxrpc_serial_t, ack_serial) /* call->rx_serial */
+ __field(rxrpc_seq_t, seq) /* caller-supplied seq argument */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->ack_serial = call->rx_serial;
+ __entry->seq = seq;
+ ),
+
+ TP_printk("c=%08x r=%08x q=%08x", /* r=ack_serial q=seq */
+ __entry->call, __entry->ack_serial, __entry->seq)
+ );
+
+TRACE_EVENT(rxrpc_rack_mark_loss_tq, /* records a txqueue's acked/lost/retransmitted words and the call's tx_transmitted */
+ TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq),
+
+ TP_ARGS(call, tq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(rxrpc_seq_t, qbase) /* tq->qbase; tq is dereferenced unconditionally here */
+ __field(rxrpc_seq_t, trans) /* call->tx_transmitted */
+ __field(unsigned long, acked) /* tq->segment_acked */
+ __field(unsigned long, lost) /* tq->segment_lost */
+ __field(unsigned long, retrans) /* tq->segment_retransmitted */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->qbase = tq->qbase;
+ __entry->trans = call->tx_transmitted;
+ __entry->acked = tq->segment_acked;
+ __entry->lost = tq->segment_lost;
+ __entry->retrans = tq->segment_retransmitted;
+ ),
+
+ TP_printk("c=%08x tq=%08x txq=%08x a=%lx l=%lx r=%lx", /* tq=qbase txq=trans; a/l/r are printed in hex */
+ __entry->call, __entry->qbase, __entry->trans,
+ __entry->acked, __entry->lost, __entry->retrans)
+ );
+
+TRACE_EVENT(rxrpc_tlp_probe, /* records a call's tlp_serial and tlp_seq with a reason code */
+ TP_PROTO(struct rxrpc_call *call, enum rxrpc_tlp_probe_trace trace),
+
+ TP_ARGS(call, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(rxrpc_serial_t, serial) /* call->tlp_serial */
+ __field(rxrpc_seq_t, seq) /* call->tlp_seq */
+ __field(enum rxrpc_tlp_probe_trace, trace) /* decoded via rxrpc_tlp_probe_traces */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->serial = call->tlp_serial;
+ __entry->seq = call->tlp_seq;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("c=%08x r=%08x pq=%08x %s", /* r=tlp_serial pq=tlp_seq */
+ __entry->call, __entry->serial, __entry->seq,
+ __print_symbolic(__entry->trace, rxrpc_tlp_probe_traces))
+ );
+
+TRACE_EVENT(rxrpc_tlp_ack, /* records a call's tlp_serial, tlp_seq and acks_hard_ack with a reason code */
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, /* summary is accepted but not recorded */
+ enum rxrpc_tlp_ack_trace trace),
+
+ TP_ARGS(call, summary, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(rxrpc_serial_t, serial) /* call->tlp_serial */
+ __field(rxrpc_seq_t, tlp_seq) /* call->tlp_seq */
+ __field(rxrpc_seq_t, hard_ack) /* call->acks_hard_ack */
+ __field(enum rxrpc_tlp_ack_trace, trace) /* decoded via rxrpc_tlp_ack_traces */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->serial = call->tlp_serial;
+ __entry->tlp_seq = call->tlp_seq;
+ __entry->hard_ack = call->acks_hard_ack;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("c=%08x r=%08x pq=%08x hq=%08x %s", /* r=tlp_serial pq=tlp_seq hq=hard_ack */
+ __entry->call, __entry->serial,
+ __entry->tlp_seq, __entry->hard_ack,
+ __print_symbolic(__entry->trace, rxrpc_tlp_ack_traces))
+ );
+
+TRACE_EVENT(rxrpc_rack_timer, /* records a delay and the call's rack_timer_mode, tagged "Exp" or "Set" */
+ TP_PROTO(struct rxrpc_call *call, ktime_t delay, bool exp),
+
+ TP_ARGS(call, delay, exp),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call) /* call->debug_id */
+ __field(bool, exp) /* caller-supplied flag; selects "Exp" vs "Set" in the output */
+ __field(enum rxrpc_rack_timer_mode, mode) /* call->rack_timer_mode */
+ __field(ktime_t, delay) /* caller-supplied delay argument */
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->exp = exp;
+ __entry->mode = call->rack_timer_mode;
+ __entry->delay = delay;
+ ),
+
+ TP_printk("c=%08x %s %s to=%lld", /* to = delay converted to microseconds */
+ __entry->call,
+ __entry->exp ? "Exp" : "Set",
+ __print_symbolic(__entry->mode, rxrpc_rack_timer_modes),
+ ktime_to_us(__entry->delay))
+ );
+
#undef EM
#undef E_
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 283305f6b063..9c909ce733a5 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -10,545 +10,12 @@
#define _UAPI_LINUX_ETHTOOL_NETLINK_H_
#include <linux/ethtool.h>
-
-/* message types - userspace to kernel */
-enum {
- ETHTOOL_MSG_USER_NONE,
- ETHTOOL_MSG_STRSET_GET,
- ETHTOOL_MSG_LINKINFO_GET,
- ETHTOOL_MSG_LINKINFO_SET,
- ETHTOOL_MSG_LINKMODES_GET,
- ETHTOOL_MSG_LINKMODES_SET,
- ETHTOOL_MSG_LINKSTATE_GET,
- ETHTOOL_MSG_DEBUG_GET,
- ETHTOOL_MSG_DEBUG_SET,
- ETHTOOL_MSG_WOL_GET,
- ETHTOOL_MSG_WOL_SET,
- ETHTOOL_MSG_FEATURES_GET,
- ETHTOOL_MSG_FEATURES_SET,
- ETHTOOL_MSG_PRIVFLAGS_GET,
- ETHTOOL_MSG_PRIVFLAGS_SET,
- ETHTOOL_MSG_RINGS_GET,
- ETHTOOL_MSG_RINGS_SET,
- ETHTOOL_MSG_CHANNELS_GET,
- ETHTOOL_MSG_CHANNELS_SET,
- ETHTOOL_MSG_COALESCE_GET,
- ETHTOOL_MSG_COALESCE_SET,
- ETHTOOL_MSG_PAUSE_GET,
- ETHTOOL_MSG_PAUSE_SET,
- ETHTOOL_MSG_EEE_GET,
- ETHTOOL_MSG_EEE_SET,
- ETHTOOL_MSG_TSINFO_GET,
- ETHTOOL_MSG_CABLE_TEST_ACT,
- ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
- ETHTOOL_MSG_TUNNEL_INFO_GET,
- ETHTOOL_MSG_FEC_GET,
- ETHTOOL_MSG_FEC_SET,
- ETHTOOL_MSG_MODULE_EEPROM_GET,
- ETHTOOL_MSG_STATS_GET,
- ETHTOOL_MSG_PHC_VCLOCKS_GET,
- ETHTOOL_MSG_MODULE_GET,
- ETHTOOL_MSG_MODULE_SET,
- ETHTOOL_MSG_PSE_GET,
- ETHTOOL_MSG_PSE_SET,
- ETHTOOL_MSG_RSS_GET,
- ETHTOOL_MSG_PLCA_GET_CFG,
- ETHTOOL_MSG_PLCA_SET_CFG,
- ETHTOOL_MSG_PLCA_GET_STATUS,
- ETHTOOL_MSG_MM_GET,
- ETHTOOL_MSG_MM_SET,
- ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
- ETHTOOL_MSG_PHY_GET,
-
- /* add new constants above here */
- __ETHTOOL_MSG_USER_CNT,
- ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
-};
-
-/* message types - kernel to userspace */
-enum {
- ETHTOOL_MSG_KERNEL_NONE,
- ETHTOOL_MSG_STRSET_GET_REPLY,
- ETHTOOL_MSG_LINKINFO_GET_REPLY,
- ETHTOOL_MSG_LINKINFO_NTF,
- ETHTOOL_MSG_LINKMODES_GET_REPLY,
- ETHTOOL_MSG_LINKMODES_NTF,
- ETHTOOL_MSG_LINKSTATE_GET_REPLY,
- ETHTOOL_MSG_DEBUG_GET_REPLY,
- ETHTOOL_MSG_DEBUG_NTF,
- ETHTOOL_MSG_WOL_GET_REPLY,
- ETHTOOL_MSG_WOL_NTF,
- ETHTOOL_MSG_FEATURES_GET_REPLY,
- ETHTOOL_MSG_FEATURES_SET_REPLY,
- ETHTOOL_MSG_FEATURES_NTF,
- ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
- ETHTOOL_MSG_PRIVFLAGS_NTF,
- ETHTOOL_MSG_RINGS_GET_REPLY,
- ETHTOOL_MSG_RINGS_NTF,
- ETHTOOL_MSG_CHANNELS_GET_REPLY,
- ETHTOOL_MSG_CHANNELS_NTF,
- ETHTOOL_MSG_COALESCE_GET_REPLY,
- ETHTOOL_MSG_COALESCE_NTF,
- ETHTOOL_MSG_PAUSE_GET_REPLY,
- ETHTOOL_MSG_PAUSE_NTF,
- ETHTOOL_MSG_EEE_GET_REPLY,
- ETHTOOL_MSG_EEE_NTF,
- ETHTOOL_MSG_TSINFO_GET_REPLY,
- ETHTOOL_MSG_CABLE_TEST_NTF,
- ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
- ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
- ETHTOOL_MSG_FEC_GET_REPLY,
- ETHTOOL_MSG_FEC_NTF,
- ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
- ETHTOOL_MSG_STATS_GET_REPLY,
- ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
- ETHTOOL_MSG_MODULE_GET_REPLY,
- ETHTOOL_MSG_MODULE_NTF,
- ETHTOOL_MSG_PSE_GET_REPLY,
- ETHTOOL_MSG_RSS_GET_REPLY,
- ETHTOOL_MSG_PLCA_GET_CFG_REPLY,
- ETHTOOL_MSG_PLCA_GET_STATUS_REPLY,
- ETHTOOL_MSG_PLCA_NTF,
- ETHTOOL_MSG_MM_GET_REPLY,
- ETHTOOL_MSG_MM_NTF,
- ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
- ETHTOOL_MSG_PHY_GET_REPLY,
- ETHTOOL_MSG_PHY_NTF,
-
- /* add new constants above here */
- __ETHTOOL_MSG_KERNEL_CNT,
- ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
-};
-
-/* request header */
-
-enum ethtool_header_flags {
- ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */
- ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */
- ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */
-};
+#include <linux/ethtool_netlink_generated.h>
#define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \
ETHTOOL_FLAG_OMIT_REPLY | \
ETHTOOL_FLAG_STATS)
-enum {
- ETHTOOL_A_HEADER_UNSPEC,
- ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */
- ETHTOOL_A_HEADER_DEV_NAME, /* string */
- ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */
- ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_HEADER_CNT,
- ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1
-};
-
-/* bit sets */
-
-enum {
- ETHTOOL_A_BITSET_BIT_UNSPEC,
- ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */
- ETHTOOL_A_BITSET_BIT_NAME, /* string */
- ETHTOOL_A_BITSET_BIT_VALUE, /* flag */
-
- /* add new constants above here */
- __ETHTOOL_A_BITSET_BIT_CNT,
- ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1
-};
-
-enum {
- ETHTOOL_A_BITSET_BITS_UNSPEC,
- ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */
-
- /* add new constants above here */
- __ETHTOOL_A_BITSET_BITS_CNT,
- ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1
-};
-
-enum {
- ETHTOOL_A_BITSET_UNSPEC,
- ETHTOOL_A_BITSET_NOMASK, /* flag */
- ETHTOOL_A_BITSET_SIZE, /* u32 */
- ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */
- ETHTOOL_A_BITSET_VALUE, /* binary */
- ETHTOOL_A_BITSET_MASK, /* binary */
-
- /* add new constants above here */
- __ETHTOOL_A_BITSET_CNT,
- ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1
-};
-
-/* string sets */
-
-enum {
- ETHTOOL_A_STRING_UNSPEC,
- ETHTOOL_A_STRING_INDEX, /* u32 */
- ETHTOOL_A_STRING_VALUE, /* string */
-
- /* add new constants above here */
- __ETHTOOL_A_STRING_CNT,
- ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1
-};
-
-enum {
- ETHTOOL_A_STRINGS_UNSPEC,
- ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */
-
- /* add new constants above here */
- __ETHTOOL_A_STRINGS_CNT,
- ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1
-};
-
-enum {
- ETHTOOL_A_STRINGSET_UNSPEC,
- ETHTOOL_A_STRINGSET_ID, /* u32 */
- ETHTOOL_A_STRINGSET_COUNT, /* u32 */
- ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */
-
- /* add new constants above here */
- __ETHTOOL_A_STRINGSET_CNT,
- ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1
-};
-
-enum {
- ETHTOOL_A_STRINGSETS_UNSPEC,
- ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */
-
- /* add new constants above here */
- __ETHTOOL_A_STRINGSETS_CNT,
- ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1
-};
-
-/* STRSET */
-
-enum {
- ETHTOOL_A_STRSET_UNSPEC,
- ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */
- ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */
-
- /* add new constants above here */
- __ETHTOOL_A_STRSET_CNT,
- ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1
-};
-
-/* LINKINFO */
-
-enum {
- ETHTOOL_A_LINKINFO_UNSPEC,
- ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_LINKINFO_PORT, /* u8 */
- ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */
- ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */
- ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */
- ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */
-
- /* add new constants above here */
- __ETHTOOL_A_LINKINFO_CNT,
- ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1
-};
-
-/* LINKMODES */
-
-enum {
- ETHTOOL_A_LINKMODES_UNSPEC,
- ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */
- ETHTOOL_A_LINKMODES_OURS, /* bitset */
- ETHTOOL_A_LINKMODES_PEER, /* bitset */
- ETHTOOL_A_LINKMODES_SPEED, /* u32 */
- ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */
- ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */
- ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */
- ETHTOOL_A_LINKMODES_LANES, /* u32 */
- ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */
-
- /* add new constants above here */
- __ETHTOOL_A_LINKMODES_CNT,
- ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1
-};
-
-/* LINKSTATE */
-
-enum {
- ETHTOOL_A_LINKSTATE_UNSPEC,
- ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_LINKSTATE_LINK, /* u8 */
- ETHTOOL_A_LINKSTATE_SQI, /* u32 */
- ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */
- ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */
- ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */
- ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_LINKSTATE_CNT,
- ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1
-};
-
-/* DEBUG */
-
-enum {
- ETHTOOL_A_DEBUG_UNSPEC,
- ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_DEBUG_MSGMASK, /* bitset */
-
- /* add new constants above here */
- __ETHTOOL_A_DEBUG_CNT,
- ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1
-};
-
-/* WOL */
-
-enum {
- ETHTOOL_A_WOL_UNSPEC,
- ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_WOL_MODES, /* bitset */
- ETHTOOL_A_WOL_SOPASS, /* binary */
-
- /* add new constants above here */
- __ETHTOOL_A_WOL_CNT,
- ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1
-};
-
-/* FEATURES */
-
-enum {
- ETHTOOL_A_FEATURES_UNSPEC,
- ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_FEATURES_HW, /* bitset */
- ETHTOOL_A_FEATURES_WANTED, /* bitset */
- ETHTOOL_A_FEATURES_ACTIVE, /* bitset */
- ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */
-
- /* add new constants above here */
- __ETHTOOL_A_FEATURES_CNT,
- ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1
-};
-
-/* PRIVFLAGS */
-
-enum {
- ETHTOOL_A_PRIVFLAGS_UNSPEC,
- ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */
-
- /* add new constants above here */
- __ETHTOOL_A_PRIVFLAGS_CNT,
- ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1
-};
-
-/* RINGS */
-
-enum {
- ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0,
- ETHTOOL_TCP_DATA_SPLIT_DISABLED,
- ETHTOOL_TCP_DATA_SPLIT_ENABLED,
-};
-
-enum {
- ETHTOOL_A_RINGS_UNSPEC,
- ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_RINGS_RX_MAX, /* u32 */
- ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */
- ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */
- ETHTOOL_A_RINGS_TX_MAX, /* u32 */
- ETHTOOL_A_RINGS_RX, /* u32 */
- ETHTOOL_A_RINGS_RX_MINI, /* u32 */
- ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */
- ETHTOOL_A_RINGS_TX, /* u32 */
- ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */
- ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */
- ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */
- ETHTOOL_A_RINGS_TX_PUSH, /* u8 */
- ETHTOOL_A_RINGS_RX_PUSH, /* u8 */
- ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */
- ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_RINGS_CNT,
- ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1)
-};
-
-/* CHANNELS */
-
-enum {
- ETHTOOL_A_CHANNELS_UNSPEC,
- ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */
- ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */
- ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */
- ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */
- ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */
- ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */
- ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */
- ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_CHANNELS_CNT,
- ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1)
-};
-
-/* COALESCE */
-
-enum {
- ETHTOOL_A_COALESCE_UNSPEC,
- ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_COALESCE_RX_USECS, /* u32 */
- ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */
- ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */
- ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */
- ETHTOOL_A_COALESCE_TX_USECS, /* u32 */
- ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */
- ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */
- ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */
- ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */
- ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */
- ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */
- ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */
- ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */
- ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */
- ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */
- ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */
- ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */
- ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */
- ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */
- ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */
- ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */
- ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */
- ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */
- ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */
- ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */
- ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */
- ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */
- /* nest - _A_PROFILE_IRQ_MODERATION */
- ETHTOOL_A_COALESCE_RX_PROFILE,
- /* nest - _A_PROFILE_IRQ_MODERATION */
- ETHTOOL_A_COALESCE_TX_PROFILE,
-
- /* add new constants above here */
- __ETHTOOL_A_COALESCE_CNT,
- ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_PROFILE_UNSPEC,
- /* nest, _A_IRQ_MODERATION_* */
- ETHTOOL_A_PROFILE_IRQ_MODERATION,
- __ETHTOOL_A_PROFILE_CNT,
- ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_IRQ_MODERATION_UNSPEC,
- ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */
- ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */
- ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */
-
- __ETHTOOL_A_IRQ_MODERATION_CNT,
- ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1)
-};
-
-/* PAUSE */
-
-enum {
- ETHTOOL_A_PAUSE_UNSPEC,
- ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_PAUSE_AUTONEG, /* u8 */
- ETHTOOL_A_PAUSE_RX, /* u8 */
- ETHTOOL_A_PAUSE_TX, /* u8 */
- ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */
- ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_PAUSE_CNT,
- ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_PAUSE_STAT_UNSPEC,
- ETHTOOL_A_PAUSE_STAT_PAD,
-
- ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
- ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
-
- /* add new constants above here
- * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats!
- */
- __ETHTOOL_A_PAUSE_STAT_CNT,
- ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
-};
-
-/* EEE */
-
-enum {
- ETHTOOL_A_EEE_UNSPEC,
- ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_EEE_MODES_OURS, /* bitset */
- ETHTOOL_A_EEE_MODES_PEER, /* bitset */
- ETHTOOL_A_EEE_ACTIVE, /* u8 */
- ETHTOOL_A_EEE_ENABLED, /* u8 */
- ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */
- ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_EEE_CNT,
- ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1)
-};
-
-/* TSINFO */
-
-enum {
- ETHTOOL_A_TSINFO_UNSPEC,
- ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */
- ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */
- ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */
- ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */
- ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */
-
- /* add new constants above here */
- __ETHTOOL_A_TSINFO_CNT,
- ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_TS_STAT_UNSPEC,
-
- ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */
- ETHTOOL_A_TS_STAT_TX_LOST, /* uint */
- ETHTOOL_A_TS_STAT_TX_ERR, /* uint */
-
- /* add new constants above here */
- __ETHTOOL_A_TS_STAT_CNT,
- ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
-
-};
-
-/* PHC VCLOCKS */
-
-enum {
- ETHTOOL_A_PHC_VCLOCKS_UNSPEC,
- ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */
- ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */
-
- /* add new constants above here */
- __ETHTOOL_A_PHC_VCLOCKS_CNT,
- ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1)
-};
-
-/* CABLE TEST */
-
-enum {
- ETHTOOL_A_CABLE_TEST_UNSPEC,
- ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */
-
- /* add new constants above here */
- __ETHTOOL_A_CABLE_TEST_CNT,
- ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1
-};
-
/* CABLE TEST NOTIFY */
enum {
ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC,
@@ -583,73 +50,11 @@ enum {
};
enum {
- ETHTOOL_A_CABLE_RESULT_UNSPEC,
- ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */
- ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */
- ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */
-
- __ETHTOOL_A_CABLE_RESULT_CNT,
- ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC,
- ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */
- ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */
- ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */
-
- __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT,
- ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1)
-};
-
-enum {
ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC,
ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED,
ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED
};
-enum {
- ETHTOOL_A_CABLE_NEST_UNSPEC,
- ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */
- ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */
- __ETHTOOL_A_CABLE_NEST_CNT,
- ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_CABLE_TEST_NTF_UNSPEC,
- ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */
- ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */
- ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */
-
- __ETHTOOL_A_CABLE_TEST_NTF_CNT,
- ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1)
-};
-
-/* CABLE TEST TDR */
-
-enum {
- ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC,
- ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */
- ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */
- ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */
- ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */
-
- /* add new constants above here */
- __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT,
- ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1
-};
-
-enum {
- ETHTOOL_A_CABLE_TEST_TDR_UNSPEC,
- ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */
-
- /* add new constants above here */
- __ETHTOOL_A_CABLE_TEST_TDR_CNT,
- ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1
-};
-
/* CABLE TEST TDR NOTIFY */
enum {
@@ -690,132 +95,6 @@ enum {
};
enum {
- ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC,
- ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */
- ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */
- ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */
-
- /* add new constants above here */
- __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT,
- ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1
-};
-
-/* TUNNEL INFO */
-
-enum {
- ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
- ETHTOOL_UDP_TUNNEL_TYPE_GENEVE,
- ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE,
-
- __ETHTOOL_UDP_TUNNEL_TYPE_CNT
-};
-
-enum {
- ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC,
-
- ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */
- ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT,
- ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC,
-
- ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */
- ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */
- ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */
-
- /* add new constants above here */
- __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT,
- ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_TUNNEL_UDP_UNSPEC,
-
- ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */
-
- /* add new constants above here */
- __ETHTOOL_A_TUNNEL_UDP_CNT,
- ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_TUNNEL_INFO_UNSPEC,
- ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */
-
- ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */
-
- /* add new constants above here */
- __ETHTOOL_A_TUNNEL_INFO_CNT,
- ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
-};
-
-/* FEC */
-
-enum {
- ETHTOOL_A_FEC_UNSPEC,
- ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_FEC_MODES, /* bitset */
- ETHTOOL_A_FEC_AUTO, /* u8 */
- ETHTOOL_A_FEC_ACTIVE, /* u32 */
- ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */
-
- __ETHTOOL_A_FEC_CNT,
- ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_FEC_STAT_UNSPEC,
- ETHTOOL_A_FEC_STAT_PAD,
-
- ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */
- ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */
- ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */
-
- /* add new constants above here */
- __ETHTOOL_A_FEC_STAT_CNT,
- ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1)
-};
-
-/* MODULE EEPROM */
-
-enum {
- ETHTOOL_A_MODULE_EEPROM_UNSPEC,
- ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */
-
- ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */
- ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */
- ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */
- ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */
- ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */
- ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */
-
- __ETHTOOL_A_MODULE_EEPROM_CNT,
- ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
-};
-
-/* STATS */
-
-enum {
- ETHTOOL_A_STATS_UNSPEC,
- ETHTOOL_A_STATS_PAD,
- ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_STATS_GROUPS, /* bitset */
-
- ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */
-
- ETHTOOL_A_STATS_SRC, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_STATS_CNT,
- ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1)
-};
-
-enum {
ETHTOOL_STATS_ETH_PHY,
ETHTOOL_STATS_ETH_MAC,
ETHTOOL_STATS_ETH_CTRL,
@@ -826,27 +105,6 @@ enum {
};
enum {
- ETHTOOL_A_STATS_GRP_UNSPEC,
- ETHTOOL_A_STATS_GRP_PAD,
-
- ETHTOOL_A_STATS_GRP_ID, /* u32 */
- ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */
-
- ETHTOOL_A_STATS_GRP_STAT, /* nest */
-
- ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */
- ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */
-
- ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */
- ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */
- ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */
-
- /* add new constants above here */
- __ETHTOOL_A_STATS_GRP_CNT,
- ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1)
-};
-
-enum {
/* 30.3.2.1.5 aSymbolErrorDuringCarrier */
ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR,
@@ -935,155 +193,6 @@ enum {
ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1)
};
-/* MODULE */
-
-enum {
- ETHTOOL_A_MODULE_UNSPEC,
- ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */
- ETHTOOL_A_MODULE_POWER_MODE, /* u8 */
-
- /* add new constants above here */
- __ETHTOOL_A_MODULE_CNT,
- ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
-};
-
-/* Power Sourcing Equipment */
-enum {
- ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
- ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */
- ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */
-};
-
-enum {
- ETHTOOL_A_PSE_UNSPEC,
- ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */
- ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */
- ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */
- ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */
- ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */
- ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */
- ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */
- ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */
- ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */
- ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */
- ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */
- ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */
-
- /* add new constants above here */
- __ETHTOOL_A_PSE_CNT,
- ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_RSS_UNSPEC,
- ETHTOOL_A_RSS_HEADER,
- ETHTOOL_A_RSS_CONTEXT, /* u32 */
- ETHTOOL_A_RSS_HFUNC, /* u32 */
- ETHTOOL_A_RSS_INDIR, /* binary */
- ETHTOOL_A_RSS_HKEY, /* binary */
- ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */
- ETHTOOL_A_RSS_START_CONTEXT, /* u32 */
-
- __ETHTOOL_A_RSS_CNT,
- ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1),
-};
-
-/* PLCA */
-
-enum {
- ETHTOOL_A_PLCA_UNSPEC,
- ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_PLCA_VERSION, /* u16 */
- ETHTOOL_A_PLCA_ENABLED, /* u8 */
- ETHTOOL_A_PLCA_STATUS, /* u8 */
- ETHTOOL_A_PLCA_NODE_CNT, /* u32 */
- ETHTOOL_A_PLCA_NODE_ID, /* u32 */
- ETHTOOL_A_PLCA_TO_TMR, /* u32 */
- ETHTOOL_A_PLCA_BURST_CNT, /* u32 */
- ETHTOOL_A_PLCA_BURST_TMR, /* u32 */
-
- /* add new constants above here */
- __ETHTOOL_A_PLCA_CNT,
- ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1)
-};
-
-/* MAC Merge (802.3) */
-
-enum {
- ETHTOOL_A_MM_STAT_UNSPEC,
- ETHTOOL_A_MM_STAT_PAD,
-
- /* aMACMergeFrameAssErrorCount */
- ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */
- /* aMACMergeFrameSmdErrorCount */
- ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */
- /* aMACMergeFrameAssOkCount */
- ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */
- /* aMACMergeFragCountRx */
- ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */
- /* aMACMergeFragCountTx */
- ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */
- /* aMACMergeHoldCount */
- ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */
-
- /* add new constants above here */
- __ETHTOOL_A_MM_STAT_CNT,
- ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_MM_UNSPEC,
- ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */
- ETHTOOL_A_MM_TX_ENABLED, /* u8 */
- ETHTOOL_A_MM_TX_ACTIVE, /* u8 */
- ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */
- ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */
- ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */
- ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */
- ETHTOOL_A_MM_VERIFY_TIME, /* u32 */
- ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */
- ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */
-
- /* add new constants above here */
- __ETHTOOL_A_MM_CNT,
- ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
-};
-
-/* MODULE_FW_FLASH */
-
-enum {
- ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
- ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */
- ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */
- ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */
- ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */
- ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */
- ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */
-
- /* add new constants above here */
- __ETHTOOL_A_MODULE_FW_FLASH_CNT,
- ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
-};
-
-enum {
- ETHTOOL_A_PHY_UNSPEC,
- ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */
- ETHTOOL_A_PHY_INDEX, /* u32 */
- ETHTOOL_A_PHY_DRVNAME, /* string */
- ETHTOOL_A_PHY_NAME, /* string */
- ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */
- ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */
- ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */
- ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */
-
- /* add new constants above here */
- __ETHTOOL_A_PHY_CNT,
- ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1)
-};
/* generic netlink info */
#define ETHTOOL_GENL_NAME "ethtool"
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
new file mode 100644
index 000000000000..b58f352fe4f2
--- /dev/null
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -0,0 +1,792 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/ethtool.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
+#define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
+
+#define ETHTOOL_FAMILY_NAME "ethtool"
+#define ETHTOOL_FAMILY_VERSION 1
+
+enum {
+ ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
+ ETHTOOL_UDP_TUNNEL_TYPE_GENEVE,
+ ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE,
+
+ /* private: */
+ __ETHTOOL_UDP_TUNNEL_TYPE_CNT,
+ ETHTOOL_UDP_TUNNEL_TYPE_MAX = (__ETHTOOL_UDP_TUNNEL_TYPE_CNT - 1)
+};
+
+/**
+ * enum ethtool_header_flags - common ethtool header flags
+ * @ETHTOOL_FLAG_COMPACT_BITSETS: use compact bitsets in reply
+ * @ETHTOOL_FLAG_OMIT_REPLY: provide optional reply for SET or ACT requests
+ * @ETHTOOL_FLAG_STATS: request statistics, if supported by the driver
+ */
+enum ethtool_header_flags {
+ ETHTOOL_FLAG_COMPACT_BITSETS = 1,
+ ETHTOOL_FLAG_OMIT_REPLY = 2,
+ ETHTOOL_FLAG_STATS = 4,
+};
+
+enum {
+ ETHTOOL_PHY_UPSTREAM_TYPE_MAC,
+ ETHTOOL_PHY_UPSTREAM_TYPE_PHY,
+};
+
+enum ethtool_tcp_data_split {
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN,
+ ETHTOOL_TCP_DATA_SPLIT_DISABLED,
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED,
+};
+
+enum {
+ ETHTOOL_A_HEADER_UNSPEC,
+ ETHTOOL_A_HEADER_DEV_INDEX,
+ ETHTOOL_A_HEADER_DEV_NAME,
+ ETHTOOL_A_HEADER_FLAGS,
+ ETHTOOL_A_HEADER_PHY_INDEX,
+
+ __ETHTOOL_A_HEADER_CNT,
+ ETHTOOL_A_HEADER_MAX = (__ETHTOOL_A_HEADER_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_BITSET_BIT_UNSPEC,
+ ETHTOOL_A_BITSET_BIT_INDEX,
+ ETHTOOL_A_BITSET_BIT_NAME,
+ ETHTOOL_A_BITSET_BIT_VALUE,
+
+ __ETHTOOL_A_BITSET_BIT_CNT,
+ ETHTOOL_A_BITSET_BIT_MAX = (__ETHTOOL_A_BITSET_BIT_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_BITSET_BITS_UNSPEC,
+ ETHTOOL_A_BITSET_BITS_BIT,
+
+ __ETHTOOL_A_BITSET_BITS_CNT,
+ ETHTOOL_A_BITSET_BITS_MAX = (__ETHTOOL_A_BITSET_BITS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_BITSET_UNSPEC,
+ ETHTOOL_A_BITSET_NOMASK,
+ ETHTOOL_A_BITSET_SIZE,
+ ETHTOOL_A_BITSET_BITS,
+ ETHTOOL_A_BITSET_VALUE,
+ ETHTOOL_A_BITSET_MASK,
+
+ __ETHTOOL_A_BITSET_CNT,
+ ETHTOOL_A_BITSET_MAX = (__ETHTOOL_A_BITSET_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_STRING_UNSPEC,
+ ETHTOOL_A_STRING_INDEX,
+ ETHTOOL_A_STRING_VALUE,
+
+ __ETHTOOL_A_STRING_CNT,
+ ETHTOOL_A_STRING_MAX = (__ETHTOOL_A_STRING_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_STRINGS_UNSPEC,
+ ETHTOOL_A_STRINGS_STRING,
+
+ __ETHTOOL_A_STRINGS_CNT,
+ ETHTOOL_A_STRINGS_MAX = (__ETHTOOL_A_STRINGS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_STRINGSET_UNSPEC,
+ ETHTOOL_A_STRINGSET_ID,
+ ETHTOOL_A_STRINGSET_COUNT,
+ ETHTOOL_A_STRINGSET_STRINGS,
+
+ __ETHTOOL_A_STRINGSET_CNT,
+ ETHTOOL_A_STRINGSET_MAX = (__ETHTOOL_A_STRINGSET_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_STRINGSETS_UNSPEC,
+ ETHTOOL_A_STRINGSETS_STRINGSET,
+
+ __ETHTOOL_A_STRINGSETS_CNT,
+ ETHTOOL_A_STRINGSETS_MAX = (__ETHTOOL_A_STRINGSETS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_STRSET_UNSPEC,
+ ETHTOOL_A_STRSET_HEADER,
+ ETHTOOL_A_STRSET_STRINGSETS,
+ ETHTOOL_A_STRSET_COUNTS_ONLY,
+
+ __ETHTOOL_A_STRSET_CNT,
+ ETHTOOL_A_STRSET_MAX = (__ETHTOOL_A_STRSET_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PRIVFLAGS_UNSPEC,
+ ETHTOOL_A_PRIVFLAGS_HEADER,
+ ETHTOOL_A_PRIVFLAGS_FLAGS,
+
+ __ETHTOOL_A_PRIVFLAGS_CNT,
+ ETHTOOL_A_PRIVFLAGS_MAX = (__ETHTOOL_A_PRIVFLAGS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_RINGS_UNSPEC,
+ ETHTOOL_A_RINGS_HEADER,
+ ETHTOOL_A_RINGS_RX_MAX,
+ ETHTOOL_A_RINGS_RX_MINI_MAX,
+ ETHTOOL_A_RINGS_RX_JUMBO_MAX,
+ ETHTOOL_A_RINGS_TX_MAX,
+ ETHTOOL_A_RINGS_RX,
+ ETHTOOL_A_RINGS_RX_MINI,
+ ETHTOOL_A_RINGS_RX_JUMBO,
+ ETHTOOL_A_RINGS_TX,
+ ETHTOOL_A_RINGS_RX_BUF_LEN,
+ ETHTOOL_A_RINGS_TCP_DATA_SPLIT,
+ ETHTOOL_A_RINGS_CQE_SIZE,
+ ETHTOOL_A_RINGS_TX_PUSH,
+ ETHTOOL_A_RINGS_RX_PUSH,
+ ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,
+ ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,
+
+ __ETHTOOL_A_RINGS_CNT,
+ ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_MM_STAT_UNSPEC,
+ ETHTOOL_A_MM_STAT_PAD,
+ ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS,
+ ETHTOOL_A_MM_STAT_SMD_ERRORS,
+ ETHTOOL_A_MM_STAT_REASSEMBLY_OK,
+ ETHTOOL_A_MM_STAT_RX_FRAG_COUNT,
+ ETHTOOL_A_MM_STAT_TX_FRAG_COUNT,
+ ETHTOOL_A_MM_STAT_HOLD_COUNT,
+
+ __ETHTOOL_A_MM_STAT_CNT,
+ ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_MM_UNSPEC,
+ ETHTOOL_A_MM_HEADER,
+ ETHTOOL_A_MM_PMAC_ENABLED,
+ ETHTOOL_A_MM_TX_ENABLED,
+ ETHTOOL_A_MM_TX_ACTIVE,
+ ETHTOOL_A_MM_TX_MIN_FRAG_SIZE,
+ ETHTOOL_A_MM_RX_MIN_FRAG_SIZE,
+ ETHTOOL_A_MM_VERIFY_ENABLED,
+ ETHTOOL_A_MM_VERIFY_STATUS,
+ ETHTOOL_A_MM_VERIFY_TIME,
+ ETHTOOL_A_MM_MAX_VERIFY_TIME,
+ ETHTOOL_A_MM_STATS,
+
+ __ETHTOOL_A_MM_CNT,
+ ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_LINKINFO_UNSPEC,
+ ETHTOOL_A_LINKINFO_HEADER,
+ ETHTOOL_A_LINKINFO_PORT,
+ ETHTOOL_A_LINKINFO_PHYADDR,
+ ETHTOOL_A_LINKINFO_TP_MDIX,
+ ETHTOOL_A_LINKINFO_TP_MDIX_CTRL,
+ ETHTOOL_A_LINKINFO_TRANSCEIVER,
+
+ __ETHTOOL_A_LINKINFO_CNT,
+ ETHTOOL_A_LINKINFO_MAX = (__ETHTOOL_A_LINKINFO_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_LINKMODES_UNSPEC,
+ ETHTOOL_A_LINKMODES_HEADER,
+ ETHTOOL_A_LINKMODES_AUTONEG,
+ ETHTOOL_A_LINKMODES_OURS,
+ ETHTOOL_A_LINKMODES_PEER,
+ ETHTOOL_A_LINKMODES_SPEED,
+ ETHTOOL_A_LINKMODES_DUPLEX,
+ ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,
+ ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,
+ ETHTOOL_A_LINKMODES_LANES,
+ ETHTOOL_A_LINKMODES_RATE_MATCHING,
+
+ __ETHTOOL_A_LINKMODES_CNT,
+ ETHTOOL_A_LINKMODES_MAX = (__ETHTOOL_A_LINKMODES_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_LINKSTATE_UNSPEC,
+ ETHTOOL_A_LINKSTATE_HEADER,
+ ETHTOOL_A_LINKSTATE_LINK,
+ ETHTOOL_A_LINKSTATE_SQI,
+ ETHTOOL_A_LINKSTATE_SQI_MAX,
+ ETHTOOL_A_LINKSTATE_EXT_STATE,
+ ETHTOOL_A_LINKSTATE_EXT_SUBSTATE,
+ ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT,
+
+ __ETHTOOL_A_LINKSTATE_CNT,
+ ETHTOOL_A_LINKSTATE_MAX = (__ETHTOOL_A_LINKSTATE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_DEBUG_UNSPEC,
+ ETHTOOL_A_DEBUG_HEADER,
+ ETHTOOL_A_DEBUG_MSGMASK,
+
+ __ETHTOOL_A_DEBUG_CNT,
+ ETHTOOL_A_DEBUG_MAX = (__ETHTOOL_A_DEBUG_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_WOL_UNSPEC,
+ ETHTOOL_A_WOL_HEADER,
+ ETHTOOL_A_WOL_MODES,
+ ETHTOOL_A_WOL_SOPASS,
+
+ __ETHTOOL_A_WOL_CNT,
+ ETHTOOL_A_WOL_MAX = (__ETHTOOL_A_WOL_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_FEATURES_UNSPEC,
+ ETHTOOL_A_FEATURES_HEADER,
+ ETHTOOL_A_FEATURES_HW,
+ ETHTOOL_A_FEATURES_WANTED,
+ ETHTOOL_A_FEATURES_ACTIVE,
+ ETHTOOL_A_FEATURES_NOCHANGE,
+
+ __ETHTOOL_A_FEATURES_CNT,
+ ETHTOOL_A_FEATURES_MAX = (__ETHTOOL_A_FEATURES_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CHANNELS_UNSPEC,
+ ETHTOOL_A_CHANNELS_HEADER,
+ ETHTOOL_A_CHANNELS_RX_MAX,
+ ETHTOOL_A_CHANNELS_TX_MAX,
+ ETHTOOL_A_CHANNELS_OTHER_MAX,
+ ETHTOOL_A_CHANNELS_COMBINED_MAX,
+ ETHTOOL_A_CHANNELS_RX_COUNT,
+ ETHTOOL_A_CHANNELS_TX_COUNT,
+ ETHTOOL_A_CHANNELS_OTHER_COUNT,
+ ETHTOOL_A_CHANNELS_COMBINED_COUNT,
+
+ __ETHTOOL_A_CHANNELS_CNT,
+ ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_IRQ_MODERATION_UNSPEC,
+ ETHTOOL_A_IRQ_MODERATION_USEC,
+ ETHTOOL_A_IRQ_MODERATION_PKTS,
+ ETHTOOL_A_IRQ_MODERATION_COMPS,
+
+ __ETHTOOL_A_IRQ_MODERATION_CNT,
+ ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PROFILE_UNSPEC,
+ ETHTOOL_A_PROFILE_IRQ_MODERATION,
+
+ __ETHTOOL_A_PROFILE_CNT,
+ ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_COALESCE_UNSPEC,
+ ETHTOOL_A_COALESCE_HEADER,
+ ETHTOOL_A_COALESCE_RX_USECS,
+ ETHTOOL_A_COALESCE_RX_MAX_FRAMES,
+ ETHTOOL_A_COALESCE_RX_USECS_IRQ,
+ ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ,
+ ETHTOOL_A_COALESCE_TX_USECS,
+ ETHTOOL_A_COALESCE_TX_MAX_FRAMES,
+ ETHTOOL_A_COALESCE_TX_USECS_IRQ,
+ ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ,
+ ETHTOOL_A_COALESCE_STATS_BLOCK_USECS,
+ ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX,
+ ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX,
+ ETHTOOL_A_COALESCE_PKT_RATE_LOW,
+ ETHTOOL_A_COALESCE_RX_USECS_LOW,
+ ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW,
+ ETHTOOL_A_COALESCE_TX_USECS_LOW,
+ ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW,
+ ETHTOOL_A_COALESCE_PKT_RATE_HIGH,
+ ETHTOOL_A_COALESCE_RX_USECS_HIGH,
+ ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH,
+ ETHTOOL_A_COALESCE_TX_USECS_HIGH,
+ ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,
+ ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,
+ ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
+ ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
+ ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,
+ ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,
+ ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
+ ETHTOOL_A_COALESCE_RX_PROFILE,
+ ETHTOOL_A_COALESCE_TX_PROFILE,
+
+ __ETHTOOL_A_COALESCE_CNT,
+ ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PAUSE_STAT_UNSPEC,
+ ETHTOOL_A_PAUSE_STAT_PAD,
+ ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
+ ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
+
+ __ETHTOOL_A_PAUSE_STAT_CNT,
+ ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PAUSE_UNSPEC,
+ ETHTOOL_A_PAUSE_HEADER,
+ ETHTOOL_A_PAUSE_AUTONEG,
+ ETHTOOL_A_PAUSE_RX,
+ ETHTOOL_A_PAUSE_TX,
+ ETHTOOL_A_PAUSE_STATS,
+ ETHTOOL_A_PAUSE_STATS_SRC,
+
+ __ETHTOOL_A_PAUSE_CNT,
+ ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_EEE_UNSPEC,
+ ETHTOOL_A_EEE_HEADER,
+ ETHTOOL_A_EEE_MODES_OURS,
+ ETHTOOL_A_EEE_MODES_PEER,
+ ETHTOOL_A_EEE_ACTIVE,
+ ETHTOOL_A_EEE_ENABLED,
+ ETHTOOL_A_EEE_TX_LPI_ENABLED,
+ ETHTOOL_A_EEE_TX_LPI_TIMER,
+
+ __ETHTOOL_A_EEE_CNT,
+ ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_TS_STAT_UNSPEC,
+ ETHTOOL_A_TS_STAT_TX_PKTS,
+ ETHTOOL_A_TS_STAT_TX_LOST,
+ ETHTOOL_A_TS_STAT_TX_ERR,
+
+ __ETHTOOL_A_TS_STAT_CNT,
+ ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_TSINFO_UNSPEC,
+ ETHTOOL_A_TSINFO_HEADER,
+ ETHTOOL_A_TSINFO_TIMESTAMPING,
+ ETHTOOL_A_TSINFO_TX_TYPES,
+ ETHTOOL_A_TSINFO_RX_FILTERS,
+ ETHTOOL_A_TSINFO_PHC_INDEX,
+ ETHTOOL_A_TSINFO_STATS,
+
+ __ETHTOOL_A_TSINFO_CNT,
+ ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_RESULT_UNSPEC,
+ ETHTOOL_A_CABLE_RESULT_PAIR,
+ ETHTOOL_A_CABLE_RESULT_CODE,
+ ETHTOOL_A_CABLE_RESULT_SRC,
+
+ __ETHTOOL_A_CABLE_RESULT_CNT,
+ ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC,
+ ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR,
+ ETHTOOL_A_CABLE_FAULT_LENGTH_CM,
+ ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,
+
+ __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT,
+ ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_NEST_UNSPEC,
+ ETHTOOL_A_CABLE_NEST_RESULT,
+ ETHTOOL_A_CABLE_NEST_FAULT_LENGTH,
+
+ __ETHTOOL_A_CABLE_NEST_CNT,
+ ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_TEST_UNSPEC,
+ ETHTOOL_A_CABLE_TEST_HEADER,
+
+ __ETHTOOL_A_CABLE_TEST_CNT,
+ ETHTOOL_A_CABLE_TEST_MAX = (__ETHTOOL_A_CABLE_TEST_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_TEST_NTF_UNSPEC,
+ ETHTOOL_A_CABLE_TEST_NTF_HEADER,
+ ETHTOOL_A_CABLE_TEST_NTF_STATUS,
+ ETHTOOL_A_CABLE_TEST_NTF_NEST,
+
+ __ETHTOOL_A_CABLE_TEST_NTF_CNT,
+ ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC,
+ ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST,
+ ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST,
+ ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP,
+ ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR,
+
+ __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT,
+ ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC,
+ ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER,
+ ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS,
+ ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST,
+
+ __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT,
+ ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_CABLE_TEST_TDR_UNSPEC,
+ ETHTOOL_A_CABLE_TEST_TDR_HEADER,
+ ETHTOOL_A_CABLE_TEST_TDR_CFG,
+
+ __ETHTOOL_A_CABLE_TEST_TDR_CNT,
+ ETHTOOL_A_CABLE_TEST_TDR_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC,
+ ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
+ ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
+
+ __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT,
+ ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC,
+ ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE,
+ ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES,
+ ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY,
+
+ __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT,
+ ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_TUNNEL_UDP_UNSPEC,
+ ETHTOOL_A_TUNNEL_UDP_TABLE,
+
+ __ETHTOOL_A_TUNNEL_UDP_CNT,
+ ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_TUNNEL_INFO_UNSPEC,
+ ETHTOOL_A_TUNNEL_INFO_HEADER,
+ ETHTOOL_A_TUNNEL_INFO_UDP_PORTS,
+
+ __ETHTOOL_A_TUNNEL_INFO_CNT,
+ ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_FEC_STAT_UNSPEC,
+ ETHTOOL_A_FEC_STAT_PAD,
+ ETHTOOL_A_FEC_STAT_CORRECTED,
+ ETHTOOL_A_FEC_STAT_UNCORR,
+ ETHTOOL_A_FEC_STAT_CORR_BITS,
+
+ __ETHTOOL_A_FEC_STAT_CNT,
+ ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_FEC_UNSPEC,
+ ETHTOOL_A_FEC_HEADER,
+ ETHTOOL_A_FEC_MODES,
+ ETHTOOL_A_FEC_AUTO,
+ ETHTOOL_A_FEC_ACTIVE,
+ ETHTOOL_A_FEC_STATS,
+
+ __ETHTOOL_A_FEC_CNT,
+ ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_MODULE_EEPROM_UNSPEC,
+ ETHTOOL_A_MODULE_EEPROM_HEADER,
+ ETHTOOL_A_MODULE_EEPROM_OFFSET,
+ ETHTOOL_A_MODULE_EEPROM_LENGTH,
+ ETHTOOL_A_MODULE_EEPROM_PAGE,
+ ETHTOOL_A_MODULE_EEPROM_BANK,
+ ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,
+ ETHTOOL_A_MODULE_EEPROM_DATA,
+
+ __ETHTOOL_A_MODULE_EEPROM_CNT,
+ ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_STATS_GRP_UNSPEC,
+ ETHTOOL_A_STATS_GRP_PAD,
+ ETHTOOL_A_STATS_GRP_ID,
+ ETHTOOL_A_STATS_GRP_SS_ID,
+ ETHTOOL_A_STATS_GRP_STAT,
+ ETHTOOL_A_STATS_GRP_HIST_RX,
+ ETHTOOL_A_STATS_GRP_HIST_TX,
+ ETHTOOL_A_STATS_GRP_HIST_BKT_LOW,
+ ETHTOOL_A_STATS_GRP_HIST_BKT_HI,
+ ETHTOOL_A_STATS_GRP_HIST_VAL,
+
+ __ETHTOOL_A_STATS_GRP_CNT,
+ ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_STATS_UNSPEC,
+ ETHTOOL_A_STATS_PAD,
+ ETHTOOL_A_STATS_HEADER,
+ ETHTOOL_A_STATS_GROUPS,
+ ETHTOOL_A_STATS_GRP,
+ ETHTOOL_A_STATS_SRC,
+
+ __ETHTOOL_A_STATS_CNT,
+ ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PHC_VCLOCKS_UNSPEC,
+ ETHTOOL_A_PHC_VCLOCKS_HEADER,
+ ETHTOOL_A_PHC_VCLOCKS_NUM,
+ ETHTOOL_A_PHC_VCLOCKS_INDEX,
+
+ __ETHTOOL_A_PHC_VCLOCKS_CNT,
+ ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_MODULE_UNSPEC,
+ ETHTOOL_A_MODULE_HEADER,
+ ETHTOOL_A_MODULE_POWER_MODE_POLICY,
+ ETHTOOL_A_MODULE_POWER_MODE,
+
+ __ETHTOOL_A_MODULE_CNT,
+ ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
+ ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,
+ ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,
+
+ __ETHTOOL_A_C33_PSE_PW_LIMIT_CNT,
+ __ETHTOOL_A_C33_PSE_PW_LIMIT_MAX = (__ETHTOOL_A_C33_PSE_PW_LIMIT_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PSE_UNSPEC,
+ ETHTOOL_A_PSE_HEADER,
+ ETHTOOL_A_PODL_PSE_ADMIN_STATE,
+ ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,
+ ETHTOOL_A_PODL_PSE_PW_D_STATUS,
+ ETHTOOL_A_C33_PSE_ADMIN_STATE,
+ ETHTOOL_A_C33_PSE_ADMIN_CONTROL,
+ ETHTOOL_A_C33_PSE_PW_D_STATUS,
+ ETHTOOL_A_C33_PSE_PW_CLASS,
+ ETHTOOL_A_C33_PSE_ACTUAL_PW,
+ ETHTOOL_A_C33_PSE_EXT_STATE,
+ ETHTOOL_A_C33_PSE_EXT_SUBSTATE,
+ ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,
+ ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,
+
+ __ETHTOOL_A_PSE_CNT,
+ ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_RSS_UNSPEC,
+ ETHTOOL_A_RSS_HEADER,
+ ETHTOOL_A_RSS_CONTEXT,
+ ETHTOOL_A_RSS_HFUNC,
+ ETHTOOL_A_RSS_INDIR,
+ ETHTOOL_A_RSS_HKEY,
+ ETHTOOL_A_RSS_INPUT_XFRM,
+ ETHTOOL_A_RSS_START_CONTEXT,
+
+ __ETHTOOL_A_RSS_CNT,
+ ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PLCA_UNSPEC,
+ ETHTOOL_A_PLCA_HEADER,
+ ETHTOOL_A_PLCA_VERSION,
+ ETHTOOL_A_PLCA_ENABLED,
+ ETHTOOL_A_PLCA_STATUS,
+ ETHTOOL_A_PLCA_NODE_CNT,
+ ETHTOOL_A_PLCA_NODE_ID,
+ ETHTOOL_A_PLCA_TO_TMR,
+ ETHTOOL_A_PLCA_BURST_CNT,
+ ETHTOOL_A_PLCA_BURST_TMR,
+
+ __ETHTOOL_A_PLCA_CNT,
+ ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
+ ETHTOOL_A_MODULE_FW_FLASH_HEADER,
+ ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME,
+ ETHTOOL_A_MODULE_FW_FLASH_PASSWORD,
+ ETHTOOL_A_MODULE_FW_FLASH_STATUS,
+ ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,
+ ETHTOOL_A_MODULE_FW_FLASH_DONE,
+ ETHTOOL_A_MODULE_FW_FLASH_TOTAL,
+
+ __ETHTOOL_A_MODULE_FW_FLASH_CNT,
+ ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
+};
+
+enum {
+ ETHTOOL_A_PHY_UNSPEC,
+ ETHTOOL_A_PHY_HEADER,
+ ETHTOOL_A_PHY_INDEX,
+ ETHTOOL_A_PHY_DRVNAME,
+ ETHTOOL_A_PHY_NAME,
+ ETHTOOL_A_PHY_UPSTREAM_TYPE,
+ ETHTOOL_A_PHY_UPSTREAM_INDEX,
+ ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,
+ ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,
+
+ __ETHTOOL_A_PHY_CNT,
+ ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1)
+};
+
+enum {
+ ETHTOOL_MSG_USER_NONE = 0,
+ ETHTOOL_MSG_STRSET_GET = 1,
+ ETHTOOL_MSG_LINKINFO_GET,
+ ETHTOOL_MSG_LINKINFO_SET,
+ ETHTOOL_MSG_LINKMODES_GET,
+ ETHTOOL_MSG_LINKMODES_SET,
+ ETHTOOL_MSG_LINKSTATE_GET,
+ ETHTOOL_MSG_DEBUG_GET,
+ ETHTOOL_MSG_DEBUG_SET,
+ ETHTOOL_MSG_WOL_GET,
+ ETHTOOL_MSG_WOL_SET,
+ ETHTOOL_MSG_FEATURES_GET,
+ ETHTOOL_MSG_FEATURES_SET,
+ ETHTOOL_MSG_PRIVFLAGS_GET,
+ ETHTOOL_MSG_PRIVFLAGS_SET,
+ ETHTOOL_MSG_RINGS_GET,
+ ETHTOOL_MSG_RINGS_SET,
+ ETHTOOL_MSG_CHANNELS_GET,
+ ETHTOOL_MSG_CHANNELS_SET,
+ ETHTOOL_MSG_COALESCE_GET,
+ ETHTOOL_MSG_COALESCE_SET,
+ ETHTOOL_MSG_PAUSE_GET,
+ ETHTOOL_MSG_PAUSE_SET,
+ ETHTOOL_MSG_EEE_GET,
+ ETHTOOL_MSG_EEE_SET,
+ ETHTOOL_MSG_TSINFO_GET,
+ ETHTOOL_MSG_CABLE_TEST_ACT,
+ ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
+ ETHTOOL_MSG_TUNNEL_INFO_GET,
+ ETHTOOL_MSG_FEC_GET,
+ ETHTOOL_MSG_FEC_SET,
+ ETHTOOL_MSG_MODULE_EEPROM_GET,
+ ETHTOOL_MSG_STATS_GET,
+ ETHTOOL_MSG_PHC_VCLOCKS_GET,
+ ETHTOOL_MSG_MODULE_GET,
+ ETHTOOL_MSG_MODULE_SET,
+ ETHTOOL_MSG_PSE_GET,
+ ETHTOOL_MSG_PSE_SET,
+ ETHTOOL_MSG_RSS_GET,
+ ETHTOOL_MSG_PLCA_GET_CFG,
+ ETHTOOL_MSG_PLCA_SET_CFG,
+ ETHTOOL_MSG_PLCA_GET_STATUS,
+ ETHTOOL_MSG_MM_GET,
+ ETHTOOL_MSG_MM_SET,
+ ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
+ ETHTOOL_MSG_PHY_GET,
+
+ __ETHTOOL_MSG_USER_CNT,
+ ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1)
+};
+
+enum {
+ ETHTOOL_MSG_KERNEL_NONE = 0,
+ ETHTOOL_MSG_STRSET_GET_REPLY = 1,
+ ETHTOOL_MSG_LINKINFO_GET_REPLY,
+ ETHTOOL_MSG_LINKINFO_NTF,
+ ETHTOOL_MSG_LINKMODES_GET_REPLY,
+ ETHTOOL_MSG_LINKMODES_NTF,
+ ETHTOOL_MSG_LINKSTATE_GET_REPLY,
+ ETHTOOL_MSG_DEBUG_GET_REPLY,
+ ETHTOOL_MSG_DEBUG_NTF,
+ ETHTOOL_MSG_WOL_GET_REPLY,
+ ETHTOOL_MSG_WOL_NTF,
+ ETHTOOL_MSG_FEATURES_GET_REPLY,
+ ETHTOOL_MSG_FEATURES_SET_REPLY,
+ ETHTOOL_MSG_FEATURES_NTF,
+ ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
+ ETHTOOL_MSG_PRIVFLAGS_NTF,
+ ETHTOOL_MSG_RINGS_GET_REPLY,
+ ETHTOOL_MSG_RINGS_NTF,
+ ETHTOOL_MSG_CHANNELS_GET_REPLY,
+ ETHTOOL_MSG_CHANNELS_NTF,
+ ETHTOOL_MSG_COALESCE_GET_REPLY,
+ ETHTOOL_MSG_COALESCE_NTF,
+ ETHTOOL_MSG_PAUSE_GET_REPLY,
+ ETHTOOL_MSG_PAUSE_NTF,
+ ETHTOOL_MSG_EEE_GET_REPLY,
+ ETHTOOL_MSG_EEE_NTF,
+ ETHTOOL_MSG_TSINFO_GET_REPLY,
+ ETHTOOL_MSG_CABLE_TEST_NTF,
+ ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
+ ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
+ ETHTOOL_MSG_FEC_GET_REPLY,
+ ETHTOOL_MSG_FEC_NTF,
+ ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
+ ETHTOOL_MSG_STATS_GET_REPLY,
+ ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
+ ETHTOOL_MSG_MODULE_GET_REPLY,
+ ETHTOOL_MSG_MODULE_NTF,
+ ETHTOOL_MSG_PSE_GET_REPLY,
+ ETHTOOL_MSG_RSS_GET_REPLY,
+ ETHTOOL_MSG_PLCA_GET_CFG_REPLY,
+ ETHTOOL_MSG_PLCA_GET_STATUS_REPLY,
+ ETHTOOL_MSG_PLCA_NTF,
+ ETHTOOL_MSG_MM_GET_REPLY,
+ ETHTOOL_MSG_MM_NTF,
+ ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
+ ETHTOOL_MSG_PHY_GET_REPLY,
+ ETHTOOL_MSG_PHY_NTF,
+
+ __ETHTOOL_MSG_KERNEL_CNT,
+ ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1)
+};
+
+#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 2575e0cd9b48..77730c340c8f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1394,6 +1394,7 @@ enum {
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
+ IFLA_VXLAN_RESERVED_BITS,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3aa002a47a96..482d284a1553 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -678,7 +678,7 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
}
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
int err;
@@ -701,7 +701,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst,
struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
struct sk_buff *nskb;
int err;
@@ -720,8 +720,8 @@ static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst,
}
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog, struct bpf_map *map,
- bool exclude_ingress)
+ const struct bpf_prog *xdp_prog,
+ struct bpf_map *map, bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dst, *last_dst = NULL;
diff --git a/lib/packing.c b/lib/packing.c
index 793942745e34..bb1643d9e64d 100644
--- a/lib/packing.c
+++ b/lib/packing.c
@@ -5,10 +5,37 @@
#include <linux/packing.h>
#include <linux/module.h>
#include <linux/bitops.h>
+#include <linux/bits.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/bitrev.h>
+/* Pack each of the @num_fields entries of @fields into @pbuf: entry i is read
+ * from @ustruct with ustruct_field_to_u64() (using the entry's offset and
+ * size) and written with __pack() at bits startbit..endbit of the
+ * @pbuflen-byte buffer. The macro arguments are re-evaluated on every loop
+ * iteration, so they must be free of side effects.
+ */
+#define __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \
+ ({ \
+ for (size_t i = 0; i < (num_fields); i++) { \
+ typeof(&(fields)[0]) field = &(fields)[i]; \
+ u64 uval; \
+ \
+ uval = ustruct_field_to_u64(ustruct, field->offset, field->size); \
+ \
+ __pack(pbuf, uval, field->startbit, field->endbit, \
+ pbuflen, quirks); \
+ } \
+ })
+
+/* Unpack each of the @num_fields entries of @fields from @pbuf: entry i is
+ * read with __unpack() from bits startbit..endbit of the @pbuflen-byte buffer
+ * and stored into @ustruct with u64_to_ustruct_field() (using the entry's
+ * offset and size). The macro arguments are re-evaluated on every loop
+ * iteration, so they must be free of side effects.
+ */
+#define __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \
+ ({ \
+ for (size_t i = 0; i < (num_fields); i++) { \
+ /* (fields) is parenthesized so that an expression argument \
+ * is indexed as a whole, matching __pack_fields(). \
+ */ \
+ typeof(&(fields)[0]) field = &(fields)[i]; \
+ u64 uval; \
+ \
+ __unpack(pbuf, &uval, field->startbit, field->endbit, \
+ pbuflen, quirks); \
+ \
+ u64_to_ustruct_field(ustruct, field->offset, field->size, uval); \
+ } \
+ })
+
/**
* calculate_box_addr - Determine physical location of byte in buffer
* @box: Index of byte within buffer seen as a logical big-endian big number
@@ -51,64 +78,29 @@ static size_t calculate_box_addr(size_t box, size_t len, u8 quirks)
return offset_of_group + offset_in_group;
}
-/**
- * pack - Pack u64 number into bitfield of buffer.
- *
- * @pbuf: Pointer to a buffer holding the packed value.
- * @uval: CPU-readable unpacked value to pack.
- * @startbit: The index (in logical notation, compensated for quirks) where
- * the packed value starts within pbuf. Must be larger than, or
- * equal to, endbit.
- * @endbit: The index (in logical notation, compensated for quirks) where
- * the packed value ends within pbuf. Must be smaller than, or equal
- * to, startbit.
- * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf.
- * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
- * QUIRK_MSB_ON_THE_RIGHT.
- *
- * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming
- * correct usage, return code may be discarded. The @pbuf memory will
- * be modified on success.
- */
-int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen,
- u8 quirks)
+static void __pack(void *pbuf, u64 uval, size_t startbit, size_t endbit,
+ size_t pbuflen, u8 quirks)
{
/* Logical byte indices corresponding to the
* start and end of the field.
*/
- int plogical_first_u8, plogical_last_u8, box;
- /* width of the field to access in the pbuf */
- u64 value_width;
-
- /* startbit is expected to be larger than endbit, and both are
- * expected to be within the logically addressable range of the buffer.
- */
- if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen))
- /* Invalid function call */
- return -EINVAL;
-
- value_width = startbit - endbit + 1;
- if (unlikely(value_width > 64))
- return -ERANGE;
+ int plogical_first_u8 = startbit / BITS_PER_BYTE;
+ int plogical_last_u8 = endbit / BITS_PER_BYTE;
+ int value_width = startbit - endbit + 1;
+ int box;
/* Check if "uval" fits in "value_width" bits.
- * If value_width is 64, the check will fail, but any
- * 64-bit uval will surely fit.
+ * The test only works for value_width < 64, but in the latter case,
+ * any 64-bit uval will surely fit.
*/
- if (unlikely(value_width < 64 && uval >= (1ull << value_width)))
- /* Cannot store "uval" inside "value_width" bits.
- * Truncating "uval" is most certainly not desirable,
- * so simply erroring out is appropriate.
- */
- return -ERANGE;
+ WARN(value_width < 64 && uval >= (1ull << value_width),
+ "Cannot store 0x%llx inside bits %zu-%zu - will truncate\n",
+ uval, startbit, endbit);
/* Iterate through an idealistic view of the pbuf as an u64 with
* no quirks, u8 by u8 (aligned at u8 boundaries), from high to low
* logical bit significance. "box" denotes the current logical u8.
*/
- plogical_first_u8 = startbit / BITS_PER_BYTE;
- plogical_last_u8 = endbit / BITS_PER_BYTE;
-
for (box = plogical_first_u8; box >= plogical_last_u8; box--) {
/* Bit indices into the currently accessed 8-bit box */
size_t box_start_bit, box_end_bit, box_addr;
@@ -163,15 +155,13 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen,
((u8 *)pbuf)[box_addr] &= ~box_mask;
((u8 *)pbuf)[box_addr] |= pval;
}
- return 0;
}
-EXPORT_SYMBOL(pack);
/**
- * unpack - Unpack u64 number from packed buffer.
+ * pack - Pack u64 number into bitfield of buffer.
*
* @pbuf: Pointer to a buffer holding the packed value.
- * @uval: Pointer to an u64 holding the unpacked value.
+ * @uval: CPU-readable unpacked value to pack.
* @startbit: The index (in logical notation, compensated for quirks) where
* the packed value starts within pbuf. Must be larger than, or
* equal to, endbit.
@@ -183,19 +173,12 @@ EXPORT_SYMBOL(pack);
* QUIRK_MSB_ON_THE_RIGHT.
*
* Return: 0 on success, -EINVAL or -ERANGE if called incorrectly. Assuming
- * correct usage, return code may be discarded. The @uval will be
- * modified on success.
+ * correct usage, return code may be discarded. The @pbuf memory will
+ * be modified on success.
*/
-int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
- size_t pbuflen, u8 quirks)
+int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen,
+ u8 quirks)
{
- /* Logical byte indices corresponding to the
- * start and end of the field.
- */
- int plogical_first_u8, plogical_last_u8, box;
- /* width of the field to access in the pbuf */
- u64 value_width;
-
/* startbit is expected to be larger than endbit, and both are
* expected to be within the logically addressable range of the buffer.
*/
@@ -203,10 +186,25 @@ int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
/* Invalid function call */
return -EINVAL;
- value_width = startbit - endbit + 1;
- if (unlikely(value_width > 64))
+ if (unlikely(startbit - endbit >= 64))
return -ERANGE;
+ __pack(pbuf, uval, startbit, endbit, pbuflen, quirks);
+
+ return 0;
+}
+EXPORT_SYMBOL(pack);
+
+static void __unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
+ size_t pbuflen, u8 quirks)
+{
+ /* Logical byte indices corresponding to the
+ * start and end of the field.
+ */
+ int plogical_first_u8 = startbit / BITS_PER_BYTE;
+ int plogical_last_u8 = endbit / BITS_PER_BYTE;
+ int box;
+
/* Initialize parameter */
*uval = 0;
@@ -214,9 +212,6 @@ int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
* no quirks, u8 by u8 (aligned at u8 boundaries), from high to low
* logical bit significance. "box" denotes the current logical u8.
*/
- plogical_first_u8 = startbit / BITS_PER_BYTE;
- plogical_last_u8 = endbit / BITS_PER_BYTE;
-
for (box = plogical_first_u8; box >= plogical_last_u8; box--) {
/* Bit indices into the currently accessed 8-bit box */
size_t box_start_bit, box_end_bit, box_addr;
@@ -271,6 +266,46 @@ int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
*uval &= ~proj_mask;
*uval |= pval;
}
+}
+
+/**
+ * unpack - Unpack u64 number from packed buffer.
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @uval: Pointer to an u64 holding the unpacked value.
+ * @startbit: The index (in logical notation, compensated for quirks) where
+ * the packed value starts within pbuf. Must be larger than, or
+ * equal to, endbit.
+ * @endbit: The index (in logical notation, compensated for quirks) where
+ * the packed value ends within pbuf. Must be smaller than, or equal
+ * to, startbit.
+ * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ * QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming
+ * correct usage, return code may be discarded. The @uval will be
+ * modified on success.
+ */
+int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit,
+ size_t pbuflen, u8 quirks)
+{
+ /* width of the field to access in the pbuf */
+ u64 value_width;
+
+ /* startbit is expected to be larger than endbit, and both are
+ * expected to be within the logically addressable range of the buffer.
+ */
+ if (startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)
+ /* Invalid function call */
+ return -EINVAL;
+
+ value_width = startbit - endbit + 1;
+ if (value_width > 64)
+ return -ERANGE;
+
+ __unpack(pbuf, uval, startbit, endbit, pbuflen, quirks);
+
return 0;
}
EXPORT_SYMBOL(unpack);
@@ -314,4 +349,130 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
}
EXPORT_SYMBOL(packing);
+static u64 ustruct_field_to_u64(const void *ustruct, size_t field_offset,
+ size_t field_size)
+{
+ switch (field_size) {
+ case 1:
+ return *((u8 *)(ustruct + field_offset));
+ case 2:
+ return *((u16 *)(ustruct + field_offset));
+ case 4:
+ return *((u32 *)(ustruct + field_offset));
+ default:
+ return *((u64 *)(ustruct + field_offset));
+ }
+}
+
+static void u64_to_ustruct_field(void *ustruct, size_t field_offset,
+ size_t field_size, u64 uval)
+{
+ switch (field_size) {
+ case 1:
+ *((u8 *)(ustruct + field_offset)) = uval;
+ break;
+ case 2:
+ *((u16 *)(ustruct + field_offset)) = uval;
+ break;
+ case 4:
+ *((u32 *)(ustruct + field_offset)) = uval;
+ break;
+ default:
+ *((u64 *)(ustruct + field_offset)) = uval;
+ break;
+ }
+}
+
+/**
+ * pack_fields_u8 - Pack array of fields
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf.
+ * @ustruct: Pointer to CPU-readable structure holding the unpacked value.
+ * It is expected (but not checked) that this has the same data type
+ * as all struct packed_field_u8 definitions.
+ * @fields: Array of packed_field_u8 field definitions. They must not overlap.
+ * @num_fields: Length of @fields array.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ * QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Use the pack_fields() macro instead of calling this directly.
+ */
+void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct,
+ const struct packed_field_u8 *fields, size_t num_fields,
+ u8 quirks)
+{
+ __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks);
+}
+EXPORT_SYMBOL(pack_fields_u8);
+
+/**
+ * pack_fields_u16 - Pack array of fields
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf.
+ * @ustruct: Pointer to CPU-readable structure holding the unpacked value.
+ * It is expected (but not checked) that this has the same data type
+ * as all struct packed_field_u16 definitions.
+ * @fields: Array of packed_field_u16 field definitions. They must not overlap.
+ * @num_fields: Length of @fields array.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ * QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Use the pack_fields() macro instead of calling this directly.
+ */
+void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct,
+ const struct packed_field_u16 *fields, size_t num_fields,
+ u8 quirks)
+{
+ __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks);
+}
+EXPORT_SYMBOL(pack_fields_u16);
+
+/**
+ * unpack_fields_u8 - Unpack array of fields
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf.
+ * @ustruct: Pointer to CPU-readable structure holding the unpacked value.
+ * It is expected (but not checked) that this has the same data type
+ * as all struct packed_field_u8 definitions.
+ * @fields: Array of packed_field_u8 field definitions. They must not overlap.
+ * @num_fields: Length of @fields array.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ * QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Use the unpack_fields() macro instead of calling this directly.
+ */
+void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct,
+ const struct packed_field_u8 *fields, size_t num_fields,
+ u8 quirks)
+{
+ __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks);
+}
+EXPORT_SYMBOL(unpack_fields_u8);
+
+/**
+ * unpack_fields_u16 - Unpack array of fields
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf.
+ * @ustruct: Pointer to CPU-readable structure holding the unpacked value.
+ * It is expected (but not checked) that this has the same data type
+ * as all struct packed_field_u16 definitions.
+ * @fields: Array of packed_field_u16 field definitions. They must not overlap.
+ * @num_fields: Length of @fields array.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ * QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Use the unpack_fields() macro instead of calling this directly.
+ */
+void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct,
+ const struct packed_field_u16 *fields, size_t num_fields,
+ u8 quirks)
+{
+ __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks);
+}
+EXPORT_SYMBOL(unpack_fields_u16);
+
MODULE_DESCRIPTION("Generic bitfield packing and unpacking");
diff --git a/lib/packing_test.c b/lib/packing_test.c
index b38ea43c03fd..ce3b83d33b04 100644
--- a/lib/packing_test.c
+++ b/lib/packing_test.c
@@ -396,9 +396,70 @@ static void packing_test_unpack(struct kunit *test)
KUNIT_EXPECT_EQ(test, uval, params->uval);
}
+#define PACKED_BUF_SIZE 8
+
+typedef struct __packed { u8 buf[PACKED_BUF_SIZE]; } packed_buf_t;
+
+struct test_data {
+ u32 field3;
+ u16 field2;
+ u16 field4;
+ u16 field6;
+ u8 field1;
+ u8 field5;
+};
+
+static const struct packed_field_u8 test_fields[] = {
+ PACKED_FIELD(63, 61, struct test_data, field1),
+ PACKED_FIELD(60, 52, struct test_data, field2),
+ PACKED_FIELD(51, 28, struct test_data, field3),
+ PACKED_FIELD(27, 14, struct test_data, field4),
+ PACKED_FIELD(13, 9, struct test_data, field5),
+ PACKED_FIELD(8, 0, struct test_data, field6),
+};
+
+static void packing_test_pack_fields(struct kunit *test)
+{
+ const struct test_data data = {
+ .field1 = 0x2,
+ .field2 = 0x100,
+ .field3 = 0xF00050,
+ .field4 = 0x7D3,
+ .field5 = 0x9,
+ .field6 = 0x10B,
+ };
+ packed_buf_t expect = {
+ .buf = { 0x50, 0x0F, 0x00, 0x05, 0x01, 0xF4, 0xD3, 0x0B },
+ };
+ packed_buf_t buf = {};
+
+ pack_fields(&buf, sizeof(buf), &data, test_fields, 0);
+
+ KUNIT_EXPECT_MEMEQ(test, &expect, &buf, sizeof(buf));
+}
+
+static void packing_test_unpack_fields(struct kunit *test)
+{
+ const packed_buf_t buf = {
+ .buf = { 0x17, 0x28, 0x10, 0x19, 0x3D, 0xA9, 0x07, 0x9C },
+ };
+ struct test_data data = {};
+
+ unpack_fields(&buf, sizeof(buf), &data, test_fields, 0);
+
+ KUNIT_EXPECT_EQ(test, 0, data.field1);
+ KUNIT_EXPECT_EQ(test, 0x172, data.field2);
+ KUNIT_EXPECT_EQ(test, 0x810193, data.field3);
+ KUNIT_EXPECT_EQ(test, 0x36A4, data.field4);
+ KUNIT_EXPECT_EQ(test, 0x3, data.field5);
+ KUNIT_EXPECT_EQ(test, 0x19C, data.field6);
+}
+
static struct kunit_case packing_test_cases[] = {
KUNIT_CASE_PARAM(packing_test_pack, packing_gen_params),
KUNIT_CASE_PARAM(packing_test_unpack, packing_gen_params),
+ KUNIT_CASE(packing_test_pack_fields),
+ KUNIT_CASE(packing_test_unpack_fields),
{},
};
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
index ec10506834b6..1682e614309c 100644
--- a/lib/win_minmax.c
+++ b/lib/win_minmax.c
@@ -97,3 +97,4 @@ u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
return minmax_subwin_update(m, win, &val);
}
+EXPORT_SYMBOL(minmax_running_min);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 82bac2426631..902694c0ce64 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -955,6 +955,7 @@ int br_fdb_dump(struct sk_buff *skb,
struct net_device *filter_dev,
int *idx)
{
+ struct ndo_fdb_dump_context *ctx = (void *)cb->ctx;
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_fdb_entry *f;
int err = 0;
@@ -970,7 +971,7 @@ int br_fdb_dump(struct sk_buff *skb,
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
- if (*idx < cb->args[2])
+ if (*idx < ctx->fdb_idx)
goto skip;
if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) {
if (filter_dev != dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index 45a8c3dd4a64..c7f3dea3e0eb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -460,7 +460,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
* PP consumers must pay attention to run APIs in the appropriate context
* (e.g. NAPI context).
*/
-static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
+DEFINE_PER_CPU(struct page_pool *, system_page_pool);
#ifdef CONFIG_LOCKDEP
/*
@@ -4931,7 +4931,7 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
}
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
@@ -5033,7 +5033,7 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
}
static int
-netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
+netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
{
struct sk_buff *skb = *pskb;
int err, hroom, troom;
@@ -5057,7 +5057,7 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
struct sk_buff *skb = *pskb;
u32 mac_len, act = XDP_DROP;
@@ -5110,7 +5110,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
* and DDOS attacks will be more effective. In-driver-XDP use dedicated TX
* queues, so they do not have this starvation issue.
*/
-void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
@@ -5135,7 +5135,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
+int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
@@ -12152,11 +12152,18 @@ static int net_page_pool_create(int cpuid)
.nid = cpu_to_mem(cpuid),
};
struct page_pool *pp_ptr;
+ int err;
pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
if (IS_ERR(pp_ptr))
return -ENOMEM;
+ err = xdp_reg_page_pool(pp_ptr);
+ if (err) {
+ page_pool_destroy(pp_ptr);
+ return err;
+ }
+
per_cpu(system_page_pool, cpuid) = pp_ptr;
#endif
return 0;
@@ -12290,6 +12297,7 @@ out:
if (!pp_ptr)
continue;
+ xdp_unreg_page_pool(pp_ptr);
page_pool_destroy(pp_ptr);
per_cpu(system_page_pool, i) = NULL;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 6625b3f563a4..fac245065b0a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4348,9 +4348,9 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
EXPORT_SYMBOL_GPL(xdp_master_redirect);
static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
- struct net_device *dev,
+ const struct net_device *dev,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
@@ -4371,10 +4371,10 @@ err:
return err;
}
-static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
- struct net_device *dev,
- struct xdp_frame *xdpf,
- struct bpf_prog *xdp_prog)
+static __always_inline int
+__xdp_do_redirect_frame(struct bpf_redirect_info *ri, struct net_device *dev,
+ struct xdp_frame *xdpf,
+ const struct bpf_prog *xdp_prog)
{
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
@@ -4443,7 +4443,7 @@ err:
}
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
@@ -4457,7 +4457,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
EXPORT_SYMBOL_GPL(xdp_do_redirect);
int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
- struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
+ struct xdp_frame *xdpf,
+ const struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
@@ -4472,9 +4473,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, void *fwd,
- enum bpf_map_type map_type, u32 map_id,
- u32 flags)
+ const struct bpf_prog *xdp_prog,
+ void *fwd, enum bpf_map_type map_type,
+ u32 map_id, u32 flags)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
struct bpf_map *map;
@@ -4528,7 +4529,8 @@ err:
}
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
+ struct xdp_buff *xdp,
+ const struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
enum bpf_map_type map_type = ri->map_type;
@@ -9075,7 +9077,8 @@ static bool xdp_is_valid_access(int off, int size,
return __is_valid_xdp_access(off, size);
}
-void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act)
+void bpf_warn_invalid_xdp_action(const struct net_device *dev,
+ const struct bpf_prog *prog, u32 act)
{
const u32 act_max = XDP_REDIRECT;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2e459b9d88eb..6f2647b000b8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -390,7 +390,7 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
}
EXPORT_SYMBOL(netpoll_send_skb);
-void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
+int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
int total_len, ip_len, udp_len;
struct sk_buff *skb;
@@ -414,7 +414,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
skb = find_skb(np, total_len + np->dev->needed_tailroom,
total_len - len);
if (!skb)
- return;
+ return -ENOMEM;
skb_copy_to_linear_data(skb, msg, len);
skb_put(skb, len);
@@ -490,7 +490,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
skb->dev = np->dev;
- netpoll_send_skb(np, skb);
+ return (int)netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);
@@ -634,7 +634,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
goto out;
}
- if (!rcu_access_pointer(ndev->npinfo)) {
+ npinfo = rtnl_dereference(ndev->npinfo);
+ if (!npinfo) {
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
@@ -654,7 +655,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
goto free_npinfo;
}
} else {
- npinfo = rtnl_dereference(ndev->npinfo);
refcount_inc(&npinfo->refcnt);
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f89cf93f6eb4..4c85b77cfdac 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -840,22 +840,22 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
EXPORT_SYMBOL(page_pool_put_unrefed_page);
/**
- * page_pool_put_page_bulk() - release references on multiple pages
+ * page_pool_put_netmem_bulk() - release references on multiple netmems
* @pool: pool from which pages were allocated
- * @data: array holding page pointers
- * @count: number of pages in @data
+ * @data: array holding netmem references
+ * @count: number of entries in @data
*
- * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring
- * producer lock. If the ptr_ring is full, page_pool_put_page_bulk()
- * will release leftover pages to the page allocator.
- * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
+ * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring
+ * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk()
+ * will release leftover netmems to the memory provider.
+ * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx
* completion loop for the XDP_REDIRECT use case.
*
* Please note the caller must not use data area after running
- * page_pool_put_page_bulk(), as this function overwrites it.
+ * page_pool_put_netmem_bulk(), as this function overwrites it.
*/
-void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count)
+void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data,
+ u32 count)
{
int i, bulk_len = 0;
bool allow_direct;
@@ -864,7 +864,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
allow_direct = page_pool_napi_local(pool);
for (i = 0; i < count; i++) {
- netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i]));
+ netmem_ref netmem = netmem_compound_head(data[i]);
/* It is not the last user for the page frag case */
if (!page_pool_is_last_ref(netmem))
@@ -873,7 +873,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
netmem = __page_pool_put_page(pool, netmem, -1, allow_direct);
/* Approved for bulk recycling in ptr_ring cache */
if (netmem)
- data[bulk_len++] = (__force void *)netmem;
+ data[bulk_len++] = netmem;
}
if (!bulk_len)
@@ -882,7 +882,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
/* Bulk producer into ptr_ring page_pool cache */
in_softirq = page_pool_producer_lock(pool);
for (i = 0; i < bulk_len; i++) {
- if (__ptr_ring_produce(&pool->ring, data[i])) {
+ if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) {
/* ring full */
recycle_stat_inc(pool, ring_full);
break;
@@ -899,9 +899,9 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
* since put_page() with refcnt == 1 can be an expensive operation
*/
for (; i < bulk_len; i++)
- page_pool_return_page(pool, (__force netmem_ref)data[i]);
+ page_pool_return_page(pool, data[i]);
}
-EXPORT_SYMBOL(page_pool_put_page_bulk);
+EXPORT_SYMBOL(page_pool_put_netmem_bulk);
static netmem_ref page_pool_drain_frag(struct page_pool *pool,
netmem_ref netmem)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7e23cacbe66e..ee95dbb0539a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3883,17 +3883,14 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
list_add_tail(&t->th_list, &pn->pktgen_threads);
init_completion(&t->start_done);
- p = kthread_create_on_node(pktgen_thread_worker,
- t,
- cpu_to_node(cpu),
- "kpktgend_%d", cpu);
+ p = kthread_create_on_cpu(pktgen_thread_worker, t, cpu, "kpktgend_%d");
if (IS_ERR(p)) {
pr_err("kthread_create_on_node() failed for cpu %d\n", t->cpu);
list_del(&t->th_list);
kfree(t);
return PTR_ERR(p);
}
- kthread_bind(p, cpu);
+
t->tsk = p;
pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ebcfc2debf1a..d9e363d9fa31 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4764,15 +4764,16 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
int *idx,
struct netdev_hw_addr_list *list)
{
+ struct ndo_fdb_dump_context *ctx = (void *)cb->ctx;
struct netdev_hw_addr *ha;
- int err;
u32 portid, seq;
+ int err;
portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
list_for_each_entry(ha, &list->list, list) {
- if (*idx < cb->args[2])
+ if (*idx < ctx->fdb_idx)
goto skip;
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
@@ -4911,18 +4912,16 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net_device *dev;
- struct net_device *br_dev = NULL;
- const struct net_device_ops *ops = NULL;
- const struct net_device_ops *cops = NULL;
+ const struct net_device_ops *ops = NULL, *cops = NULL;
+ struct ndo_fdb_dump_context *ctx = (void *)cb->ctx;
+ struct net_device *dev, *br_dev = NULL;
struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
int brport_idx = 0;
int br_idx = 0;
- int h, s_h;
- int idx = 0, s_idx;
- int err = 0;
int fidx = 0;
+ int err;
+
+ NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context);
if (cb->strict_check)
err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx,
@@ -4941,70 +4940,51 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
ops = br_dev->netdev_ops;
}
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
-
- if (brport_idx && (dev->ifindex != brport_idx))
- continue;
-
- if (!br_idx) { /* user did not specify a specific bridge */
- if (netif_is_bridge_port(dev)) {
- br_dev = netdev_master_upper_dev_get(dev);
- cops = br_dev->netdev_ops;
- }
- } else {
- if (dev != br_dev &&
- !netif_is_bridge_port(dev))
- continue;
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ if (brport_idx && (dev->ifindex != brport_idx))
+ continue;
- if (br_dev != netdev_master_upper_dev_get(dev) &&
- !netif_is_bridge_master(dev))
- continue;
- cops = ops;
+ if (!br_idx) { /* user did not specify a specific bridge */
+ if (netif_is_bridge_port(dev)) {
+ br_dev = netdev_master_upper_dev_get(dev);
+ cops = br_dev->netdev_ops;
}
+ } else {
+ if (dev != br_dev &&
+ !netif_is_bridge_port(dev))
+ continue;
- if (idx < s_idx)
- goto cont;
+ if (br_dev != netdev_master_upper_dev_get(dev) &&
+ !netif_is_bridge_master(dev))
+ continue;
+ cops = ops;
+ }
- if (netif_is_bridge_port(dev)) {
- if (cops && cops->ndo_fdb_dump) {
- err = cops->ndo_fdb_dump(skb, cb,
- br_dev, dev,
- &fidx);
- if (err == -EMSGSIZE)
- goto out;
- }
+ if (netif_is_bridge_port(dev)) {
+ if (cops && cops->ndo_fdb_dump) {
+ err = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
+ &fidx);
+ if (err == -EMSGSIZE)
+ break;
}
+ }
- if (dev->netdev_ops->ndo_fdb_dump)
- err = dev->netdev_ops->ndo_fdb_dump(skb, cb,
- dev, NULL,
- &fidx);
- else
- err = ndo_dflt_fdb_dump(skb, cb, dev, NULL,
- &fidx);
- if (err == -EMSGSIZE)
- goto out;
+ if (dev->netdev_ops->ndo_fdb_dump)
+ err = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
+ &fidx);
+ else
+ err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, &fidx);
+ if (err == -EMSGSIZE)
+ break;
- cops = NULL;
+ cops = NULL;
- /* reset fdb offset to 0 for rest of the interfaces */
- cb->args[2] = 0;
- fidx = 0;
-cont:
- idx++;
- }
+ /* reset fdb offset to 0 for rest of the interfaces */
+ ctx->fdb_idx = 0;
+ fidx = 0;
}
-out:
- cb->args[0] = h;
- cb->args[1] = idx;
- cb->args[2] = fidx;
+ ctx->fdb_idx = fidx;
return skb->len;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6841e61a6bd0..a441613a1e6c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1009,7 +1009,7 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
EXPORT_SYMBOL(skb_pp_cow_data);
int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
- struct bpf_prog *prog)
+ const struct bpf_prog *prog)
{
if (!prog->aux->xdp_has_frags)
return -EINVAL;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bcc5551c6424..938ad15c9857 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -358,6 +358,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
if (IS_ERR(xdp_alloc))
return PTR_ERR(xdp_alloc);
+ if (type == MEM_TYPE_XSK_BUFF_POOL && allocator)
+ xsk_pool_set_rxq_info(allocator, xdp_rxq);
+
if (trace_mem_connect_enabled() && xdp_alloc)
trace_mem_connect(xdp_alloc, xdp_rxq);
return 0;
@@ -365,6 +368,62 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+/**
+ * xdp_reg_page_pool - register &page_pool as a memory provider for XDP
+ * @pool: &page_pool to register
+ *
+ * Can be used to register pools manually without connecting to any XDP RxQ
+ * info, so that the XDP layer will be aware of them. Then, they can be
+ * attached to an RxQ info manually via xdp_rxq_info_attach_page_pool().
+ *
+ * Return: %0 on success, -errno on error.
+ */
+int xdp_reg_page_pool(struct page_pool *pool)
+{
+ struct xdp_mem_info mem;
+
+ return xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool);
+}
+EXPORT_SYMBOL_GPL(xdp_reg_page_pool);
+
+/**
+ * xdp_unreg_page_pool - unregister &page_pool from the memory providers list
+ * @pool: &page_pool to unregister
+ *
+ * A shorthand for manual unregistering page pools. If the pool was previously
+ * attached to an RxQ info, it must be detached first.
+ */
+void xdp_unreg_page_pool(const struct page_pool *pool)
+{
+ struct xdp_mem_info mem = {
+ .type = MEM_TYPE_PAGE_POOL,
+ .id = pool->xdp_mem_id,
+ };
+
+ xdp_unreg_mem_model(&mem);
+}
+EXPORT_SYMBOL_GPL(xdp_unreg_page_pool);
+
+/**
+ * xdp_rxq_info_attach_page_pool - attach registered pool to RxQ info
+ * @xdp_rxq: XDP RxQ info to attach the pool to
+ * @pool: pool to attach
+ *
+ * If the pool was registered manually, this function must be called instead
+ * of xdp_rxq_info_reg_mem_model() to connect it to the RxQ info.
+ */
+void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
+ const struct page_pool *pool)
+{
+ struct xdp_mem_info mem = {
+ .type = MEM_TYPE_PAGE_POOL,
+ .id = pool->xdp_mem_id,
+ };
+
+ xdp_rxq_info_attach_mem_model(xdp_rxq, &mem);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
+
/* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolean
@@ -459,7 +518,7 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
if (unlikely(!xa || !bq->count))
return;
- page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count);
+ page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count);
/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
bq->count = 0;
}
@@ -500,12 +559,12 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
for (i = 0; i < sinfo->nr_frags; i++) {
skb_frag_t *frag = &sinfo->frags[i];
- bq->q[bq->count++] = skb_frag_address(frag);
+ bq->q[bq->count++] = skb_frag_netmem(frag);
if (bq->count == XDP_BULK_QUEUE_SIZE)
xdp_flush_frame_bulk(bq);
}
}
- bq->q[bq->count++] = xdpf->data;
+ bq->q[bq->count++] = virt_to_netmem(xdpf->data);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ee0aaec4c8e0..5c9d1798e830 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1575,6 +1575,22 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
cpu_dp->tag_ops = tag_ops;
}
+/* dsa_supports_eee - indicate that EEE is supported
+ * @ds: pointer to &struct dsa_switch
+ * @port: port index
+ *
+ * A default implementation for the .support_eee() DSA operations member,
+ * which drivers can use to indicate that they support EEE on all of their
+ * user ports.
+ *
+ * Returns: true
+ */
+bool dsa_supports_eee(struct dsa_switch *ds, int port)
+{
+ return true;
+}
+EXPORT_SYMBOL_GPL(dsa_supports_eee);
+
static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 06c30a9e29ff..4a8de48a6f24 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -515,12 +515,13 @@ dsa_user_port_fdb_do_dump(const unsigned char *addr, u16 vid,
bool is_static, void *data)
{
struct dsa_user_dump_ctx *dump = data;
+ struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx;
u32 portid = NETLINK_CB(dump->cb->skb).portid;
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
- if (dump->idx < dump->cb->args[2])
+ if (dump->idx < ctx->fdb_idx)
goto skip;
nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
@@ -1228,8 +1229,12 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e)
struct dsa_switch *ds = dp->ds;
int ret;
+ /* Check whether the switch supports EEE */
+ if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index))
+ return -EOPNOTSUPP;
+
/* Port's PHY and MAC both need to be EEE capable */
- if (!dev->phydev || !dp->pl)
+ if (!dev->phydev)
return -ENODEV;
if (!ds->ops->set_mac_eee)
@@ -1248,8 +1253,12 @@ static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e)
struct dsa_switch *ds = dp->ds;
int ret;
+ /* Check whether the switch supports EEE */
+ if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index))
+ return -EOPNOTSUPP;
+
/* Port's PHY and MAC both need to be EEE capable */
- if (!dev->phydev || !dp->pl)
+ if (!dev->phydev)
return -ENODEV;
if (!ds->ops->get_mac_eee)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0065b1996c94..a59204a8d850 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1169,7 +1169,10 @@ alloc_new_skb:
/* [!] NOTE: copy will be negative if pagedlen>0
* because then the equation reduces to -fraggap.
*/
- if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+ if (copy > 0 &&
+ INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, data + transhdrlen, offset,
+ copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
@@ -1213,8 +1216,9 @@ alloc_new_skb:
unsigned int off;
off = skb->len;
- if (getfrag(from, skb_put(skb, copy),
- offset, copy, off, skb) < 0) {
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
@@ -1252,7 +1256,8 @@ alloc_new_skb:
get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (getfrag(from,
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from,
page_address(pfrag->page) + pfrag->offset,
offset, copy, skb->len, skb) < 0)
goto error_efault;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a79b2a52ce01..42cb5dc9cb24 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,6 +45,7 @@ static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX;
static int tcp_plb_max_rounds = 31;
static int tcp_plb_max_cong_thresh = 256;
+static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -1066,6 +1067,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = SYSCTL_TWO,
},
{
+ .procname = "tcp_tw_reuse_delay",
+ .data = &init_net.ipv4.sysctl_tcp_tw_reuse_delay,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &tcp_tw_reuse_delay_max,
+ },
+ {
.procname = "tcp_max_syn_backlog",
.data = &init_net.ipv4.sysctl_max_syn_backlog,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a38c8b1f44db..e45222d5fc2e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -120,6 +120,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
int ts_recent_stamp;
+ u32 reuse_thresh;
if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2)
reuse = 0;
@@ -162,9 +163,10 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table.
*/
ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp);
+ reuse_thresh = READ_ONCE(tw->tw_entry_stamp) +
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse_delay);
if (ts_recent_stamp &&
- (!twp || (reuse && time_after32(ktime_get_seconds(),
- ts_recent_stamp)))) {
+ (!twp || (reuse && time_after32(tcp_clock_ms(), reuse_thresh)))) {
/* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk
* and releasing the bucket lock.
*/
@@ -3457,6 +3459,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
net->ipv4.sysctl_tcp_tw_reuse = 2;
+ net->ipv4.sysctl_tcp_tw_reuse_delay = 1 * MSEC_PER_SEC;
net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7121d8573928..b089b08e9617 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -157,8 +157,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
rcv_nxt);
if (tmp_opt.saw_tstamp) {
+ u64 ts = tcp_clock_ms();
+
+ WRITE_ONCE(tw->tw_entry_stamp, ts);
WRITE_ONCE(tcptw->tw_ts_recent_stamp,
- ktime_get_seconds());
+ div_u64(ts, MSEC_PER_SEC));
WRITE_ONCE(tcptw->tw_ts_recent,
tmp_opt.rcv_tsval);
}
@@ -316,6 +319,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_mark = sk->sk_mark;
tw->tw_priority = READ_ONCE(sk->sk_priority);
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
+ /* refreshed when we enter true TIME-WAIT state */
+ tw->tw_entry_stamp = tcp_time_stamp_ms(tp);
tcptw->tw_rcv_nxt = tp->rcv_nxt;
tcptw->tw_snd_nxt = tp->snd_nxt;
tcptw->tw_rcv_wnd = tcp_receive_window(tp);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 9d8422e350f8..28e5a89dc255 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -253,14 +253,15 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
}
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
- struct ioam6_lwt_encap *tuninfo)
+ struct ioam6_lwt_encap *tuninfo,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *oldhdr, *hdr;
int hdrlen, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -291,7 +292,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
bool has_tunsrc,
struct in6_addr *tunsrc,
- struct in6_addr *tundst)
+ struct in6_addr *tundst,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr, *inner_hdr;
@@ -300,7 +302,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
len = sizeof(*hdr) + hdrlen;
- err = skb_cow_head(skb, len + skb->mac_len);
+ err = skb_cow_head(skb, len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -334,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
+ struct dst_entry *dst = skb_dst(skb), *cache_dst;
struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
@@ -352,6 +354,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
orig_daddr = ipv6_hdr(skb)->daddr;
+ local_bh_disable();
+ cache_dst = dst_cache_get(&ilwt->cache);
+ local_bh_enable();
+
switch (ilwt->mode) {
case IOAM6_IPTUNNEL_MODE_INLINE:
do_inline:
@@ -359,7 +365,7 @@ do_inline:
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
goto out;
- err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst);
if (unlikely(err))
goto drop;
@@ -369,7 +375,7 @@ do_encap:
/* Encapsulation (ip6ip6) */
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
ilwt->has_tunsrc, &ilwt->tunsrc,
- &ilwt->tundst);
+ &ilwt->tundst, cache_dst);
if (unlikely(err))
goto drop;
@@ -387,41 +393,36 @@ do_encap:
goto drop;
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
+ if (unlikely(!cache_dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ cache_dst = ip6_route_output(net, NULL, &fl6);
+ if (cache_dst->error) {
+ err = cache_dst->error;
+ dst_release(cache_dst);
+ goto drop;
+ }
- if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
local_bh_disable();
- dst = dst_cache_get(&ilwt->cache);
+ dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
local_bh_enable();
- if (unlikely(!dst)) {
- struct ipv6hdr *hdr = ipv6_hdr(skb);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = hdr->daddr;
- fl6.saddr = hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = hdr->nexthdr;
-
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- err = dst->error;
- dst_release(dst);
- goto drop;
- }
-
- local_bh_disable();
- dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
- local_bh_enable();
- }
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
+ if (unlikely(err))
+ goto drop;
+ }
+ if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
skb_dst_drop(skb);
- skb_dst_set(skb, dst);
-
+ skb_dst_set(skb, cache_dst);
return dst_output(net, sk, skb);
}
out:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f7b4608bb316..3d672dea9f56 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1697,8 +1697,9 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
if (copy > 0 &&
- getfrag(from, data + transhdrlen, offset,
- copy, fraggap, skb) < 0) {
+ INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, data + transhdrlen, offset,
+ copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
@@ -1742,8 +1743,9 @@ alloc_new_skb:
unsigned int off;
off = skb->len;
- if (getfrag(from, skb_put(skb, copy),
- offset, copy, off, skb) < 0) {
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
@@ -1781,7 +1783,8 @@ alloc_new_skb:
get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (getfrag(from,
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from,
page_address(pfrag->page) + pfrag->offset,
offset, copy, skb->len, skb) < 0)
goto error_efault;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b244dbf61d5f..5ca8692d565d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1021,29 +1021,31 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
rcu_read_lock();
idev = __in6_dev_get(dev);
- if (idev) {
- for_each_mc_rcu(idev, mc) {
- if (ipv6_addr_equal(&mc->mca_addr, group))
- break;
- }
- if (mc) {
- if (src_addr && !ipv6_addr_any(src_addr)) {
- struct ip6_sf_list *psf;
+ if (!idev)
+ goto unlock;
+ for_each_mc_rcu(idev, mc) {
+ if (ipv6_addr_equal(&mc->mca_addr, group))
+ break;
+ }
+ if (!mc)
+ goto unlock;
+ if (src_addr && !ipv6_addr_any(src_addr)) {
+ struct ip6_sf_list *psf;
- for_each_psf_rcu(mc, psf) {
- if (ipv6_addr_equal(&psf->sf_addr, src_addr))
- break;
- }
- if (psf)
- rv = psf->sf_count[MCAST_INCLUDE] ||
- psf->sf_count[MCAST_EXCLUDE] !=
- mc->mca_sfcount[MCAST_EXCLUDE];
- else
- rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0;
- } else
- rv = true; /* don't filter unspecified source */
+ for_each_psf_rcu(mc, psf) {
+ if (ipv6_addr_equal(&psf->sf_addr, src_addr))
+ break;
}
+ if (psf)
+ rv = READ_ONCE(psf->sf_count[MCAST_INCLUDE]) ||
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]) !=
+ READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]);
+ else
+ rv = READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]) != 0;
+ } else {
+ rv = true; /* don't filter unspecified source */
}
+unlock:
rcu_read_unlock();
return rv;
}
@@ -2285,7 +2287,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
/* source filter not found, or count wrong => bug */
return -ESRCH;
}
- psf->sf_count[sfmode]--;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] - 1);
if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
struct inet6_dev *idev = pmc->idev;
@@ -2391,7 +2393,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
rcu_assign_pointer(pmc->mca_sources, psf);
}
}
- psf->sf_count[sfmode]++;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] + 1);
return 0;
}
@@ -2503,7 +2505,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
sf_markstate(pmc);
isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
if (!delta)
- pmc->mca_sfcount[sfmode]++;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] + 1);
err = 0;
for (i = 0; i < sfcount; i++) {
err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
@@ -2514,7 +2517,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int j;
if (!delta)
- pmc->mca_sfcount[sfmode]--;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] - 1);
for (j = 0; j < i; j++)
ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
@@ -2559,7 +2563,8 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
RCU_INIT_POINTER(pmc->mca_sources, NULL);
pmc->mca_sfmode = MCAST_EXCLUDE;
pmc->mca_sfcount[MCAST_INCLUDE] = 0;
- pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ /* Paired with the READ_ONCE() from ipv6_chk_mcast_addr() */
+ WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1);
}
/* called with mc_lock */
@@ -3074,8 +3079,8 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name,
&state->im->mca_addr,
&psf->sf_addr,
- psf->sf_count[MCAST_INCLUDE],
- psf->sf_count[MCAST_EXCLUDE]);
+ READ_ONCE(psf->sf_count[MCAST_INCLUDE]),
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]));
}
return 0;
}
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index db3c19a42e1c..7ba22d2f2bfe 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -125,7 +125,8 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt)
}
static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
- const struct ipv6_rpl_sr_hdr *srh)
+ const struct ipv6_rpl_sr_hdr *srh,
+ struct dst_entry *cache_dst)
{
struct ipv6_rpl_sr_hdr *isrh, *csrh;
const struct ipv6hdr *oldhdr;
@@ -153,7 +154,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
hdrlen = ((csrh->hdrlen + 1) << 3);
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err)) {
kfree(buf);
return err;
@@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
return 0;
}
-static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
+static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct rpl_iptunnel_encap *tinfo;
@@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
tinfo = rpl_encap_lwtunnel(dst->lwtstate);
- return rpl_do_srh_inline(skb, rlwt, tinfo->srh);
+ return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst);
}
static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
-
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
local_bh_enable();
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -237,15 +239,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
local_bh_enable();
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
return dst_output(net, sk, skb);
drop:
@@ -262,29 +264,31 @@ static int rpl_input(struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
-
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
+ local_bh_enable();
+
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err))
+ goto drop;
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
+ local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
return dst_input(skb);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 098632adc9b5..4bf937bfc263 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
return flowlabel;
}
-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
@@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
return 0;
}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+{
+ return __seg6_do_srh_encap(skb, osrh, proto, NULL);
+}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
- struct ipv6_sr_hdr *osrh, int proto)
+ struct ipv6_sr_hdr *osrh, int proto,
+ struct dst_entry *cache_dst)
{
__u8 first_seg = osrh->first_segment;
struct dst_entry *dst = skb_dst(skb);
@@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
tot_len = red_hdrlen + sizeof(struct ipv6hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -317,8 +324,8 @@ out:
return 0;
}
-/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
-EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
-static int seg6_do_srh(struct sk_buff *skb)
+static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
@@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
- err = seg6_do_srh_inline(skb, tinfo->srh);
+ err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
if (err)
return err;
break;
@@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb)
return -EINVAL;
if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ proto, cache_dst);
else
- err = seg6_do_srh_encap_red(skb, tinfo->srh, proto);
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ proto, cache_dst);
if (err)
return err;
@@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_push(skb, skb->mac_len);
if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ IPPROTO_ETHERNET,
+ cache_dst);
else
err = seg6_do_srh_encap_red(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ IPPROTO_ETHERNET,
+ cache_dst);
if (err)
return err;
@@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb)
return 0;
}
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ return __seg6_do_srh_inline(skb, osrh, NULL);
+}
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
+
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -458,31 +475,33 @@ static int seg6_input_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
+ local_bh_enable();
+
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err))
+ goto drop;
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
@@ -528,16 +547,16 @@ static int seg6_output_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
local_bh_enable();
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -559,15 +578,15 @@ static int seg6_output_core(struct net *net, struct sock *sk,
local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
local_bh_enable();
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index d692b902e120..e83691073496 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -73,9 +73,9 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev
int ret = l2tp_xmit_skb(session, skb);
if (likely(ret == NET_XMIT_SUCCESS))
- dev_sw_netstats_tx_add(dev, 1, len);
+ dev_dstats_tx_add(dev, len);
else
- DEV_STATS_INC(dev, tx_dropped);
+ dev_dstats_tx_dropped(dev);
return NETDEV_TX_OK;
}
@@ -84,7 +84,6 @@ static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_init = l2tp_eth_dev_init,
.ndo_uninit = l2tp_eth_dev_uninit,
.ndo_start_xmit = l2tp_eth_dev_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = eth_mac_addr,
};
@@ -100,7 +99,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
dev->lltx = true;
dev->netdev_ops = &l2tp_eth_netdev_ops;
dev->needs_free_netdev = true;
- dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
}
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
@@ -128,7 +127,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
goto error_rcu;
if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS)
- dev_sw_netstats_rx_add(dev, data_len);
+ dev_dstats_rx_add(dev, data_len);
else
DEV_STATS_INC(dev, rx_errors);
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 26ce34b7e88e..8e0724c56723 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -20,8 +20,7 @@
#include <net/sock.h>
struct mctp_dump_cb {
- int h;
- int idx;
+ unsigned long ifindex;
size_t a_idx;
};
@@ -115,43 +114,29 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct mctp_dump_cb *mcb = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
struct net_device *dev;
struct ifaddrmsg *hdr;
struct mctp_dev *mdev;
- int ifindex;
- int idx = 0, rc;
+ int ifindex, rc;
hdr = nlmsg_data(cb->nlh);
// filter by ifindex if requested
ifindex = hdr->ifa_index;
rcu_read_lock();
- for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) {
- idx = 0;
- head = &net->dev_index_head[mcb->h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx >= mcb->idx &&
- (ifindex == 0 || ifindex == dev->ifindex)) {
- mdev = __mctp_dev_get(dev);
- if (mdev) {
- rc = mctp_dump_dev_addrinfo(mdev,
- skb, cb);
- mctp_dev_put(mdev);
- // Error indicates full buffer, this
- // callback will get retried.
- if (rc < 0)
- goto out;
- }
- }
- idx++;
- // reset for next iteration
- mcb->a_idx = 0;
- }
+ for_each_netdev_dump(net, dev, mcb->ifindex) {
+ if (ifindex && ifindex != dev->ifindex)
+ continue;
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ continue;
+ rc = mctp_dump_dev_addrinfo(mdev, skb, cb);
+ mctp_dev_put(mdev);
+ if (rc < 0)
+ break;
+ mcb->a_idx = 0;
}
-out:
rcu_read_unlock();
- mcb->idx = idx;
return skb->len;
}
@@ -531,9 +516,12 @@ static struct notifier_block mctp_dev_nb = {
};
static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = {
- {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0},
- {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0},
- {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_NEWADDR,
+ .doit = mctp_rtm_newaddr},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_DELADDR,
+ .doit = mctp_rtm_deladdr},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_GETADDR,
+ .dumpit = mctp_dump_addrinfo},
};
int __init mctp_device_init(void)
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index ac5caf5a48e1..210b75e3179e 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -16,6 +16,7 @@ rxrpc-y := \
conn_object.o \
conn_service.o \
input.o \
+ input_rack.o \
insecure.o \
io_thread.o \
key.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9d8bd0b37e41..86873399f7d5 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -408,9 +408,9 @@ void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call)
/* Make sure we're not going to call back into a kernel service */
if (call->notify_rx) {
- spin_lock(&call->notify_lock);
+ spin_lock_irq(&call->notify_lock);
call->notify_rx = rxrpc_dummy_notify_rx;
- spin_unlock(&call->notify_lock);
+ spin_unlock_irq(&call->notify_lock);
}
}
mutex_unlock(&call->user_mutex);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index d0fd37bdcfe9..0c0a3c89dba3 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -30,6 +30,7 @@ struct rxrpc_crypt {
struct key_preparsed_payload;
struct rxrpc_connection;
struct rxrpc_txbuf;
+struct rxrpc_txqueue;
/*
* Mark applied to socket buffers in skb->mark. skb->priority is used
@@ -98,6 +99,7 @@ struct rxrpc_net {
atomic_t stat_tx_data_send;
atomic_t stat_tx_data_send_frag;
atomic_t stat_tx_data_send_fail;
+ atomic_t stat_tx_data_send_msgsize;
atomic_t stat_tx_data_underflow;
atomic_t stat_tx_data_cwnd_reset;
atomic_t stat_rx_data;
@@ -109,6 +111,8 @@ struct rxrpc_net {
atomic_t stat_tx_ack_skip;
atomic_t stat_tx_acks[256];
atomic_t stat_rx_acks[256];
+ atomic_t stat_tx_jumbo[10];
+ atomic_t stat_rx_jumbo[10];
atomic_t stat_why_req_ack[8];
@@ -210,9 +214,8 @@ struct rxrpc_skb_priv {
rxrpc_seq_t first_ack; /* First packet in acks table */
rxrpc_seq_t prev_ack; /* Highest seq seen */
rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */
+ u16 nr_acks; /* Number of acks+nacks */
u8 reason; /* Reason for ack */
- u8 nr_acks; /* Number of acks+nacks */
- u8 nr_nacks; /* Number of nacks */
} ack;
};
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
@@ -320,6 +323,12 @@ struct rxrpc_local {
struct list_head new_client_calls; /* Newly created client calls need connection */
spinlock_t client_call_lock; /* Lock for ->new_client_calls */
struct sockaddr_rxrpc srx; /* local address */
+ /* Provide a kvec table sufficiently large to manage either a DATA
+ * packet with a maximum set of jumbo subpackets or a PING ACK padded
+ * out to 64K with zeropages for PMTUD.
+ */
+ struct kvec kvec[RXRPC_MAX_NR_JUMBO > 3 + 16 ?
+ RXRPC_MAX_NR_JUMBO : 3 + 16];
};
/*
@@ -338,25 +347,28 @@ struct rxrpc_peer {
time64_t last_tx_at; /* Last time packet sent here */
seqlock_t service_conn_lock;
spinlock_t lock; /* access lock */
- unsigned int if_mtu; /* interface MTU for this peer */
- unsigned int mtu; /* network MTU for this peer */
- unsigned int maxdata; /* data size (MTU - hdrsize) */
- unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
int debug_id; /* debug ID for printks */
struct sockaddr_rxrpc srx; /* remote address */
- /* calculated RTT cache */
-#define RXRPC_RTT_CACHE_SIZE 32
- spinlock_t rtt_input_lock; /* RTT lock for input routine */
- ktime_t rtt_last_req; /* Time of last RTT request */
- unsigned int rtt_count; /* Number of samples we've got */
+ /* Path MTU discovery [RFC8899] */
+ unsigned int pmtud_trial; /* Current MTU probe size */
+ unsigned int pmtud_good; /* Largest working MTU probe we've tried */
+ unsigned int pmtud_bad; /* Smallest non-working MTU probe we've tried */
+ bool pmtud_lost; /* T if MTU probe was lost */
+ bool pmtud_probing; /* T if we have an active probe outstanding */
+ bool pmtud_pending; /* T if a call to this peer should send a probe */
+ u8 pmtud_jumbo; /* Max jumbo packets for the MTU */
+ bool ackr_adv_pmtud; /* T if the peer advertises path-MTU */
+ unsigned int ackr_max_data; /* Maximum data advertised by peer */
+ seqcount_t mtu_lock; /* Lockless MTU access management */
+ unsigned int if_mtu; /* Local interface MTU (- hdrsize) for this peer */
+ unsigned int max_data; /* Maximum packet data capacity for this peer */
+ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
+ unsigned short tx_seg_max; /* Maximum number of transmissible segments */
- u32 srtt_us; /* smoothed round trip time << 3 in usecs */
- u32 mdev_us; /* medium deviation */
- u32 mdev_max_us; /* maximal mdev for the last rtt period */
- u32 rttvar_us; /* smoothed mdev_max */
- u32 rto_us; /* Retransmission timeout in usec */
- u8 backoff; /* Backoff timeout (as shift) */
+ /* Calculated RTT cache */
+ unsigned int recent_srtt_us;
+ unsigned int recent_rto_us;
u8 cong_ssthresh; /* Congestion slow-start threshold */
};
@@ -525,6 +537,8 @@ struct rxrpc_connection {
int debug_id; /* debug ID for printks */
rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
+ rxrpc_serial_t pmtud_probe; /* Serial of MTU probe (or 0) */
+ unsigned int pmtud_call; /* ID of call used for probe */
u32 service_id; /* Service ID, possibly upgraded */
u32 security_level; /* Security level selected */
u8 security_ix; /* security type */
@@ -599,13 +613,25 @@ enum rxrpc_call_state {
/*
* Call Tx congestion management modes.
*/
-enum rxrpc_congest_mode {
- RXRPC_CALL_SLOW_START,
- RXRPC_CALL_CONGEST_AVOIDANCE,
- RXRPC_CALL_PACKET_LOSS,
- RXRPC_CALL_FAST_RETRANSMIT,
- NR__RXRPC_CONGEST_MODES
-};
+enum rxrpc_ca_state {
+ RXRPC_CA_SLOW_START,
+ RXRPC_CA_CONGEST_AVOIDANCE,
+ RXRPC_CA_PACKET_LOSS,
+ RXRPC_CA_FAST_RETRANSMIT,
+ NR__RXRPC_CA_STATES
+} __mode(byte);
+
+/*
+ * Current purpose of call RACK timer. According to the RACK-TLP protocol
+ * [RFC8985], the transmission timer (call->rack_timo_at) may only be used for
+ * one of these at once.
+ */
+enum rxrpc_rack_timer_mode {
+ RXRPC_CALL_RACKTIMER_OFF, /* Timer not running */
+ RXRPC_CALL_RACKTIMER_RACK_REORDER, /* RACK reordering timer */
+ RXRPC_CALL_RACKTIMER_TLP_PTO, /* TLP timeout */
+ RXRPC_CALL_RACKTIMER_RTO, /* Retransmission timeout */
+} __mode(byte);
/*
* RxRPC call definition
@@ -624,8 +650,7 @@ struct rxrpc_call {
struct mutex user_mutex; /* User access mutex */
struct sockaddr_rxrpc dest_srx; /* Destination address */
ktime_t delay_ack_at; /* When DELAY ACK needs to happen */
- ktime_t ack_lost_at; /* When ACK is figured as lost */
- ktime_t resend_at; /* When next resend needs to happen */
+ ktime_t rack_timo_at; /* When ACK is figured as lost */
ktime_t ping_at; /* When next to send a ping */
ktime_t keepalive_at; /* When next to send a keepalive ping */
ktime_t expect_rx_by; /* When we expect to get a packet by */
@@ -670,21 +695,30 @@ struct rxrpc_call {
unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
unsigned short rx_pkt_len; /* Current recvmsg packet len */
+ /* Sendmsg data tracking. */
+ rxrpc_seq_t send_top; /* Highest Tx slot filled by sendmsg. */
+ struct rxrpc_txqueue *send_queue; /* Queue that sendmsg is writing into */
+
/* Transmitted data tracking. */
- spinlock_t tx_lock; /* Transmit queue lock */
- struct list_head tx_sendmsg; /* Sendmsg prepared packets */
- struct list_head tx_buffer; /* Buffer of transmissible packets */
+ struct rxrpc_txqueue *tx_queue; /* Start of transmission buffers */
+ struct rxrpc_txqueue *tx_qtail; /* End of transmission buffers */
+ rxrpc_seq_t tx_qbase; /* First slot in tx_queue */
rxrpc_seq_t tx_bottom; /* First packet in buffer */
rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
- rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
+ rxrpc_serial_t tx_last_serial; /* Serial of last DATA transmitted */
u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */
- u8 tx_winsize; /* Maximum size of Tx window */
+ u16 tx_nr_sent; /* Number of packets sent, but unacked */
+ u16 tx_nr_lost; /* Number of packets marked lost */
+ u16 tx_nr_resent; /* Number of packets resent, but unacked */
+ u16 tx_winsize; /* Maximum size of Tx window */
#define RXRPC_TX_MAX_WINDOW 128
+ u8 tx_jumbo_max; /* Maximum subpkts peer will accept */
ktime_t tx_last_sent; /* Last time a transmission occurred */
/* Received data tracking */
struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */
+ struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */
struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */
rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */
@@ -698,14 +732,32 @@ struct rxrpc_call {
*/
#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
#define RXRPC_MIN_CWND 4
- u8 cong_cwnd; /* Congestion window size */
+ enum rxrpc_ca_state cong_ca_state; /* Congestion control state */
u8 cong_extra; /* Extra to send for congestion management */
- u8 cong_ssthresh; /* Slow-start threshold */
- enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */
- u8 cong_dup_acks; /* Count of ACKs showing missing packets */
- u8 cong_cumul_acks; /* Cumulative ACK count */
+ u16 cong_cwnd; /* Congestion window size */
+ u16 cong_ssthresh; /* Slow-start threshold */
+ u16 cong_dup_acks; /* Count of ACKs showing missing packets */
+ u16 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
- struct sk_buff *cong_last_nack; /* Last ACK with nacks received */
+
+ /* RACK-TLP [RFC8985] state. */
+ ktime_t rack_xmit_ts; /* Latest transmission timestamp */
+ ktime_t rack_rtt; /* RTT of most recently ACK'd segment */
+ ktime_t rack_rtt_ts; /* Timestamp of rack_rtt */
+ ktime_t rack_reo_wnd; /* Reordering window */
+ unsigned int rack_reo_wnd_mult; /* Multiplier applied to rack_reo_wnd */
+ int rack_reo_wnd_persist; /* Num loss recoveries before reset reo_wnd */
+ rxrpc_seq_t rack_fack; /* Highest sequence so far ACK'd */
+ rxrpc_seq_t rack_end_seq; /* Highest sequence seen */
+ rxrpc_seq_t rack_dsack_round; /* DSACK opt recv'd in latest roundtrip */
+ bool rack_dsack_round_none; /* T if dsack_round is "None" */
+ bool rack_reordering_seen; /* T if detected reordering event */
+ enum rxrpc_rack_timer_mode rack_timer_mode; /* Current mode of RACK timer */
+ bool tlp_is_retrans; /* T if unacked TLP retransmission */
+ rxrpc_serial_t tlp_serial; /* Serial of TLP probe (or 0 if none in progress) */
+ rxrpc_seq_t tlp_seq; /* Sequence of TLP probe */
+ unsigned int tlp_rtt_taken; /* Last time RTT taken */
+ ktime_t tlp_max_ack_delay; /* Sender budget for max delayed ACK interval */
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
@@ -730,32 +782,45 @@ struct rxrpc_call {
/* Transmission-phase ACK management (ACKs we've received). */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
- rxrpc_seq_t acks_first_seq; /* first sequence number received */
+ rxrpc_seq_t acks_hard_ack; /* Highest sequence hard acked */
rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
- rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */
+ unsigned short acks_nr_sacks; /* Number of soft acks recorded */
+ unsigned short acks_nr_snacks; /* Number of soft nacks recorded */
+
+ /* Calculated RTT cache */
+ ktime_t rtt_last_req; /* Time of last RTT request */
+ unsigned int rtt_count; /* Number of samples we've got */
+ unsigned int rtt_taken; /* Number of samples taken (wrapping) */
+ struct minmax min_rtt; /* Estimated minimum RTT */
+ u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ u32 mdev_us; /* medium deviation */
+ u32 mdev_max_us; /* maximal mdev for the last rtt period */
+ u32 rttvar_us; /* smoothed mdev_max */
+ u32 rto_us; /* Retransmission timeout in usec */
+ u8 backoff; /* Backoff timeout (as shift) */
};
/*
* Summary of a new ACK and the changes it made to the Tx buffer packet states.
*/
struct rxrpc_ack_summary {
- u16 nr_acks; /* Number of ACKs in packet */
- u16 nr_new_acks; /* Number of new ACKs in packet */
- u16 nr_new_nacks; /* Number of new nacks in packet */
- u16 nr_retained_nacks; /* Number of nacks retained between ACKs */
- u8 ack_reason;
- bool saw_nacks; /* Saw NACKs in packet */
- bool new_low_nack; /* T if new low NACK found */
- bool retrans_timeo; /* T if reTx due to timeout happened */
- u8 flight_size; /* Number of unreceived transmissions */
- /* Place to stash values for tracing */
- enum rxrpc_congest_mode mode:8;
- u8 cwnd;
- u8 ssthresh;
- u8 dup_acks;
- u8 cumulative_acks;
+ rxrpc_serial_t ack_serial; /* Serial number of ACK */
+ rxrpc_serial_t acked_serial; /* Serial number ACK'd */
+ u16 in_flight; /* Number of unreceived transmissions */
+ u16 nr_new_hacks; /* Number of rotated new ACKs */
+ u16 nr_new_sacks; /* Number of new soft ACKs in packet */
+ u16 nr_new_snacks; /* Number of new soft nacks in packet */
+ u8 ack_reason; /* NOTE(review): presumably the reason code of the ACK being summarised - confirm */
+ /* The flags below are single-bit bitfields. */
+ bool new_low_snack:1; /* T if new low soft NACK found */
+ bool retrans_timeo:1; /* T if reTx due to timeout happened */
+ bool need_retransmit:1; /* T if we need transmission */
+ bool rtt_sample_avail:1; /* T if RTT sample available */
+ bool in_fast_or_rto_recovery:1; /* NOTE(review): presumably T while in fast/RTO recovery - confirm */
+ bool exiting_fast_or_rto_recovery:1; /* NOTE(review): presumably T when this ACK ends that recovery - confirm */
+ bool tlp_probe_acked:1; /* T if the TLP probe seq was acked */
+ u8 /*enum rxrpc_congest_change*/ change;
};
/*
@@ -793,25 +858,23 @@ struct rxrpc_send_params {
* Buffer of data to be output as a packet.
*/
struct rxrpc_txbuf {
- struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
- struct list_head tx_link; /* Link in live Enc queue or Tx queue */
- ktime_t last_sent; /* Time at which last transmitted */
refcount_t ref;
rxrpc_seq_t seq; /* Sequence number of this packet */
rxrpc_serial_t serial; /* Last serial number transmitted with */
unsigned int call_debug_id;
unsigned int debug_id;
- unsigned int len; /* Amount of data in buffer */
- unsigned int space; /* Remaining data space */
- unsigned int offset; /* Offset of fill point */
+ unsigned short len; /* Amount of data in buffer */
+ unsigned short space; /* Remaining data space */
+ unsigned short offset; /* Offset of fill point */
+ unsigned short pkt_len; /* Size of packet content */
+ unsigned short alloc_size; /* Amount of bufferage allocated */
unsigned int flags;
#define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */
#define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */
__be16 cksum; /* Checksum to go in header */
- unsigned short ack_rwind; /* ACK receive window */
- u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */
+ bool jumboable; /* Can be non-terminal jumbo subpacket; if false, no further packets are glued on after it */
u8 nr_kvec; /* Amount of kvec[] used */
- struct kvec kvec[3];
+ struct kvec kvec[1]; /* I/O vector for the packet; nr_kvec says how much of it is in use */
};
static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb)
@@ -824,6 +887,46 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
return !rxrpc_sending_to_server(txb);
}
+/*
+ * Transmit queue element, including RACK [RFC8985] per-segment metadata. The
+ * transmission timestamp is in usec from the base.
+ *
+ * Elements are chained through ->next. Each one holds RXRPC_NR_TXQUEUE slots;
+ * the slot used for a sequence number is (seq & RXRPC_TXQ_MASK) and ->qbase is
+ * the sequence number that corresponds to slot 0. The bitmaps use the slot
+ * index as the bit number, so one unsigned long covers a whole element
+ * (RXRPC_NR_TXQUEUE is BITS_PER_LONG).
+ */
+struct rxrpc_txqueue {
+ /* Start with the members we want to prefetch. */
+ struct rxrpc_txqueue *next; /* Next element in the chain, or NULL at the end */
+ ktime_t xmit_ts_base; /* NOTE(review): presumably the base that segment_xmit_ts[] is relative to - confirm */
+ rxrpc_seq_t qbase; /* Sequence number of slot 0 */
+ u8 nr_reported_acks; /* Number of segments explicitly acked/nacked */
+ unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */
+ unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */
+ unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */
+ unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */
+ unsigned long ever_retransmitted; /* Bit-per-buf: Set if ever retransmitted */
+
+ /* The arrays we want to pack into as few cache lines as possible. */
+ struct {
+#define RXRPC_NR_TXQUEUE BITS_PER_LONG
+#define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1)
+ struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; /* Buffer held in each slot */
+ unsigned int segment_serial[RXRPC_NR_TXQUEUE]; /* NOTE(review): presumably serial last used to send each slot - confirm */
+ unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; /* Per-slot transmission timestamp (usec from the base) */
+ } ____cacheline_aligned;
+};
+
+/*
+ * Data transmission request.
+ *
+ * Built on the stack by the transmission paths and handed to
+ * rxrpc_send_data_packet(). The retransmission path additionally sets
+ * ->retrans before sending and, once the send has been issued, clears ->tq
+ * and ->n, sets ->did_send and refreshes ->now so that the same request can be
+ * refilled for the next packet.
+ */
+struct rxrpc_send_data_req {
+ ktime_t now; /* Current time */
+ struct rxrpc_txqueue *tq; /* Tx queue segment holding first DATA */
+ rxrpc_seq_t seq; /* Sequence of first data */
+ int n; /* Number of DATA packets to glue into jumbo */
+ bool retrans; /* T if this is a retransmission */
+ bool did_send; /* T if did actually send */
+ bool tlp_probe; /* T if this is a TLP probe */
+ int /* enum rxrpc_txdata_trace */ trace;
+};
+
#include <trace/events/rxrpc.h>
/*
@@ -841,6 +944,21 @@ static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn
}
/*
+ * Allocate the next n serial numbers on a connection. 0 must be skipped.
+ */
+static inline rxrpc_serial_t rxrpc_get_next_serials(struct rxrpc_connection *conn,
+						     unsigned int n)
+{
+	rxrpc_serial_t cur = conn->tx_serial;
+	rxrpc_serial_t first;
+
+	/* If the current value plus n does not exceed n, the counter is
+	 * either sitting at 0 or the addition wrapped; restart the run at 1
+	 * so that serial 0 is never handed out.
+	 */
+	first = (cur + n <= n) ? 1 : cur;
+
+	/* Reserve [first, first + n) and return the start of the run. */
+	conn->tx_serial = first + n;
+	return first;
+}
+
+/*
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_rx_skbs;
@@ -865,10 +983,10 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why);
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
enum rxrpc_propose_ack_trace);
-void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
-void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
-
-bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
+void rxrpc_resend_tlp(struct rxrpc_call *call);
+void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit,
+ enum rxrpc_txdata_trace trace);
+bool rxrpc_input_call_event(struct rxrpc_call *call);
/*
* call_object.c
@@ -1047,6 +1165,32 @@ void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *);
void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
/*
+ * input_rack.c
+ */
+void rxrpc_input_rack_one(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned int ix);
+void rxrpc_input_rack(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned long new_acks);
+void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary);
+ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now);
+void rxrpc_tlp_send_probe(struct rxrpc_call *call);
+void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary);
+void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by);
+
+/*
+ * Initialise TLP state [RFC8985 7.1]: no probe is in progress (serial 0), the
+ * probe sequence is set to the current hard-ACK point and no unacked TLP
+ * retransmission is flagged.
+ */
+static inline void rxrpc_tlp_init(struct rxrpc_call *call)
+{
+ call->tlp_serial = 0;
+ call->tlp_seq = call->acks_hard_ack;
+ call->tlp_is_retrans = false;
+}
+
+/*
* io_thread.c
*/
int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
@@ -1149,17 +1293,20 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
*/
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why);
+void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call);
int rxrpc_send_abort_packet(struct rxrpc_call *);
+void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req);
void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
-void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
/*
* peer_event.c
*/
void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *);
void rxrpc_peer_keepalive_worker(struct work_struct *);
+void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial,
+ bool sendmsg_fail);
/*
* peer_object.c
@@ -1208,10 +1355,12 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
/*
* rtt.c
*/
-void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int,
- rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
-ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans);
-void rxrpc_peer_init_rtt(struct rxrpc_peer *);
+void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+ int rtt_slot,
+ rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+ ktime_t send_time, ktime_t resp_time);
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans);
+void rxrpc_call_init_rtt(struct rxrpc_call *call);
/*
* rxkad.c
@@ -1284,7 +1433,6 @@ static inline void rxrpc_sysctl_exit(void) {}
extern atomic_t rxrpc_nr_txbuf;
struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size,
size_t data_align, gfp_t gfp);
-struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size);
void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
@@ -1311,6 +1459,53 @@ static inline bool after_eq(u32 seq1, u32 seq2)
return (s32)(seq1 - seq2) >= 0;
}
+/* Pick whichever of two sequence numbers before() says comes first. */
+static inline u32 earliest(u32 seq1, u32 seq2)
+{
+	if (before(seq1, seq2))
+		return seq1;
+	return seq2;
+}
+
+/* Pick whichever of two sequence numbers after() says comes last. */
+static inline u32 latest(u32 seq1, u32 seq2)
+{
+	if (after(seq1, seq2))
+		return seq1;
+	return seq2;
+}
+
+/*
+ * Determine whether a sequence number falls within the run of slots covered by
+ * a transmit queue element.
+ *
+ * An element covers RXRPC_NR_TXQUEUE consecutive sequence numbers starting at
+ * tq->qbase, and the slot within it is (seq & RXRPC_TXQ_MASK). The low bits
+ * are therefore stripped from seq before comparing against the base;
+ * comparing the slot index itself against qbase could only ever match slot 0
+ * of the element based at 0.
+ */
+static inline bool rxrpc_seq_in_txq(const struct rxrpc_txqueue *tq, rxrpc_seq_t seq)
+{
+	return (seq & ~RXRPC_TXQ_MASK) == tq->qbase;
+}
+
+/*
+ * Hand a received packet to a call for later processing: take a ref on the
+ * skb on behalf of the queue, append it to the call's rx_queue and then poke
+ * the call so that the queue gets looked at. The non-locking
+ * __skb_queue_tail() is used.
+ * NOTE(review): presumably safe because only one context touches rx_queue -
+ * confirm.
+ */
+static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ rxrpc_get_skb(skb, rxrpc_skb_get_call_rx);
+ __skb_queue_tail(&call->rx_queue, skb);
+ rxrpc_poke_call(call, rxrpc_call_poke_rx_packet);
+}
+
+/*
+ * Work out how many more DATA packets may be transmitted. The usable window
+ * is the smaller of the Tx window size and the congestion window plus its
+ * extra allowance; what has already been taken up (tx_top - tx_bottom) is
+ * subtracted and the result is floored at zero.
+ */
+static inline unsigned int rxrpc_tx_window_space(const struct rxrpc_call *call)
+{
+	int window = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra);
+	int used = call->tx_top - call->tx_bottom;
+	int room = window - used;
+
+	if (room < 0)
+		room = 0;
+	return room;
+}
+
+/*
+ * Count the transmitted packets that are not to be treated as in flight: those
+ * recorded as soft-ACK'd plus those marked lost.
+ */
+static inline unsigned int rxrpc_left_out(const struct rxrpc_call *call)
+{
+	unsigned int left_out = call->acks_nr_sacks;
+
+	left_out += call->tx_nr_lost;
+	return left_out;
+}
+
+/*
+ * Estimate how many transmitted DATA packets are still in flight [approx
+ * RFC6675]: packets sent but unacked, less those left out (soft-ACK'd or
+ * marked lost), plus those resent but unacked.
+ */
+static inline unsigned int rxrpc_tx_in_flight(const struct rxrpc_call *call)
+{
+	unsigned int in_flight = call->tx_nr_sent;
+
+	in_flight -= rxrpc_left_out(call);
+	in_flight += call->tx_nr_resent;
+	return in_flight;
+}
+
/*
* debug tracing
*/
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0f5a1d77b890..e685034ce4f7 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -188,8 +188,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
/* Make sure that there aren't any incoming calls in progress before we
* clear the preallocation buffers.
*/
- spin_lock(&rx->incoming_lock);
- spin_unlock(&rx->incoming_lock);
+ spin_lock_irq(&rx->incoming_lock);
+ spin_unlock_irq(&rx->incoming_lock);
head = b->peer_backlog_head;
tail = b->peer_backlog_tail;
@@ -343,7 +343,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call);
- read_lock(&local->services_lock);
+ read_lock_irq(&local->services_lock);
/* Weed out packets to services we're not offering. Packets that would
* begin a call are explicitly rejected and the rest are just
@@ -399,34 +399,34 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_unlock(&conn->state_lock);
spin_unlock(&rx->incoming_lock);
- read_unlock(&local->services_lock);
+ read_unlock_irq(&local->services_lock);
if (hlist_unhashed(&call->error_link)) {
- spin_lock(&call->peer->lock);
+ spin_lock_irq(&call->peer->lock);
hlist_add_head(&call->error_link, &call->peer->error_targets);
- spin_unlock(&call->peer->lock);
+ spin_unlock_irq(&call->peer->lock);
}
_leave(" = %p{%d}", call, call->debug_id);
- rxrpc_input_call_event(call, skb);
+ rxrpc_queue_rx_call_packet(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
return true;
unsupported_service:
- read_unlock(&local->services_lock);
+ read_unlock_irq(&local->services_lock);
return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
RX_INVALID_OPERATION, -EOPNOTSUPP);
unsupported_security:
- read_unlock(&local->services_lock);
+ read_unlock_irq(&local->services_lock);
return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
RX_INVALID_OPERATION, -EKEYREJECTED);
no_call:
spin_unlock(&rx->incoming_lock);
- read_unlock(&local->services_lock);
+ read_unlock_irq(&local->services_lock);
_leave(" = f [%u]", skb->mark);
return false;
discard:
- read_unlock(&local->services_lock);
+ read_unlock_irq(&local->services_lock);
return true;
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 7bbb68504766..8e477f7f8850 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -44,8 +44,8 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial);
- if (call->peer->srtt_us)
- delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC;
+ if (call->srtt_us)
+ delay = (call->srtt_us >> 3) * NSEC_PER_USEC;
else
delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay));
ktime_add_ms(delay, call->tx_backoff);
@@ -55,147 +55,104 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
}
/*
- * Handle congestion being detected by the retransmit timeout.
+ * Retransmit one or more packets.
+ *
+ * The caller fills in req->tq, req->seq and req->n; req->seq selects the slot
+ * in req->tq (seq & RXRPC_TXQ_MASK) whose buffer is flagged as resent. The
+ * request is marked as a retransmission and passed to
+ * rxrpc_send_data_packet(), and the retransmission statistic is bumped.
+ * Afterwards req->tq and req->n are cleared, req->did_send is set and req->now
+ * is refreshed. Always returns true.
+ *
+ * NOTE(review): only the buffer at req->seq is flagged RXRPC_TXBUF_RESENT even
+ * if req->n > 1 - confirm that is intended.
*/
-static void rxrpc_congestion_timeout(struct rxrpc_call *call)
+static bool rxrpc_retransmit_data(struct rxrpc_call *call,
+ struct rxrpc_send_data_req *req)
{
- set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
+ struct rxrpc_txqueue *tq = req->tq;
+ unsigned int ix = req->seq & RXRPC_TXQ_MASK;
+ struct rxrpc_txbuf *txb = tq->bufs[ix];
+
+ _enter("%x,%x,%x,%x", tq->qbase, req->seq, ix, txb->debug_id);
+
+ req->retrans = true;
+ trace_rxrpc_retransmit(call, req, txb);
+
+ txb->flags |= RXRPC_TXBUF_RESENT;
+ rxrpc_send_data_packet(call, req);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
+
+ /* Reset the request so the caller can refill it for the next packet. */
+ req->tq = NULL;
+ req->n = 0;
+ req->did_send = true;
+ req->now = ktime_get_real();
+ return true;
}
/*
* Perform retransmission of NAK'd and unack'd packets.
*/
-void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
+static void rxrpc_resend(struct rxrpc_call *call)
{
- struct rxrpc_ackpacket *ack = NULL;
- struct rxrpc_skb_priv *sp;
- struct rxrpc_txbuf *txb;
- rxrpc_seq_t transmitted = call->tx_transmitted;
- ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
- ktime_t resend_at = KTIME_MAX, now, delay;
- bool unacked = false, did_send = false;
- unsigned int i;
-
- _enter("{%d,%d}", call->acks_hard_ack, call->tx_top);
-
- now = ktime_get_real();
-
- if (list_empty(&call->tx_buffer))
- goto no_resend;
+ struct rxrpc_send_data_req req = {
+ .now = ktime_get_real(),
+ .trace = rxrpc_txdata_retransmit,
+ };
+ struct rxrpc_txqueue *tq;
- trace_rxrpc_resend(call, ack_skb);
- txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
+ _enter("{%d,%d}", call->tx_bottom, call->tx_top);
- /* Scan the soft ACK table without dropping the lock and resend any
- * explicitly NAK'd packets.
- */
- if (ack_skb) {
- sp = rxrpc_skb(ack_skb);
- ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
+ trace_rxrpc_resend(call, call->acks_highest_serial);
- for (i = 0; i < sp->ack.nr_acks; i++) {
- rxrpc_seq_t seq;
+ /* Scan the transmission queue, looking for lost packets. */
+ for (tq = call->tx_queue; tq; tq = tq->next) {
+ unsigned long lost = tq->segment_lost;
- if (ack->acks[i] & 1)
- continue;
- seq = sp->ack.first_ack + i;
- if (after(txb->seq, transmitted))
- break;
- if (after(txb->seq, seq))
- continue; /* A new hard ACK probably came in */
- list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
- if (txb->seq == seq)
- goto found_txb;
- }
- goto no_further_resend;
-
- found_txb:
- resend_at = ktime_add(txb->last_sent, rto);
- if (after(txb->serial, call->acks_highest_serial)) {
- if (ktime_after(resend_at, now) &&
- ktime_before(resend_at, next_resend))
- next_resend = resend_at;
- continue; /* Ack point not yet reached */
- }
+ /* Stop once an element starts beyond the last packet transmitted. */
+ if (after(tq->qbase, call->tx_transmitted))
+ break;
- rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
+ _debug("retr %16lx %u c=%08x [%x]",
+ tq->segment_acked, tq->nr_reported_acks, call->debug_id, tq->qbase);
+ _debug("lost %16lx", lost);
- trace_rxrpc_retransmit(call, txb->seq, txb->serial,
- ktime_sub(resend_at, now));
+ trace_rxrpc_resend_lost(call, tq, lost);
+ /* Work through a local copy of the lost bitmap, lowest set bit
+ * first, resending one packet per bit.
+ */
+ while (lost) {
+ unsigned int ix = __ffs(lost);
+ struct rxrpc_txbuf *txb = tq->bufs[ix];
- txb->flags |= RXRPC_TXBUF_RESENT;
- rxrpc_transmit_one(call, txb);
- did_send = true;
- now = ktime_get_real();
+ __clear_bit(ix, &lost);
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_lost);
- if (list_is_last(&txb->call_link, &call->tx_buffer))
- goto no_further_resend;
- txb = list_next_entry(txb, call_link);
+ req.tq = tq;
+ req.seq = tq->qbase + ix;
+ req.n = 1;
+ rxrpc_retransmit_data(call, &req);
}
}
- /* Fast-forward through the Tx queue to the point the peer says it has
- * seen. Anything between the soft-ACK table and that point will get
- * ACK'd or NACK'd in due course, so don't worry about it here; here we
- * need to consider retransmitting anything beyond that point.
- */
- if (after_eq(call->acks_prev_seq, call->tx_transmitted))
- goto no_further_resend;
-
- list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
- resend_at = ktime_add(txb->last_sent, rto);
-
- if (before_eq(txb->seq, call->acks_prev_seq))
- continue;
- if (after(txb->seq, call->tx_transmitted))
- break; /* Not transmitted yet */
-
- if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
- before(txb->serial, ntohl(ack->serial)))
- goto do_resend; /* Wasn't accounted for by a more recent ping. */
-
- if (ktime_after(resend_at, now)) {
- if (ktime_before(resend_at, next_resend))
- next_resend = resend_at;
- continue;
- }
-
- do_resend:
- unacked = true;
-
- txb->flags |= RXRPC_TXBUF_RESENT;
- rxrpc_transmit_one(call, txb);
- did_send = true;
- rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
- now = ktime_get_real();
- }
+ /* NOTE(review): the returned timeout is discarded; presumably this is
+ * called for its effect on the call's backoff state - confirm.
+ */
+ rxrpc_get_rto_backoff(call, req.did_send);
+ _leave("");
+}
-no_further_resend:
-no_resend:
- if (resend_at < KTIME_MAX) {
- delay = rxrpc_get_rto_backoff(call->peer, did_send);
- resend_at = ktime_add(resend_at, delay);
- trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset);
+/*
+ * Resend the highest-seq DATA packet so far transmitted for RACK-TLP [RFC8985 7.3].
+ *
+ * The packet resent is the one at call->tx_transmitted, sent as a single
+ * packet flagged as a TLP probe. If no queue element is found to contain that
+ * sequence number, nothing is sent.
+ */
+void rxrpc_resend_tlp(struct rxrpc_call *call)
+{
+ struct rxrpc_send_data_req req = {
+ .now = ktime_get_real(),
+ .seq = call->tx_transmitted,
+ .n = 1,
+ .tlp_probe = true,
+ .trace = rxrpc_txdata_tlp_retransmit,
+ };
+
+ /* There's a chance it'll be on the tail segment of the queue. */
+ /* NOTE(review): only the upper bound is checked for the tail element;
+ * presumably tx_transmitted can never precede the tail's qbase -
+ * confirm.
+ */
+ req.tq = READ_ONCE(call->tx_qtail);
+ if (req.tq &&
+ before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) {
+ rxrpc_retransmit_data(call, &req);
+ return;
}
- call->resend_at = resend_at;
-
- if (unacked)
- rxrpc_congestion_timeout(call);
-
- /* If there was nothing that needed retransmission then it's likely
- * that an ACK got lost somewhere. Send a ping to find out instead of
- * retransmitting data.
- */
- if (!did_send) {
- ktime_t next_ping = ktime_add_us(call->acks_latest_ts,
- call->peer->srtt_us >> 3);
- if (ktime_sub(next_ping, now) <= 0)
- rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
- rxrpc_propose_ack_ping_for_0_retrans);
+ /* Otherwise walk the queue from the head looking for the element whose
+ * range [qbase, qbase + RXRPC_NR_TXQUEUE) contains tx_transmitted.
+ */
+ for (req.tq = call->tx_queue; req.tq; req.tq = req.tq->next) {
+ if (after_eq(call->tx_transmitted, req.tq->qbase) &&
+ before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) {
+ rxrpc_retransmit_data(call, &req);
+ return;
+ }
}
-
- _leave("");
}
/*
@@ -231,68 +188,93 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call)
}
}
-static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
-{
- unsigned int winsize = min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra);
- rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize;
- rxrpc_seq_t tx_top = call->tx_top;
- int space;
-
- space = wtop - tx_top;
- return space > 0;
-}
-
/*
- * Decant some if the sendmsg prepared queue into the transmission buffer.
+ * Transmit some as-yet untransmitted data, to a maximum of the supplied limit.
+ *
+ * Each pass of the outer loop gathers a run of consecutive buffers (stopping
+ * at one that is not jumboable) into a single request, advances tx_top and
+ * tx_qtail over them, and passes the request to rxrpc_send_data_packet().
+ * The loop ends when the window space is used up or tx_top has caught up with
+ * send_top.
+ *
+ * NOTE(review): the 'limit' parameter is never read - it is shadowed by the
+ * loop-local 'limit' below, so the caller's cap is not applied. Confirm
+ * whether 'space' should be clamped to the parameter.
*/
-static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
+static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, unsigned int limit,
+ enum rxrpc_txdata_trace trace)
{
- struct rxrpc_txbuf *txb;
+ int space = rxrpc_tx_window_space(call);
if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
- if (list_empty(&call->tx_sendmsg))
+ if (call->send_top == call->tx_top)
return;
rxrpc_expose_client_call(call);
}
- while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
- struct rxrpc_txbuf, call_link))) {
- spin_lock(&call->tx_lock);
- list_del(&txb->call_link);
- spin_unlock(&call->tx_lock);
+ while (space > 0) {
+ struct rxrpc_send_data_req req = {
+ .now = ktime_get_real(),
+ .seq = call->tx_transmitted + 1,
+ .n = 0,
+ .trace = trace,
+ };
+ struct rxrpc_txqueue *tq;
+ struct rxrpc_txbuf *txb;
+ rxrpc_seq_t send_top, seq;
+ /* Per-request cap: the remaining window space or the peer's
+ * pmtud_jumbo value (at least 1), whichever is smaller. This
+ * shadows the function parameter of the same name.
+ */
+ int limit = min(space, max(call->peer->pmtud_jumbo, 1));
+
+ /* Order send_top before the contents of the new txbufs and
+ * txqueue pointers
+ */
+ send_top = smp_load_acquire(&call->send_top);
+ if (call->tx_top == send_top)
+ break;
- call->tx_top = txb->seq;
- list_add_tail(&txb->call_link, &call->tx_buffer);
+ trace_rxrpc_transmit(call, send_top, space);
- if (txb->flags & RXRPC_LAST_PACKET)
- rxrpc_close_tx_phase(call);
+ tq = call->tx_qtail;
+ seq = call->tx_top;
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant);
- rxrpc_transmit_one(call, txb);
+ do {
+ int ix;
- if (!rxrpc_tx_window_has_space(call))
- break;
+ seq++;
+ ix = seq & RXRPC_TXQ_MASK;
+ /* Slot index wrapped to 0: move on to the next element. */
+ if (!ix) {
+ tq = tq->next;
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant_advance);
+ }
+ /* Remember the element holding the first packet of the run. */
+ if (!req.tq)
+ req.tq = tq;
+ txb = tq->bufs[ix];
+ req.n++;
+ if (!txb->jumboable)
+ break;
+ } while (req.n < limit && before(seq, send_top));
+
+ /* The last packet closes the Tx phase and leaves no tail element. */
+ if (txb->flags & RXRPC_LAST_PACKET) {
+ rxrpc_close_tx_phase(call);
+ tq = NULL;
+ }
+ call->tx_qtail = tq;
+ call->tx_top = seq;
+
+ space -= req.n;
+ rxrpc_send_data_packet(call, &req);
}
}
-static void rxrpc_transmit_some_data(struct rxrpc_call *call)
+void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit,
+ enum rxrpc_txdata_trace trace)
{
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_SERVER_ACK_REQUEST:
- if (list_empty(&call->tx_sendmsg))
+ if (call->tx_bottom == READ_ONCE(call->send_top))
return;
rxrpc_begin_service_reply(call);
fallthrough;
case RXRPC_CALL_SERVER_SEND_REPLY:
case RXRPC_CALL_CLIENT_SEND_REQUEST:
- if (!rxrpc_tx_window_has_space(call))
+ if (!rxrpc_tx_window_space(call))
return;
- if (list_empty(&call->tx_sendmsg)) {
+ if (call->tx_bottom == READ_ONCE(call->send_top)) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow);
return;
}
- rxrpc_decant_prepared_tx(call);
+ rxrpc_transmit_fresh_data(call, limit, trace);
break;
default:
return;
@@ -305,8 +287,8 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call)
*/
static void rxrpc_send_initial_ping(struct rxrpc_call *call)
{
- if (call->peer->rtt_count < 3 ||
- ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ if (call->rtt_count < 3 ||
+ ktime_before(ktime_add_ms(call->rtt_last_req, 1000),
ktime_get_real()))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_params);
@@ -315,10 +297,11 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
/*
* Handle retransmission and deferred ACK/abort generation.
*/
-bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
+bool rxrpc_input_call_event(struct rxrpc_call *call)
{
+ struct sk_buff *skb;
ktime_t now, t;
- bool resend = false;
+ bool did_receive = false, saw_ack = false;
s32 abort_code;
rxrpc_see_call(call, rxrpc_call_see_input);
@@ -328,9 +311,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)],
call->events);
- if (__rxrpc_call_is_complete(call))
- goto out;
-
/* Handle abort request locklessly, vs rxrpc_propose_abort(). */
abort_code = smp_load_acquire(&call->send_abort);
if (abort_code) {
@@ -339,11 +319,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
goto out;
}
- if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
- goto out;
+ do {
+ skb = __skb_dequeue(&call->rx_queue);
+ if (skb) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (__rxrpc_call_is_complete(call) ||
+ skb->mark == RXRPC_SKB_MARK_ERROR) {
+ rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
+ goto out;
+ }
+
+ saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK;
+
+ rxrpc_input_call_packet(call, skb);
+ rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
+ did_receive = true;
+ }
- if (skb)
- rxrpc_input_call_packet(call, skb);
+ t = ktime_sub(call->rack_timo_at, ktime_get_real());
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t,
+ rxrpc_timer_trace_rack_off + call->rack_timer_mode);
+ call->rack_timo_at = KTIME_MAX;
+ rxrpc_rack_timer_expired(call, t);
+ }
+
+ } while (!skb_queue_empty(&call->rx_queue));
/* If we see our async-event poke, check for timeout trippage. */
now = ktime_get_real();
@@ -376,13 +378,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_delayed_ack);
}
- t = ktime_sub(call->ack_lost_at, now);
- if (t <= 0) {
- trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack);
- call->ack_lost_at = KTIME_MAX;
- set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
- }
-
t = ktime_sub(call->ping_at, now);
if (t <= 0) {
trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping);
@@ -391,15 +386,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_ping_for_keepalive);
}
- t = ktime_sub(call->resend_at, now);
- if (t <= 0) {
- trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend);
- call->resend_at = KTIME_MAX;
- resend = true;
- }
-
- rxrpc_transmit_some_data(call);
-
now = ktime_get_real();
t = ktime_sub(call->keepalive_at, now);
if (t <= 0) {
@@ -409,35 +395,40 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_ping_for_keepalive);
}
- if (skb) {
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK)
- rxrpc_congestion_degrade(call);
- }
-
if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
rxrpc_send_initial_ping(call);
+ rxrpc_transmit_some_data(call, UINT_MAX, rxrpc_txdata_new_data);
+
+ if (saw_ack)
+ rxrpc_congestion_degrade(call);
+
+ if (did_receive &&
+ (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+ __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SEND_REPLY)) {
+ t = ktime_sub(call->rack_timo_at, ktime_get_real());
+ trace_rxrpc_rack(call, t);
+ }
+
/* Process events */
if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- if (resend &&
+ if (call->tx_nr_lost > 0 &&
__rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY &&
!test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
- rxrpc_resend(call, NULL);
+ rxrpc_resend(call);
if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
rxrpc_propose_ack_rx_idle);
if (call->ackr_nr_unacked > 2) {
- if (call->peer->rtt_count < 3)
+ if (call->rtt_count < 3)
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_rtt);
- else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000),
ktime_get_real()))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_old_rtt);
@@ -455,8 +446,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
set(call->expect_req_by);
set(call->expect_rx_by);
set(call->delay_ack_at);
- set(call->ack_lost_at);
- set(call->resend_at);
+ set(call->rack_timo_at);
set(call->keepalive_at);
set(call->ping_at);
@@ -467,7 +457,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
} else {
unsigned long nowj = jiffies, delayj, nextj;
- delayj = max(nsecs_to_jiffies(delay), 1);
+ delayj = umax(nsecs_to_jiffies(delay), 1);
nextj = nowj + delayj;
if (time_before(nextj, call->timer.expires) ||
!timer_pending(&call->timer)) {
@@ -484,9 +474,12 @@ out:
rxrpc_disconnect_call(call);
if (call->security)
call->security->free_call_crypto(call);
+ } else {
+ if (did_receive &&
+ call->peer->ackr_adv_pmtud &&
+ call->peer->pmtud_pending)
+ rxrpc_send_probe_for_pmtud(call);
}
- if (call->acks_hard_ack != call->tx_bottom)
- rxrpc_shrink_call_tx_buffer(call);
_leave("");
return true;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f9e983a12c14..5a543c3f6fb0 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -49,7 +49,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
bool busy;
if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) {
- spin_lock_bh(&local->lock);
+ spin_lock_irq(&local->lock);
busy = !list_empty(&call->attend_link);
trace_rxrpc_poke_call(call, busy, what);
if (!busy && !rxrpc_try_get_call(call, rxrpc_call_get_poke))
@@ -57,7 +57,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
if (!busy) {
list_add_tail(&call->attend_link, &local->call_attend_q);
}
- spin_unlock_bh(&local->lock);
+ spin_unlock_irq(&local->lock);
if (!busy)
rxrpc_wake_up_io_thread(local);
}
@@ -146,23 +146,21 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
INIT_LIST_HEAD(&call->attend_link);
- INIT_LIST_HEAD(&call->tx_sendmsg);
- INIT_LIST_HEAD(&call->tx_buffer);
+ skb_queue_head_init(&call->rx_queue);
skb_queue_head_init(&call->recvmsg_queue);
skb_queue_head_init(&call->rx_oos_queue);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->notify_lock);
- spin_lock_init(&call->tx_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
+ call->tx_jumbo_max = 1;
call->next_rx_timo = 20 * HZ;
call->next_req_timo = 1 * HZ;
call->ackr_window = 1;
call->ackr_wtop = 1;
call->delay_ack_at = KTIME_MAX;
- call->ack_lost_at = KTIME_MAX;
- call->resend_at = KTIME_MAX;
+ call->rack_timo_at = KTIME_MAX;
call->ping_at = KTIME_MAX;
call->keepalive_at = KTIME_MAX;
call->expect_rx_by = KTIME_MAX;
@@ -177,6 +175,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
call->cong_cwnd = RXRPC_MIN_CWND;
call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
+ rxrpc_call_init_rtt(call);
+
call->rxnet = rxnet;
call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK;
atomic_inc(&rxnet->nr_calls);
@@ -220,9 +220,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
__set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
if (p->timeouts.normal)
- call->next_rx_timo = min(p->timeouts.normal, 1);
+ call->next_rx_timo = umin(p->timeouts.normal, 1);
if (p->timeouts.idle)
- call->next_req_timo = min(p->timeouts.idle, 1);
+ call->next_req_timo = umin(p->timeouts.idle, 1);
if (p->timeouts.hard)
call->hard_timo = p->timeouts.hard;
@@ -302,9 +302,9 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call);
rxrpc_get_call(call, rxrpc_call_get_io_thread);
- spin_lock(&local->client_call_lock);
+ spin_lock_irq(&local->client_call_lock);
list_add_tail(&call->wait_link, &local->new_client_calls);
- spin_unlock(&local->client_call_lock);
+ spin_unlock_irq(&local->client_call_lock);
rxrpc_wake_up_io_thread(local);
return 0;
@@ -434,7 +434,7 @@ error_attached_to_socket:
/*
* Set up an incoming call. call->conn points to the connection.
- * This is called in BH context and isn't allowed to fail.
+ * This is called with interrupts disabled and isn't allowed to fail.
*/
void rxrpc_incoming_call(struct rxrpc_sock *rx,
struct rxrpc_call *call,
@@ -531,11 +531,29 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
}
/*
- * Clean up the Rx skb ring.
+ * Clean up the transmission buffers.
+ */
+static void rxrpc_cleanup_tx_buffers(struct rxrpc_call *call)
+{
+ struct rxrpc_txqueue *tq, *next;
+
+ for (tq = call->tx_queue; tq; tq = next) {
+ next = tq->next;
+ for (int i = 0; i < RXRPC_NR_TXQUEUE; i++)
+ if (tq->bufs[i])
+ rxrpc_put_txbuf(tq->bufs[i], rxrpc_txbuf_put_cleaned);
+ trace_rxrpc_tq(call, tq, 0, rxrpc_tq_cleaned);
+ kfree(tq);
+ }
+}
+
+/*
+ * Clean up the receive buffers.
*/
-static void rxrpc_cleanup_ring(struct rxrpc_call *call)
+static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call)
{
rxrpc_purge_queue(&call->recvmsg_queue);
+ rxrpc_purge_queue(&call->rx_queue);
rxrpc_purge_queue(&call->rx_oos_queue);
}
@@ -558,7 +576,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
/* Make sure we don't get any more notifications */
- spin_lock(&rx->recvmsg_lock);
+ spin_lock_irq(&rx->recvmsg_lock);
if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -571,7 +589,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->recvmsg_link.next = NULL;
call->recvmsg_link.prev = NULL;
- spin_unlock(&rx->recvmsg_lock);
+ spin_unlock_irq(&rx->recvmsg_lock);
if (put)
rxrpc_put_call(call, rxrpc_call_put_unnotify);
@@ -671,23 +689,11 @@ static void rxrpc_rcu_free_call(struct rcu_head *rcu)
static void rxrpc_destroy_call(struct work_struct *work)
{
struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer);
- struct rxrpc_txbuf *txb;
del_timer_sync(&call->timer);
- rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
- rxrpc_cleanup_ring(call);
- while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
- struct rxrpc_txbuf, call_link))) {
- list_del(&txb->call_link);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
- }
- while ((txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link))) {
- list_del(&txb->call_link);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
- }
-
+ rxrpc_cleanup_tx_buffers(call);
+ rxrpc_cleanup_rx_buffers(call);
rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
rxrpc_deactivate_bundle(call->bundle);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index bb11e8289d6d..db0099197890 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -231,7 +231,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
distance = id - id_cursor;
if (distance < 0)
distance = -distance;
- limit = max_t(unsigned long, atomic_read(&rxnet->nr_conns) * 4, 1024);
+ limit = umax(atomic_read(&rxnet->nr_conns) * 4, 1024);
if (distance > limit)
goto mark_dont_reuse;
@@ -437,9 +437,9 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
call->dest_srx.srx_service = conn->service_id;
call->cong_ssthresh = call->peer->cong_ssthresh;
if (call->cong_cwnd >= call->cong_ssthresh)
- call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
else
- call->cong_mode = RXRPC_CALL_SLOW_START;
+ call->cong_ca_state = RXRPC_CA_SLOW_START;
chan->call_id = call_id;
chan->call_debug_id = call->debug_id;
@@ -508,16 +508,18 @@ static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
void rxrpc_connect_client_calls(struct rxrpc_local *local)
{
struct rxrpc_call *call;
+ LIST_HEAD(new_client_calls);
- while ((call = list_first_entry_or_null(&local->new_client_calls,
- struct rxrpc_call, wait_link))
- ) {
+ spin_lock_irq(&local->client_call_lock);
+ list_splice_tail_init(&local->new_client_calls, &new_client_calls);
+ spin_unlock_irq(&local->client_call_lock);
+
+ while ((call = list_first_entry_or_null(&new_client_calls,
+ struct rxrpc_call, wait_link))) {
struct rxrpc_bundle *bundle = call->bundle;
- spin_lock(&local->client_call_lock);
list_move_tail(&call->wait_link, &bundle->waiting_calls);
rxrpc_see_call(call, rxrpc_call_see_waiting_call);
- spin_unlock(&local->client_call_lock);
if (rxrpc_bundle_has_space(bundle))
rxrpc_activate_channels(bundle);
@@ -545,9 +547,9 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
trace_rxrpc_client(conn, channel, rxrpc_client_exposed);
- spin_lock(&call->peer->lock);
+ spin_lock_irq(&call->peer->lock);
hlist_add_head(&call->error_link, &call->peer->error_targets);
- spin_unlock(&call->peer->lock);
+ spin_unlock_irq(&call->peer->lock);
}
}
@@ -588,9 +590,9 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
ASSERTCMP(call->call_id, ==, 0);
ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
/* May still be on ->new_client_calls. */
- spin_lock(&local->client_call_lock);
+ spin_lock_irq(&local->client_call_lock);
list_del_init(&call->wait_link);
- spin_unlock(&local->client_call_lock);
+ spin_unlock_irq(&local->client_call_lock);
return;
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 598b4ee389fc..713e04394ceb 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -26,7 +26,7 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff
bool aborted = false;
if (conn->state != RXRPC_CONN_ABORTED) {
- spin_lock(&conn->state_lock);
+ spin_lock_irq(&conn->state_lock);
if (conn->state != RXRPC_CONN_ABORTED) {
conn->abort_code = abort_code;
conn->error = err;
@@ -37,7 +37,7 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff
set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events);
aborted = true;
}
- spin_unlock(&conn->state_lock);
+ spin_unlock_irq(&conn->state_lock);
}
return aborted;
@@ -63,11 +63,12 @@ int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
/*
* Mark a connection as being remotely aborted.
*/
-static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn,
+static void rxrpc_input_conn_abort(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
- return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
- RXRPC_CALL_REMOTELY_ABORTED);
+ trace_rxrpc_rx_conn_abort(conn, skb);
+ rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+ RXRPC_CALL_REMOTELY_ABORTED);
}
/*
@@ -91,7 +92,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
struct rxrpc_acktrailer trailer;
size_t len;
int ret, ioc;
- u32 serial, mtu, call_id, padding;
+ u32 serial, max_mtu, if_mtu, call_id, padding;
_enter("%d", conn->debug_id);
@@ -149,8 +150,13 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
break;
case RXRPC_PACKET_TYPE_ACK:
- mtu = conn->peer->if_mtu;
- mtu -= conn->peer->hdrsize;
+ if_mtu = conn->peer->if_mtu - conn->peer->hdrsize;
+ if (conn->peer->ackr_adv_pmtud) {
+ max_mtu = umax(conn->peer->max_data, rxrpc_rx_mtu);
+ } else {
+ if_mtu = umin(1444, if_mtu);
+ max_mtu = if_mtu;
+ }
pkt.ack.bufferSpace = 0;
pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
pkt.ack.firstPacket = htonl(chan->last_seq + 1);
@@ -158,10 +164,10 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
- trailer.maxMTU = htonl(rxrpc_rx_mtu);
- trailer.ifMTU = htonl(mtu);
+ trailer.maxMTU = htonl(max_mtu);
+ trailer.ifMTU = htonl(if_mtu);
trailer.rwind = htonl(rxrpc_rx_window_size);
- trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ trailer.jumbo_max = 0;
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
padding = 0;
iov[0].iov_len += sizeof(pkt.ack);
@@ -171,7 +177,8 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
- pkt.ack.reason, 0, rxrpc_rx_window_size);
+ pkt.ack.reason, 0, rxrpc_rx_window_size,
+ rxrpc_propose_ack_retransmit);
break;
default:
@@ -202,11 +209,14 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
for (i = 0; i < RXRPC_MAXCALLS; i++) {
call = conn->channels[i].call;
- if (call)
+ if (call) {
+ rxrpc_see_call(call, rxrpc_call_see_conn_abort);
rxrpc_set_call_completion(call,
conn->completion,
conn->abort_code,
conn->error);
+ rxrpc_poke_call(call, rxrpc_call_poke_conn_abort);
+ }
}
_leave("");
@@ -252,10 +262,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (ret < 0)
return ret;
- spin_lock(&conn->state_lock);
+ spin_lock_irq(&conn->state_lock);
if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING)
conn->state = RXRPC_CONN_SERVICE;
- spin_unlock(&conn->state_lock);
+ spin_unlock_irq(&conn->state_lock);
if (conn->state == RXRPC_CONN_SERVICE) {
/* Offload call state flipping to the I/O thread. As
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 694c4df7a1a3..7eba4d7d9a38 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -31,13 +31,13 @@ void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
if (WARN_ON_ONCE(!local))
return;
- spin_lock_bh(&local->lock);
+ spin_lock_irq(&local->lock);
busy = !list_empty(&conn->attend_link);
if (!busy) {
rxrpc_get_connection(conn, why);
list_add_tail(&conn->attend_link, &local->conn_attend_q);
}
- spin_unlock_bh(&local->lock);
+ spin_unlock_irq(&local->lock);
rxrpc_wake_up_io_thread(local);
}
@@ -196,9 +196,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
call->peer->cong_ssthresh = call->cong_ssthresh;
if (!hlist_unhashed(&call->error_link)) {
- spin_lock(&call->peer->lock);
+ spin_lock_irq(&call->peer->lock);
hlist_del_init(&call->error_link);
- spin_unlock(&call->peer->lock);
+ spin_unlock_irq(&call->peer->lock);
}
if (rxrpc_is_client_call(call)) {
@@ -321,6 +321,12 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
list_del_init(&conn->proc_link);
write_unlock(&rxnet->conn_lock);
+ if (conn->pmtud_probe) {
+ trace_rxrpc_pmtud_lost(conn, 0);
+ conn->peer->pmtud_probing = false;
+ conn->peer->pmtud_pending = true;
+ }
+
rxrpc_purge_queue(&conn->rx_queue);
rxrpc_kill_client_conn(conn);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 16d49a861dbb..4974b5accafa 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -27,80 +27,68 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
}
/*
- * Do TCP-style congestion management [RFC 5681].
+ * Do TCP-style congestion management [RFC5681].
*/
static void rxrpc_congestion_management(struct rxrpc_call *call,
- struct sk_buff *skb,
- struct rxrpc_ack_summary *summary,
- rxrpc_serial_t acked_serial)
+ struct rxrpc_ack_summary *summary)
{
- enum rxrpc_congest_change change = rxrpc_cong_no_change;
- unsigned int cumulative_acks = call->cong_cumul_acks;
- unsigned int cwnd = call->cong_cwnd;
- bool resend = false;
-
- summary->flight_size =
- (call->tx_top - call->acks_hard_ack) - summary->nr_acks;
+ summary->change = rxrpc_cong_no_change;
+ summary->in_flight = rxrpc_tx_in_flight(call);
if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
summary->retrans_timeo = true;
- call->cong_ssthresh = max_t(unsigned int,
- summary->flight_size / 2, 2);
- cwnd = 1;
- if (cwnd >= call->cong_ssthresh &&
- call->cong_mode == RXRPC_CALL_SLOW_START) {
- call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
- call->cong_tstamp = skb->tstamp;
- cumulative_acks = 0;
+ call->cong_ssthresh = umax(summary->in_flight / 2, 2);
+ call->cong_cwnd = 1;
+ if (call->cong_cwnd >= call->cong_ssthresh &&
+ call->cong_ca_state == RXRPC_CA_SLOW_START) {
+ call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
+ call->cong_tstamp = call->acks_latest_ts;
+ call->cong_cumul_acks = 0;
}
}
- cumulative_acks += summary->nr_new_acks;
- if (cumulative_acks > 255)
- cumulative_acks = 255;
-
- summary->cwnd = call->cong_cwnd;
- summary->ssthresh = call->cong_ssthresh;
- summary->cumulative_acks = cumulative_acks;
- summary->dup_acks = call->cong_dup_acks;
+ call->cong_cumul_acks += summary->nr_new_sacks;
+ call->cong_cumul_acks += summary->nr_new_hacks;
+ if (call->cong_cumul_acks > 255)
+ call->cong_cumul_acks = 255;
- switch (call->cong_mode) {
- case RXRPC_CALL_SLOW_START:
- if (summary->saw_nacks)
+ switch (call->cong_ca_state) {
+ case RXRPC_CA_SLOW_START:
+ if (call->acks_nr_snacks > 0)
goto packet_loss_detected;
- if (summary->cumulative_acks > 0)
- cwnd += 1;
- if (cwnd >= call->cong_ssthresh) {
- call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
- call->cong_tstamp = skb->tstamp;
+ if (call->cong_cumul_acks > 0)
+ call->cong_cwnd += 1;
+ if (call->cong_cwnd >= call->cong_ssthresh) {
+ call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
+ call->cong_tstamp = call->acks_latest_ts;
}
goto out;
- case RXRPC_CALL_CONGEST_AVOIDANCE:
- if (summary->saw_nacks)
+ case RXRPC_CA_CONGEST_AVOIDANCE:
+ if (call->acks_nr_snacks > 0)
goto packet_loss_detected;
/* We analyse the number of packets that get ACK'd per RTT
* period and increase the window if we managed to fill it.
*/
- if (call->peer->rtt_count == 0)
+ if (call->rtt_count == 0)
goto out;
- if (ktime_before(skb->tstamp,
+ if (ktime_before(call->acks_latest_ts,
ktime_add_us(call->cong_tstamp,
- call->peer->srtt_us >> 3)))
+ call->srtt_us >> 3)))
goto out_no_clear_ca;
- change = rxrpc_cong_rtt_window_end;
- call->cong_tstamp = skb->tstamp;
- if (cumulative_acks >= cwnd)
- cwnd++;
+ summary->change = rxrpc_cong_rtt_window_end;
+ call->cong_tstamp = call->acks_latest_ts;
+ if (call->cong_cumul_acks >= call->cong_cwnd)
+ call->cong_cwnd++;
goto out;
- case RXRPC_CALL_PACKET_LOSS:
- if (!summary->saw_nacks)
+ case RXRPC_CA_PACKET_LOSS:
+ if (call->acks_nr_snacks == 0)
goto resume_normality;
- if (summary->new_low_nack) {
- change = rxrpc_cong_new_low_nack;
+ if (summary->new_low_snack) {
+ summary->change = rxrpc_cong_new_low_nack;
call->cong_dup_acks = 1;
if (call->cong_extra > 1)
call->cong_extra = 1;
@@ -111,31 +99,35 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
if (call->cong_dup_acks < 3)
goto send_extra_data;
- change = rxrpc_cong_begin_retransmission;
- call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT;
- call->cong_ssthresh = max_t(unsigned int,
- summary->flight_size / 2, 2);
- cwnd = call->cong_ssthresh + 3;
+ summary->change = rxrpc_cong_begin_retransmission;
+ call->cong_ca_state = RXRPC_CA_FAST_RETRANSMIT;
+ call->cong_ssthresh = umax(summary->in_flight / 2, 2);
+ call->cong_cwnd = call->cong_ssthresh + 3;
call->cong_extra = 0;
call->cong_dup_acks = 0;
- resend = true;
+ summary->need_retransmit = true;
+ summary->in_fast_or_rto_recovery = true;
goto out;
- case RXRPC_CALL_FAST_RETRANSMIT:
- if (!summary->new_low_nack) {
- if (summary->nr_new_acks == 0)
- cwnd += 1;
+ case RXRPC_CA_FAST_RETRANSMIT:
+ rxrpc_tlp_init(call);
+ summary->in_fast_or_rto_recovery = true;
+ if (!summary->new_low_snack) {
+ if (summary->nr_new_sacks == 0)
+ call->cong_cwnd += 1;
call->cong_dup_acks++;
if (call->cong_dup_acks == 2) {
- change = rxrpc_cong_retransmit_again;
+ summary->change = rxrpc_cong_retransmit_again;
call->cong_dup_acks = 0;
- resend = true;
+ summary->need_retransmit = true;
}
} else {
- change = rxrpc_cong_progress;
- cwnd = call->cong_ssthresh;
- if (!summary->saw_nacks)
+ summary->change = rxrpc_cong_progress;
+ call->cong_cwnd = call->cong_ssthresh;
+ if (call->acks_nr_snacks == 0) {
+ summary->exiting_fast_or_rto_recovery = true;
goto resume_normality;
+ }
}
goto out;
@@ -145,30 +137,25 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
}
resume_normality:
- change = rxrpc_cong_cleared_nacks;
+ summary->change = rxrpc_cong_cleared_nacks;
call->cong_dup_acks = 0;
call->cong_extra = 0;
- call->cong_tstamp = skb->tstamp;
- if (cwnd < call->cong_ssthresh)
- call->cong_mode = RXRPC_CALL_SLOW_START;
+ call->cong_tstamp = call->acks_latest_ts;
+ if (call->cong_cwnd < call->cong_ssthresh)
+ call->cong_ca_state = RXRPC_CA_SLOW_START;
else
- call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
out:
- cumulative_acks = 0;
+ call->cong_cumul_acks = 0;
out_no_clear_ca:
- if (cwnd >= RXRPC_TX_MAX_WINDOW)
- cwnd = RXRPC_TX_MAX_WINDOW;
- call->cong_cwnd = cwnd;
- call->cong_cumul_acks = cumulative_acks;
- summary->mode = call->cong_mode;
- trace_rxrpc_congest(call, summary, acked_serial, change);
- if (resend)
- rxrpc_resend(call, skb);
+ if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW)
+ call->cong_cwnd = RXRPC_TX_MAX_WINDOW;
+ trace_rxrpc_congest(call, summary);
return;
packet_loss_detected:
- change = rxrpc_cong_saw_nack;
- call->cong_mode = RXRPC_CALL_PACKET_LOSS;
+ summary->change = rxrpc_cong_saw_nack;
+ call->cong_ca_state = RXRPC_CA_PACKET_LOSS;
call->cong_dup_acks = 0;
goto send_extra_data;
@@ -177,7 +164,7 @@ send_extra_data:
* state.
*/
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ||
- summary->nr_acks != call->tx_top - call->acks_hard_ack) {
+ call->acks_nr_sacks != call->tx_top - call->tx_bottom) {
call->cong_extra++;
wake_up(&call->waitq);
}
@@ -189,26 +176,42 @@ send_extra_data:
*/
void rxrpc_congestion_degrade(struct rxrpc_call *call)
{
- ktime_t rtt, now;
+ ktime_t rtt, now, time_since;
- if (call->cong_mode != RXRPC_CALL_SLOW_START &&
- call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
+ if (call->cong_ca_state != RXRPC_CA_SLOW_START &&
+ call->cong_ca_state != RXRPC_CA_CONGEST_AVOIDANCE)
return;
if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY)
return;
- rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
+ rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8));
now = ktime_get_real();
- if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now))
+ time_since = ktime_sub(now, call->tx_last_sent);
+ if (ktime_before(time_since, rtt))
return;
- trace_rxrpc_reset_cwnd(call, now);
+ trace_rxrpc_reset_cwnd(call, time_since, rtt);
rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset);
call->tx_last_sent = now;
- call->cong_mode = RXRPC_CALL_SLOW_START;
- call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh,
- call->cong_cwnd * 3 / 4);
- call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND);
+ call->cong_ca_state = RXRPC_CA_SLOW_START;
+ call->cong_ssthresh = umax(call->cong_ssthresh, call->cong_cwnd * 3 / 4);
+ call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND);
+}
+
+/*
+ * Add an RTT sample derived from an ACK'd DATA packet.
+ */
+static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ int ix)
+{
+ ktime_t xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);
+
+ rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1,
+ summary->acked_serial, summary->ack_serial,
+ xmit_ts, call->acks_latest_ts);
+ __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */
}
/*
@@ -217,37 +220,120 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)
static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
struct rxrpc_ack_summary *summary)
{
- struct rxrpc_txbuf *txb;
- bool rot_last = false;
+ struct rxrpc_txqueue *tq = call->tx_queue;
+ rxrpc_seq_t seq = call->tx_bottom + 1;
+ bool rot_last = false, trace = false;
- list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
- if (before_eq(txb->seq, call->acks_hard_ack))
- continue;
- if (txb->flags & RXRPC_LAST_PACKET) {
+ _enter("%x,%x", call->tx_bottom, to);
+
+ trace_rxrpc_tx_rotate(call, seq, to);
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate);
+
+ if (call->acks_lowest_nak == call->tx_bottom) {
+ call->acks_lowest_nak = to;
+ } else if (after(to, call->acks_lowest_nak)) {
+ summary->new_low_snack = true;
+ call->acks_lowest_nak = to;
+ }
+
+ /* We may have a left over fully-consumed buffer at the front that we
+ * couldn't drop before (rotate_and_keep below).
+ */
+ if (seq == call->tx_qbase + RXRPC_NR_TXQUEUE) {
+ call->tx_qbase += RXRPC_NR_TXQUEUE;
+ call->tx_queue = tq->next;
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free);
+ kfree(tq);
+ tq = call->tx_queue;
+ }
+
+ do {
+ unsigned int ix = seq - call->tx_qbase;
+
+ _debug("tq=%x seq=%x i=%d f=%x", tq->qbase, seq, ix, tq->bufs[ix]->flags);
+ if (tq->bufs[ix]->flags & RXRPC_LAST_PACKET) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
}
- if (txb->seq == to)
- break;
- }
- if (rot_last)
- set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags);
+ if (summary->acked_serial == tq->segment_serial[ix] &&
+ test_bit(ix, &tq->rtt_samples))
+ rxrpc_add_data_rtt_sample(call, summary, tq, ix);
+
+ if (ix == tq->nr_reported_acks) {
+ /* Packet directly hard ACK'd. */
+ tq->nr_reported_acks++;
+ rxrpc_input_rack_one(call, summary, tq, ix);
+ if (seq == call->tlp_seq)
+ summary->tlp_probe_acked = true;
+ summary->nr_new_hacks++;
+ __set_bit(ix, &tq->segment_acked);
+ trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack);
+ } else if (test_bit(ix, &tq->segment_acked)) {
+ /* Soft ACK -> hard ACK. */
+ call->acks_nr_sacks--;
+ trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_sack);
+ } else {
+ /* Soft NAK -> hard ACK. */
+ call->acks_nr_snacks--;
+ rxrpc_input_rack_one(call, summary, tq, ix);
+ if (seq == call->tlp_seq)
+ summary->tlp_probe_acked = true;
+ summary->nr_new_hacks++;
+ __set_bit(ix, &tq->segment_acked);
+ trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak);
+ }
- _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last);
+ call->tx_nr_sent--;
+ if (__test_and_clear_bit(ix, &tq->segment_lost))
+ call->tx_nr_lost--;
+ if (__test_and_clear_bit(ix, &tq->segment_retransmitted))
+ call->tx_nr_resent--;
+ __clear_bit(ix, &tq->ever_retransmitted);
- if (call->acks_lowest_nak == call->acks_hard_ack) {
- call->acks_lowest_nak = to;
- } else if (after(to, call->acks_lowest_nak)) {
- summary->new_low_nack = true;
- call->acks_lowest_nak = to;
+ rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated);
+ tq->bufs[ix] = NULL;
+
+ WRITE_ONCE(call->tx_bottom, seq);
+ trace_rxrpc_txqueue(call, (rot_last ?
+ rxrpc_txqueue_rotate_last :
+ rxrpc_txqueue_rotate));
+
+ seq++;
+ trace = true;
+ if (!(seq & RXRPC_TXQ_MASK)) {
+ trace_rxrpc_rack_update(call, summary);
+ trace = false;
+ prefetch(tq->next);
+ if (tq != call->tx_qtail) {
+ call->tx_qbase += RXRPC_NR_TXQUEUE;
+ call->tx_queue = tq->next;
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free);
+ kfree(tq);
+ tq = call->tx_queue;
+ } else {
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_keep);
+ tq = NULL;
+ break;
+ }
+ }
+
+ } while (before_eq(seq, to));
+
+ if (trace)
+ trace_rxrpc_rack_update(call, summary);
+
+ if (rot_last) {
+ set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags);
+ if (tq) {
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free);
+ kfree(tq);
+ call->tx_queue = NULL;
+ }
}
- smp_store_release(&call->acks_hard_ack, to);
+ _debug("%x,%x,%x,%d", to, call->tx_bottom, call->tx_top, rot_last);
- trace_rxrpc_txqueue(call, (rot_last ?
- rxrpc_txqueue_rotate_last :
- rxrpc_txqueue_rotate));
wake_up(&call->waitq);
return rot_last;
}
@@ -263,13 +349,10 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
- call->resend_at = KTIME_MAX;
- trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
-
- if (unlikely(call->cong_last_nack)) {
- rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
- call->cong_last_nack = NULL;
- }
+ call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF;
+ call->rack_timo_at = KTIME_MAX;
+ trace_rxrpc_rack_timer(call, 0, false);
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_rack_off + call->rack_timer_mode);
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
@@ -365,7 +448,7 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
- __skb_queue_tail(&call->recvmsg_queue, skb);
+ skb_queue_tail(&call->recvmsg_queue, skb);
rxrpc_input_update_ack_window(call, window, wtop);
trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq);
if (last)
@@ -442,7 +525,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
- spin_lock(&call->recvmsg_queue.lock);
rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue);
*_notify = true;
@@ -464,8 +546,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_receive_queue_oos);
}
- spin_unlock(&call->recvmsg_queue.lock);
-
call->ackr_sack_base = sack;
} else {
unsigned int slot;
@@ -530,7 +610,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = skb->len - offset;
bool notify = false;
- int ack_reason = 0;
+ int ack_reason = 0, count = 1, stat_ix;
while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
if (len < RXRPC_JUMBO_SUBPKTLEN)
@@ -559,12 +639,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
sp->hdr.serial++;
offset += RXRPC_JUMBO_SUBPKTLEN;
len -= RXRPC_JUMBO_SUBPKTLEN;
+ count++;
}
sp->offset = offset;
sp->len = len;
rxrpc_input_data_one(call, skb, &notify, &ack_serial, &ack_reason);
+ stat_ix = umin(count, ARRAY_SIZE(call->rxnet->stat_rx_jumbo)) - 1;
+ atomic_inc(&call->rxnet->stat_rx_jumbo[stat_ix]);
+
if (ack_reason > 0) {
rxrpc_send_ACK(call, ack_reason, ack_serial,
rxrpc_propose_ack_input_data);
@@ -667,7 +751,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
smp_mb(); /* Read data before setting avail bit */
set_bit(i, &call->rtt_avail);
- rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
+ rxrpc_call_add_rtt(call, type, i, acked_serial, ack_serial,
sent_at, resp_time);
matched = true;
}
@@ -677,7 +761,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
*/
if (after(acked_serial, orig_serial)) {
trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_obsolete, i,
- orig_serial, acked_serial, 0, 0);
+ orig_serial, acked_serial, 0, 0, 0);
clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
smp_wmb();
set_bit(i, &call->rtt_avail);
@@ -685,7 +769,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
}
if (!matched)
- trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0);
+ trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0, 0);
}
/*
@@ -695,10 +779,13 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb
struct rxrpc_acktrailer *trailer)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_peer *peer;
- unsigned int mtu;
+ struct rxrpc_peer *peer = call->peer;
+ unsigned int max_data, capacity;
bool wake = false;
- u32 rwind = ntohl(trailer->rwind);
+ u32 max_mtu = ntohl(trailer->maxMTU);
+ //u32 if_mtu = ntohl(trailer->ifMTU);
+ u32 rwind = ntohl(trailer->rwind);
+ u32 jumbo_max = ntohl(trailer->jumbo_max);
if (rwind > RXRPC_TX_MAX_WINDOW)
rwind = RXRPC_TX_MAX_WINDOW;
@@ -709,54 +796,149 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb
call->tx_winsize = rwind;
}
- mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU));
+ max_mtu = clamp(max_mtu, 500, 65535);
+ peer->ackr_max_data = max_mtu;
- peer = call->peer;
- if (mtu < peer->maxdata) {
- spin_lock(&peer->lock);
- peer->maxdata = mtu;
- peer->mtu = mtu + peer->hdrsize;
- spin_unlock(&peer->lock);
+ if (max_mtu < peer->max_data) {
+ trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_mtu,
+ rxrpc_pmtud_reduce_ack);
+ write_seqcount_begin(&peer->mtu_lock);
+ peer->max_data = max_mtu;
+ write_seqcount_end(&peer->mtu_lock);
+ }
+
+ max_data = umin(max_mtu, peer->max_data);
+ capacity = max_data;
+ capacity += sizeof(struct rxrpc_jumbo_header); /* First subpacket has main hdr, not jumbo */
+ capacity /= sizeof(struct rxrpc_jumbo_header) + RXRPC_JUMBO_DATALEN;
+
+ if (jumbo_max == 0) {
+ /* The peer says it supports pmtu discovery */
+ peer->ackr_adv_pmtud = true;
+ } else {
+ peer->ackr_adv_pmtud = false;
+ capacity = clamp(capacity, 1, jumbo_max);
}
+ call->tx_jumbo_max = capacity;
+
if (wake)
wake_up(&call->waitq);
}
+#if defined(CONFIG_X86) && __GNUC__ && !defined(__clang__)
+/* Clang doesn't support the %z constraint modifier */
+#define shiftr_adv_rotr(shift_from, rotate_into) ({ \
+ asm(" shr%z1 %1\n" \
+ " inc %0\n" \
+ " rcr%z2 %2\n" \
+ : "+d"(shift_from), "+m"(*(shift_from)), "+rm"(rotate_into) \
+ ); \
+ })
+#else
+#define shiftr_adv_rotr(shift_from, rotate_into) ({ \
+ typeof(rotate_into) __bit0 = *(shift_from) & 1; \
+ *(shift_from) >>= 1; \
+ shift_from++; \
+ rotate_into >>= 1; \
+ rotate_into |= __bit0 << (sizeof(rotate_into) * 8 - 1); \
+ })
+#endif
+
/*
- * Determine how many nacks from the previous ACK have now been satisfied.
+ * Deal with RTT samples from soft ACKs.
*/
-static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
- struct rxrpc_ack_summary *summary,
- rxrpc_seq_t seq)
+static void rxrpc_input_soft_rtt(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq)
{
- struct sk_buff *skb = call->cong_last_nack;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int i, new_acks = 0, retained_nacks = 0;
- rxrpc_seq_t old_seq = sp->ack.first_ack;
- u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
+ for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++)
+ if (summary->acked_serial == tq->segment_serial[ix])
+ return rxrpc_add_data_rtt_sample(call, summary, tq, ix);
+}
- if (after_eq(seq, old_seq + sp->ack.nr_acks)) {
- summary->nr_new_acks += sp->ack.nr_nacks;
- summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks);
- summary->nr_retained_nacks = 0;
- } else if (seq == old_seq) {
- summary->nr_retained_nacks = sp->ack.nr_nacks;
- } else {
- for (i = 0; i < sp->ack.nr_acks; i++) {
- if (acks[i] == RXRPC_ACK_TYPE_NACK) {
- if (before(old_seq + i, seq))
- new_acks++;
- else
- retained_nacks++;
- }
+/*
+ * Process a batch of soft ACKs specific to a transmission queue segment.
+ */
+static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned long extracted_acks,
+ int nr_reported,
+ rxrpc_seq_t seq,
+ rxrpc_seq_t *lowest_nak)
+{
+ unsigned long old_reported = 0, flipped, new_acks = 0;
+ unsigned long a_to_n, n_to_a = 0;
+ int new, a, n;
+
+ if (tq->nr_reported_acks > 0)
+ old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks);
+
+ _enter("{%x,%lx,%d},%lx,%d,%x",
+ tq->qbase, tq->segment_acked, tq->nr_reported_acks,
+ extracted_acks, nr_reported, seq);
+
+ _debug("[%x]", tq->qbase);
+ _debug("tq %16lx %u", tq->segment_acked, tq->nr_reported_acks);
+ _debug("sack %16lx %u", extracted_acks, nr_reported);
+
+ /* See how many previously logged ACKs/NAKs have flipped. */
+ flipped = (tq->segment_acked ^ extracted_acks) & old_reported;
+ if (flipped) {
+ n_to_a = ~tq->segment_acked & flipped; /* Old NAK -> ACK */
+ a_to_n = tq->segment_acked & flipped; /* Old ACK -> NAK */
+ a = hweight_long(n_to_a);
+ n = hweight_long(a_to_n);
+ _debug("flip %16lx", flipped);
+ _debug("ntoa %16lx %d", n_to_a, a);
+ _debug("aton %16lx %d", a_to_n, n);
+ call->acks_nr_sacks += a - n;
+ call->acks_nr_snacks += n - a;
+ summary->nr_new_sacks += a;
+ summary->nr_new_snacks += n;
+ }
+
+ /* See how many new ACKs/NAKs have been acquired. */
+ new = nr_reported - tq->nr_reported_acks;
+ if (new > 0) {
+ new_acks = extracted_acks & ~old_reported;
+ if (new_acks) {
+ a = hweight_long(new_acks);
+ n = new - a;
+ _debug("new_a %16lx new=%d a=%d n=%d", new_acks, new, a, n);
+ call->acks_nr_sacks += a;
+ call->acks_nr_snacks += n;
+ summary->nr_new_sacks += a;
+ summary->nr_new_snacks += n;
+ } else {
+ call->acks_nr_snacks += new;
+ summary->nr_new_snacks += new;
}
+ }
+
+ tq->nr_reported_acks = nr_reported;
+ tq->segment_acked = extracted_acks;
+ trace_rxrpc_apply_acks(call, tq);
- summary->nr_new_acks += new_acks;
- summary->nr_retained_nacks = retained_nacks;
+ if (extracted_acks != ~0UL) {
+ rxrpc_seq_t lowest = seq + ffz(extracted_acks);
+
+ if (before(lowest, *lowest_nak))
+ *lowest_nak = lowest;
}
- return old_seq + sp->ack.nr_acks;
+ if (summary->acked_serial)
+ rxrpc_input_soft_rtt(call, summary, tq);
+
+ new_acks |= n_to_a;
+ if (new_acks)
+ rxrpc_input_rack(call, summary, tq, new_acks);
+
+ if (call->tlp_serial &&
+ rxrpc_seq_in_txq(tq, call->tlp_seq) &&
+ test_bit(call->tlp_seq - tq->qbase, &new_acks))
+ summary->tlp_probe_acked = true;
}
/*
@@ -770,39 +952,50 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
*/
static void rxrpc_input_soft_acks(struct rxrpc_call *call,
struct rxrpc_ack_summary *summary,
- struct sk_buff *skb,
- rxrpc_seq_t seq,
- rxrpc_seq_t since)
+ struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int i, old_nacks = 0;
+ struct rxrpc_txqueue *tq = call->tx_queue;
+ unsigned long extracted = ~0UL;
+ unsigned int nr = 0;
+ rxrpc_seq_t seq = call->acks_hard_ack + 1;
rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks;
u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < sp->ack.nr_acks; i++) {
- if (acks[i] == RXRPC_ACK_TYPE_ACK) {
- summary->nr_acks++;
- if (after_eq(seq, since))
- summary->nr_new_acks++;
- } else {
- summary->saw_nacks = true;
- if (before(seq, since)) {
- /* Overlap with previous ACK */
- old_nacks++;
- } else {
- summary->nr_new_nacks++;
- sp->ack.nr_nacks++;
- }
+ _enter("%x,%x,%u", tq->qbase, seq, sp->ack.nr_acks);
+
+ while (after(seq, tq->qbase + RXRPC_NR_TXQUEUE - 1))
+ tq = tq->next;
- if (before(seq, lowest_nak))
- lowest_nak = seq;
+ for (unsigned int i = 0; i < sp->ack.nr_acks; i++) {
+ /* Decant ACKs until we hit a txqueue boundary. */
+ shiftr_adv_rotr(acks, extracted);
+ if (i == 256) {
+ acks -= i;
+ i = 0;
}
seq++;
+ nr++;
+ if ((seq & RXRPC_TXQ_MASK) != 0)
+ continue;
+
+ _debug("bound %16lx %u", extracted, nr);
+
+ rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE,
+ seq - RXRPC_NR_TXQUEUE, &lowest_nak);
+ extracted = ~0UL;
+ nr = 0;
+ tq = tq->next;
+ prefetch(tq);
}
- if (lowest_nak != call->acks_lowest_nak) {
- call->acks_lowest_nak = lowest_nak;
- summary->new_low_nack = true;
+ if (nr) {
+ unsigned int nr_reported = seq & RXRPC_TXQ_MASK;
+
+ extracted >>= RXRPC_NR_TXQUEUE - nr_reported;
+ _debug("tail %16lx %u", extracted, nr_reported);
+ rxrpc_input_soft_ack_tq(call, summary, tq, extracted, nr_reported,
+ seq & ~RXRPC_TXQ_MASK, &lowest_nak);
}
/* We *can* have more nacks than we did - the peer is permitted to drop
@@ -810,9 +1003,14 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
* possible for the nack distribution to change whilst the number of
* nacks stays the same or goes down.
*/
- if (old_nacks < summary->nr_retained_nacks)
- summary->nr_new_acks += summary->nr_retained_nacks - old_nacks;
- summary->nr_retained_nacks = old_nacks;
+ if (lowest_nak != call->acks_lowest_nak) {
+ call->acks_lowest_nak = lowest_nak;
+ summary->new_low_snack = true;
+ }
+
+ _debug("summary A=%d+%d N=%d+%d",
+ call->acks_nr_sacks, summary->nr_new_sacks,
+ call->acks_nr_snacks, summary->nr_new_snacks);
}
/*
@@ -820,21 +1018,21 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
* with respect to the ack state conveyed by preceding ACKs.
*/
static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
- rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
+ rxrpc_seq_t hard_ack, rxrpc_seq_t prev_pkt)
{
- rxrpc_seq_t base = READ_ONCE(call->acks_first_seq);
+ rxrpc_seq_t base = READ_ONCE(call->acks_hard_ack);
- if (after(first_pkt, base))
+ if (after(hard_ack, base))
return true; /* The window advanced */
- if (before(first_pkt, base))
+ if (before(hard_ack, base))
return false; /* firstPacket regressed */
if (after_eq(prev_pkt, call->acks_prev_seq))
return true; /* previousPacket hasn't regressed. */
/* Some rx implementations put a serial number in previousPacket. */
- if (after_eq(prev_pkt, base + call->tx_winsize))
+ if (after(prev_pkt, base + call->tx_winsize))
return false;
return true;
}
@@ -852,53 +1050,34 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_acktrailer trailer;
- rxrpc_serial_t ack_serial, acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;
_enter("");
offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- ack_serial = sp->hdr.serial;
- acked_serial = sp->ack.acked_serial;
- first_soft_ack = sp->ack.first_ack;
- prev_pkt = sp->ack.prev_ack;
- nr_acks = sp->ack.nr_acks;
- hard_ack = first_soft_ack - 1;
- summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ?
- sp->ack.reason : RXRPC_ACK__INVALID);
-
- trace_rxrpc_rx_ack(call, ack_serial, acked_serial,
- first_soft_ack, prev_pkt,
- summary.ack_reason, nr_acks);
- rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
+ summary.ack_serial = sp->hdr.serial;
+ first_soft_ack = sp->ack.first_ack;
+ prev_pkt = sp->ack.prev_ack;
+ nr_acks = sp->ack.nr_acks;
+ hard_ack = first_soft_ack - 1;
+ summary.acked_serial = sp->ack.acked_serial;
+ summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ?
+ sp->ack.reason : RXRPC_ACK__INVALID);
- if (acked_serial != 0) {
- switch (summary.ack_reason) {
- case RXRPC_ACK_PING_RESPONSE:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_ping_response);
- break;
- case RXRPC_ACK_REQUESTED:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_requested_ack);
- break;
- default:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_other_ack);
- break;
- }
- }
+ trace_rxrpc_rx_ack(call, sp);
+ rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
+ prefetch(call->tx_queue);
/* If we get an EXCEEDS_WINDOW ACK from the server, it probably
* indicates that the client address changed due to NAT. The server
* lost the call because it switched to a different peer.
*/
if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
- first_soft_ack == 1 &&
+ hard_ack == 0 &&
prev_pkt == 0 &&
rxrpc_is_client_call(call)) {
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
@@ -911,9 +1090,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* if we still have it buffered to the beginning.
*/
if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
- first_soft_ack == 1 &&
+ hard_ack == 0 &&
prev_pkt == 0 &&
- call->acks_hard_ack == 0 &&
+ call->tx_bottom == 0 &&
rxrpc_is_client_call(call)) {
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
0, -ENETRESET);
@@ -921,11 +1100,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
/* Discard any out-of-order or duplicate ACKs (outside lock). */
- if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
- trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->acks_first_seq,
- prev_pkt, call->acks_prev_seq);
- goto send_response;
+ if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call, summary.ack_serial, hard_ack, prev_pkt);
+ goto send_response; /* Still respond if requested. */
}
trailer.maxMTU = 0;
@@ -937,34 +1114,30 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0)
skb_condense(skb);
- if (call->cong_last_nack) {
- since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
- rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
- call->cong_last_nack = NULL;
- } else {
- summary.nr_new_acks = first_soft_ack - call->acks_first_seq;
- call->acks_lowest_nak = first_soft_ack + nr_acks;
- since = first_soft_ack;
- }
-
- call->acks_latest_ts = skb->tstamp;
- call->acks_first_seq = first_soft_ack;
+ call->acks_latest_ts = ktime_get_real();
+ call->acks_hard_ack = hard_ack;
call->acks_prev_seq = prev_pkt;
- switch (summary.ack_reason) {
- case RXRPC_ACK_PING:
- break;
- default:
- if (acked_serial && after(acked_serial, call->acks_highest_serial))
- call->acks_highest_serial = acked_serial;
- break;
+ if (summary.acked_serial) {
+ switch (summary.ack_reason) {
+ case RXRPC_ACK_PING_RESPONSE:
+ rxrpc_complete_rtt_probe(call, call->acks_latest_ts,
+ summary.acked_serial, summary.ack_serial,
+ rxrpc_rtt_rx_ping_response);
+ break;
+ default:
+ if (after(summary.acked_serial, call->acks_highest_serial))
+ call->acks_highest_serial = summary.acked_serial;
+ summary.rtt_sample_avail = true;
+ break;
+ }
}
/* Parse rwind and mtu sizes if provided. */
if (trailer.maxMTU)
rxrpc_input_ack_trailer(call, skb, &trailer);
- if (first_soft_ack == 0)
+ if (hard_ack + 1 == 0)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
/* Ignore ACKs unless we are or have just been transmitting. */
@@ -978,13 +1151,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
goto send_response;
}
- if (before(hard_ack, call->acks_hard_ack) ||
+ if (before(hard_ack, call->tx_bottom) ||
after(hard_ack, call->tx_top))
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window);
if (nr_acks > call->tx_top - hard_ack)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);
- if (after(hard_ack, call->acks_hard_ack)) {
+ if (after(hard_ack, call->tx_bottom)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
goto send_response;
@@ -994,25 +1167,30 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
- rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since);
- rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
- call->cong_last_nack = skb;
+ rxrpc_input_soft_acks(call, &summary, skb);
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
- summary.nr_acks == call->tx_top - hard_ack &&
+ call->acks_nr_sacks == call->tx_top - hard_ack &&
rxrpc_is_client_call(call))
- rxrpc_propose_ping(call, ack_serial,
+ rxrpc_propose_ping(call, summary.ack_serial,
rxrpc_propose_ack_ping_for_lost_reply);
- rxrpc_congestion_management(call, skb, &summary, acked_serial);
+ /* Drive the congestion management algorithm first and then RACK-TLP as
+ * the latter depends on the state/change in state in the former.
+ */
+ rxrpc_congestion_management(call, &summary);
+ rxrpc_rack_detect_loss_and_arm_timer(call, &summary);
+ rxrpc_tlp_process_ack(call, &summary);
+ if (call->tlp_serial && after_eq(summary.acked_serial, call->tlp_serial))
+ call->tlp_serial = 0;
send_response:
if (summary.ack_reason == RXRPC_ACK_PING)
- rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
+ rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, summary.ack_serial,
rxrpc_propose_ack_respond_to_ping);
else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
- rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
+ rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, summary.ack_serial,
rxrpc_propose_ack_respond_to_ack);
}
@@ -1111,5 +1289,5 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
break;
}
- rxrpc_input_call_event(call, skb);
+ rxrpc_input_call_event(call);
}
diff --git a/net/rxrpc/input_rack.c b/net/rxrpc/input_rack.c
new file mode 100644
index 000000000000..13c371261e0a
--- /dev/null
+++ b/net/rxrpc/input_rack.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RACK-TLP [RFC8985] Implementation
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "ar-internal.h"
+
+static bool rxrpc_rack_sent_after(ktime_t t1, rxrpc_seq_t seq1,
+ ktime_t t2, rxrpc_seq_t seq2)
+{
+ if (ktime_after(t1, t2))
+ return true;
+ return t1 == t2 && after(seq1, seq2);
+}
+
+/*
+ * Mark a packet lost.
+ */
+static void rxrpc_rack_mark_lost(struct rxrpc_call *call,
+ struct rxrpc_txqueue *tq, unsigned int ix)
+{
+ if (__test_and_set_bit(ix, &tq->segment_lost)) {
+ if (__test_and_clear_bit(ix, &tq->segment_retransmitted))
+ call->tx_nr_resent--;
+ } else {
+ call->tx_nr_lost++;
+ }
+ tq->segment_xmit_ts[ix] = UINT_MAX;
+}
+
+/*
+ * Get the transmission time of a packet in the Tx queue.
+ */
+static ktime_t rxrpc_get_xmit_ts(const struct rxrpc_txqueue *tq, unsigned int ix)
+{
+ if (tq->segment_xmit_ts[ix] == UINT_MAX)
+ return KTIME_MAX;
+ return ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);
+}
+
+/*
+ * Get a bitmask of nack bits for a queue segment and mask off any that aren't
+ * yet reported.
+ */
+static unsigned long rxrpc_tq_nacks(const struct rxrpc_txqueue *tq)
+{
+ unsigned long nacks = ~tq->segment_acked;
+
+ if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE)
+ nacks &= (1UL << tq->nr_reported_acks) - 1;
+ return nacks;
+}
+
+/*
+ * Update the RACK state for the most recently sent packet that has been
+ * delivered [RFC8985 6.2 Step 2].
+ */
+static void rxrpc_rack_update(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned int ix)
+{
+ rxrpc_seq_t seq = tq->qbase + ix;
+ ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);
+ ktime_t rtt = ktime_sub(call->acks_latest_ts, xmit_ts);
+
+ if (__test_and_clear_bit(ix, &tq->segment_lost))
+ call->tx_nr_lost--;
+
+ if (test_bit(ix, &tq->segment_retransmitted)) {
+ /* Use Rx.serial instead of TCP.ACK.ts_option.echo_reply. */
+ if (before(call->acks_highest_serial, tq->segment_serial[ix]))
+ return;
+ if (rtt < minmax_get(&call->min_rtt))
+ return;
+ }
+
+ /* The RACK algorithm requires the segment ACKs to be traversed in
+ * order of segment transmission - but the only thing this seems to
+ * matter for is that RACK.rtt is set to the rtt of the most recently
+ * transmitted segment. We should be able to achieve the same by only
+ * setting RACK.rtt if the xmit time is greater.
+ */
+ if (ktime_after(xmit_ts, call->rack_rtt_ts)) {
+ call->rack_rtt = rtt;
+ call->rack_rtt_ts = xmit_ts;
+ }
+
+ if (rxrpc_rack_sent_after(xmit_ts, seq, call->rack_xmit_ts, call->rack_end_seq)) {
+ call->rack_rtt = rtt;
+ call->rack_xmit_ts = xmit_ts;
+ call->rack_end_seq = seq;
+ }
+}
+
+/*
+ * Detect data segment reordering [RFC8985 6.2 Step 3].
+ */
+static void rxrpc_rack_detect_reordering(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned int ix)
+{
+ rxrpc_seq_t seq = tq->qbase + ix;
+
+ /* Track the highest sequence number so far ACK'd. This is not
+ * necessarily the same as ack.firstPacket + ack.nAcks - 1 as the peer
+ * could put a NACK in the last SACK slot.
+ */
+ if (after(seq, call->rack_fack))
+ call->rack_fack = seq;
+ else if (before(seq, call->rack_fack) &&
+ test_bit(ix, &tq->segment_retransmitted))
+ call->rack_reordering_seen = true;
+}
+
+void rxrpc_input_rack_one(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned int ix)
+{
+ rxrpc_rack_update(call, summary, tq, ix);
+ rxrpc_rack_detect_reordering(call, summary, tq, ix);
+}
+
+void rxrpc_input_rack(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned long new_acks)
+{
+ while (new_acks) {
+ unsigned int ix = __ffs(new_acks);
+
+ __clear_bit(ix, &new_acks);
+ rxrpc_input_rack_one(call, summary, tq, ix);
+ }
+
+ trace_rxrpc_rack_update(call, summary);
+}
+
+/*
+ * Update the reordering window [RFC8985 6.2 Step 4]. Returns the updated
+ * duration of the reordering window.
+ *
+ * Note that the Rx protocol doesn't have a 'DSACK option' per se, but ACKs can
+ * be given a 'DUPLICATE' reason with the serial number referring to the
+ * duplicated DATA packet. Rx does not inform as to whether this was a
+ * reception of the same packet twice or of a retransmission of a packet we
+ * already received (though this could be determined by the transmitter based
+ * on the serial number).
+ */
+static ktime_t rxrpc_rack_update_reo_wnd(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary)
+{
+ rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
+ rxrpc_seq_t snd_nxt = call->tx_transmitted + 1; /* Next seq to be sent */
+ bool have_dsack_option = summary->ack_reason == RXRPC_ACK_DUPLICATE;
+ int dup_thresh = 3;
+
+ /* DSACK-based reordering window adaptation */
+ if (!call->rack_dsack_round_none &&
+ after_eq(snd_una, call->rack_dsack_round))
+ call->rack_dsack_round_none = true;
+
+ /* Grow the reordering window per round that sees DSACK. Reset the
+ * window after 16 DSACK-free recoveries.
+ */
+ if (call->rack_dsack_round_none && have_dsack_option) {
+ call->rack_dsack_round_none = false;
+ call->rack_dsack_round = snd_nxt;
+ call->rack_reo_wnd_mult++;
+ call->rack_reo_wnd_persist = 16;
+ } else if (summary->exiting_fast_or_rto_recovery) {
+ call->rack_reo_wnd_persist--;
+ if (call->rack_reo_wnd_persist <= 0)
+ call->rack_reo_wnd_mult = 1;
+ }
+
+ if (!call->rack_reordering_seen) {
+ if (summary->in_fast_or_rto_recovery)
+ return 0;
+ if (call->acks_nr_sacks >= dup_thresh)
+ return 0;
+ }
+
+ return us_to_ktime(umin(call->rack_reo_wnd_mult * minmax_get(&call->min_rtt) / 4,
+ call->srtt_us >> 3));
+}
+
+/*
+ * Detect losses [RFC8985 6.2 Step 5].
+ */
+static ktime_t rxrpc_rack_detect_loss(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary)
+{
+ struct rxrpc_txqueue *tq;
+ ktime_t timeout = 0, lost_after, now = ktime_get_real();
+
+ call->rack_reo_wnd = rxrpc_rack_update_reo_wnd(call, summary);
+ lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
+ trace_rxrpc_rack_scan_loss(call);
+
+ for (tq = call->tx_queue; tq; tq = tq->next) {
+ unsigned long nacks = rxrpc_tq_nacks(tq);
+
+ if (after(tq->qbase, call->tx_transmitted))
+ break;
+ trace_rxrpc_rack_scan_loss_tq(call, tq, nacks);
+
+ /* Skip ones marked lost but not yet retransmitted */
+ nacks &= ~tq->segment_lost | tq->segment_retransmitted;
+
+ while (nacks) {
+ unsigned int ix = __ffs(nacks);
+ rxrpc_seq_t seq = tq->qbase + ix;
+ ktime_t remaining;
+ ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);
+
+ __clear_bit(ix, &nacks);
+
+ if (rxrpc_rack_sent_after(call->rack_xmit_ts, call->rack_end_seq,
+ xmit_ts, seq)) {
+ remaining = ktime_sub(ktime_add(xmit_ts, lost_after), now);
+ if (remaining <= 0) {
+ rxrpc_rack_mark_lost(call, tq, ix);
+ trace_rxrpc_rack_detect_loss(call, summary, seq);
+ } else {
+ timeout = max(remaining, timeout);
+ }
+ }
+ }
+ }
+
+ return timeout;
+}
+
+/*
+ * Detect losses and set a timer to retry the detection [RFC8985 6.2 Step 5].
+ */
+void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary)
+{
+ ktime_t timeout = rxrpc_rack_detect_loss(call, summary);
+
+ if (timeout) {
+ call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RACK_REORDER;
+ call->rack_timo_at = ktime_add(ktime_get_real(), timeout);
+ trace_rxrpc_rack_timer(call, timeout, false);
+ trace_rxrpc_timer_set(call, timeout, rxrpc_timer_trace_rack_reo);
+ }
+}
+
+/*
+ * Handle RACK-TLP RTO expiration [RFC8985 6.3].
+ */
+static void rxrpc_rack_mark_losses_on_rto(struct rxrpc_call *call)
+{
+ struct rxrpc_txqueue *tq;
+ rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
+ ktime_t lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
+ ktime_t deadline = ktime_sub(ktime_get_real(), lost_after);
+
+ for (tq = call->tx_queue; tq; tq = tq->next) {
+ unsigned long unacked = ~tq->segment_acked;
+
+ trace_rxrpc_rack_mark_loss_tq(call, tq);
+ while (unacked) {
+ unsigned int ix = __ffs(unacked);
+ rxrpc_seq_t seq = tq->qbase + ix;
+ ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);
+
+ if (after(seq, call->tx_transmitted))
+ return;
+ __clear_bit(ix, &unacked);
+
+ if (seq == snd_una ||
+ ktime_before(xmit_ts, deadline))
+ rxrpc_rack_mark_lost(call, tq, ix);
+ }
+ }
+}
+
+/*
+ * Calculate the TLP loss probe timeout (PTO) [RFC8985 7.2].
+ */
+ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now)
+{
+ unsigned int flight_size = rxrpc_tx_in_flight(call);
+ ktime_t rto_at = ktime_add(call->tx_last_sent,
+ rxrpc_get_rto_backoff(call, false));
+ ktime_t pto;
+
+ if (call->rtt_count > 0) {
+ /* Use 2*SRTT as the timeout. */
+ pto = ns_to_ktime(call->srtt_us * NSEC_PER_USEC / 4);
+ if (flight_size)
+ pto = ktime_add(pto, call->tlp_max_ack_delay);
+ } else {
+ pto = NSEC_PER_SEC;
+ }
+
+ if (ktime_after(ktime_add(now, pto), rto_at))
+ pto = ktime_sub(rto_at, now);
+ return pto;
+}
+
+/*
+ * Send a TLP loss probe on PTO expiration [RFC8985 7.3].
+ */
+void rxrpc_tlp_send_probe(struct rxrpc_call *call)
+{
+ unsigned int in_flight = rxrpc_tx_in_flight(call);
+
+ if (after_eq(call->acks_hard_ack, call->tx_transmitted))
+ return; /* Everything we transmitted has been acked. */
+
+ /* There must be no other loss probe still in flight and we need to
+ * have taken a new RTT sample since last probe or the start of
+ * connection.
+ */
+ if (!call->tlp_serial &&
+ call->tlp_rtt_taken != call->rtt_taken) {
+ call->tlp_is_retrans = false;
+ if (after(call->send_top, call->tx_transmitted) &&
+ rxrpc_tx_window_space(call) > 0) {
+ /* Transmit the lowest-sequence unsent DATA */
+ call->tx_last_serial = 0;
+ rxrpc_transmit_some_data(call, 1, rxrpc_txdata_tlp_new_data);
+ call->tlp_serial = call->tx_last_serial;
+ call->tlp_seq = call->tx_transmitted;
+ trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_transmit_new);
+ in_flight = rxrpc_tx_in_flight(call);
+ } else {
+ /* Retransmit the highest-sequence DATA sent */
+ call->tx_last_serial = 0;
+ rxrpc_resend_tlp(call);
+ call->tlp_is_retrans = true;
+ trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_retransmit);
+ }
+ } else {
+ trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_busy);
+ }
+
+ if (in_flight != 0) {
+ ktime_t rto = rxrpc_get_rto_backoff(call, false);
+
+ call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RTO;
+ call->rack_timo_at = ktime_add(ktime_get_real(), rto);
+ trace_rxrpc_rack_timer(call, rto, false);
+ trace_rxrpc_timer_set(call, rto, rxrpc_timer_trace_rack_rto);
+ }
+}
+
+/*
+ * Detect losses using the ACK of a TLP loss probe [RFC8985 7.4].
+ */
+void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary)
+{
+ if (!call->tlp_serial || after(call->tlp_seq, call->acks_hard_ack))
+ return;
+
+ if (!call->tlp_is_retrans) {
+ /* TLP of new data delivered */
+ trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_new_data);
+ call->tlp_serial = 0;
+ } else if (summary->ack_reason == RXRPC_ACK_DUPLICATE &&
+ summary->acked_serial == call->tlp_serial) {
+ /* General Case: Detected packet losses using RACK [7.4.1] */
+ trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_dup_acked);
+ call->tlp_serial = 0;
+ } else if (after(call->acks_hard_ack, call->tlp_seq)) {
+ /* Repaired the single loss */
+ trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_hard_beyond);
+ call->tlp_serial = 0;
+ // TODO: Invoke congestion control to react to the loss
+ // event the probe has repaired
+ } else if (summary->tlp_probe_acked) {
+ trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_acked);
+ /* Special Case: Detected a single loss repaired by the loss
+ * probe [7.4.2]
+ */
+ call->tlp_serial = 0;
+ } else {
+ trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_incomplete);
+ }
+}
+
+/*
+ * Handle RACK timer expiration: clear the armed mode, then run its handler.
+ */
+void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by)
+{
+	struct rxrpc_ack_summary summary = {};
+	enum rxrpc_rack_timer_mode mode = call->rack_timer_mode;
+
+	trace_rxrpc_rack_timer(call, overran_by, true);
+	call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF; /* Handlers may re-arm */
+
+	switch (mode) {
+	case RXRPC_CALL_RACKTIMER_RACK_REORDER:
+		rxrpc_rack_detect_loss_and_arm_timer(call, &summary);
+		break;
+	case RXRPC_CALL_RACKTIMER_TLP_PTO:
+		rxrpc_tlp_send_probe(call);
+		break;
+	case RXRPC_CALL_RACKTIMER_RTO:
+		// Might need to poke the congestion algo in some way
+		rxrpc_rack_mark_losses_on_rto(call);
+		break;
+	//case RXRPC_CALL_RACKTIMER_ZEROWIN:
+	default:
+		pr_warn("Unexpected rack timer %u\n", mode); /* Field was reset above */
+	}
+}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 6716c021a532..e068f9b79d02 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -19,11 +19,14 @@ static int none_init_connection_security(struct rxrpc_connection *conn,
*/
static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 1, gfp);
+ return rxrpc_alloc_data_txbuf(call, umin(remain, RXRPC_JUMBO_DATALEN), 1, gfp);
}
static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
+ txb->pkt_len = txb->len;
+ if (txb->len == RXRPC_JUMBO_DATALEN)
+ txb->jumboable = true;
return 0;
}
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 07c74c77d802..2925c7fc82cf 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -338,7 +338,6 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
struct rxrpc_channel *chan;
struct rxrpc_call *call = NULL;
unsigned int channel;
- bool ret;
if (sp->hdr.securityIndex != conn->security_ix)
return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security,
@@ -364,6 +363,12 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
if (sp->hdr.callNumber == 0)
return rxrpc_input_conn_packet(conn, skb);
+ /* Deal with path MTU discovery probing. */
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK &&
+ conn->pmtud_probe &&
+ after_eq(sp->ack.acked_serial, conn->pmtud_probe))
+ rxrpc_input_probe_for_pmtud(conn, sp->ack.acked_serial, false);
+
/* Call-bound packets are routed by connection channel. */
channel = sp->hdr.cid & RXRPC_CHANNELMASK;
chan = &conn->channels[channel];
@@ -419,9 +424,9 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
peer_srx, skb);
}
- ret = rxrpc_input_call_event(call, skb);
+ rxrpc_queue_rx_call_packet(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
- return ret;
+ return true;
}
/*
@@ -438,6 +443,8 @@ int rxrpc_io_thread(void *data)
ktime_t now;
#endif
bool should_stop;
+ LIST_HEAD(conn_attend_q);
+ LIST_HEAD(call_attend_q);
complete(&local->io_thread_ready);
@@ -448,43 +455,26 @@ int rxrpc_io_thread(void *data)
for (;;) {
rxrpc_inc_stat(local->rxnet, stat_io_loop);
- /* Deal with connections that want immediate attention. */
- conn = list_first_entry_or_null(&local->conn_attend_q,
- struct rxrpc_connection,
- attend_link);
- if (conn) {
- spin_lock_bh(&local->lock);
- list_del_init(&conn->attend_link);
- spin_unlock_bh(&local->lock);
-
- rxrpc_input_conn_event(conn, NULL);
- rxrpc_put_connection(conn, rxrpc_conn_put_poke);
- continue;
+ /* Inject a delay into packets if requested. */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ now = ktime_get_real();
+ while ((skb = skb_peek(&local->rx_delay_queue))) {
+ if (ktime_before(now, skb->tstamp))
+ break;
+ skb = skb_dequeue(&local->rx_delay_queue);
+ skb_queue_tail(&local->rx_queue, skb);
}
+#endif
- if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
- &local->client_conn_flags))
- rxrpc_discard_expired_client_conns(local);
-
- /* Deal with calls that want immediate attention. */
- if ((call = list_first_entry_or_null(&local->call_attend_q,
- struct rxrpc_call,
- attend_link))) {
- spin_lock_bh(&local->lock);
- list_del_init(&call->attend_link);
- spin_unlock_bh(&local->lock);
-
- trace_rxrpc_call_poked(call);
- rxrpc_input_call_event(call, NULL);
- rxrpc_put_call(call, rxrpc_call_put_poke);
- continue;
+ if (!skb_queue_empty(&local->rx_queue)) {
+ spin_lock_irq(&local->rx_queue.lock);
+ skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
+ spin_unlock_irq(&local->rx_queue.lock);
+ trace_rxrpc_iothread_rx(local, skb_queue_len(&rx_queue));
}
- if (!list_empty(&local->new_client_calls))
- rxrpc_connect_client_calls(local);
-
- /* Process received packets and errors. */
- if ((skb = __skb_dequeue(&rx_queue))) {
+ /* Distribute packets and errors. */
+ while ((skb = __skb_dequeue(&rx_queue))) {
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
switch (skb->mark) {
case RXRPC_SKB_MARK_PACKET:
@@ -508,27 +498,46 @@ int rxrpc_io_thread(void *data)
rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
break;
}
- continue;
}
- /* Inject a delay into packets if requested. */
-#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
- now = ktime_get_real();
- while ((skb = skb_peek(&local->rx_delay_queue))) {
- if (ktime_before(now, skb->tstamp))
- break;
- skb = skb_dequeue(&local->rx_delay_queue);
- skb_queue_tail(&local->rx_queue, skb);
+ /* Deal with connections that want immediate attention. */
+ spin_lock_irq(&local->lock);
+ list_splice_tail_init(&local->conn_attend_q, &conn_attend_q);
+ spin_unlock_irq(&local->lock);
+
+ while ((conn = list_first_entry_or_null(&conn_attend_q,
+ struct rxrpc_connection,
+ attend_link))) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&conn->attend_link);
+ spin_unlock_bh(&local->lock);
+ rxrpc_input_conn_event(conn, NULL);
+ rxrpc_put_connection(conn, rxrpc_conn_put_poke);
}
-#endif
- if (!skb_queue_empty(&local->rx_queue)) {
- spin_lock_irq(&local->rx_queue.lock);
- skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
- spin_unlock_irq(&local->rx_queue.lock);
- continue;
+ if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags))
+ rxrpc_discard_expired_client_conns(local);
+
+ /* Deal with calls that want immediate attention. */
+ spin_lock_irq(&local->lock);
+ list_splice_tail_init(&local->call_attend_q, &call_attend_q);
+ spin_unlock_irq(&local->lock);
+
+ while ((call = list_first_entry_or_null(&call_attend_q,
+ struct rxrpc_call,
+ attend_link))) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&call->attend_link);
+ spin_unlock_bh(&local->lock);
+ trace_rxrpc_call_poked(call);
+ rxrpc_input_call_event(call);
+ rxrpc_put_call(call, rxrpc_call_put_poke);
}
+ if (!list_empty(&local->new_client_calls))
+ rxrpc_connect_client_calls(local);
+
set_current_state(TASK_INTERRUPTIBLE);
should_stop = kthread_should_stop();
if (!skb_queue_empty(&local->rx_queue) ||
@@ -558,7 +567,7 @@ int rxrpc_io_thread(void *data)
}
timeout = nsecs_to_jiffies(delay_ns);
- timeout = max(timeout, 1UL);
+ timeout = umax(timeout, 1);
schedule_timeout(timeout);
__set_current_state(TASK_RUNNING);
continue;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 2792d2304605..a74a4b43904f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -215,9 +215,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
/* we want to set the don't fragment bit */
rxrpc_local_dont_fragment(local, true);
-
- /* We want receive timestamps. */
- sock_enable_timestamps(usk);
break;
default:
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 657cf35089a6..8fcc8139d771 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -46,13 +46,13 @@ unsigned int rxrpc_rx_window_size = 255;
* Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
* made by gluing normal packets together that we're willing to handle.
*/
-unsigned int rxrpc_rx_mtu = 5692;
+unsigned int rxrpc_rx_mtu = RXRPC_JUMBO(46);
/*
* The maximum number of fragments in a received jumbo packet that we tell the
* sender that we're willing to handle.
*/
-unsigned int rxrpc_rx_jumbo_max = 4;
+unsigned int rxrpc_rx_jumbo_max = 46;
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
/*
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5ea9601efd05..6f7a125d6e90 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -72,22 +72,96 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
}
/*
+ * Allocate transmission buffers for an ACK and attach them to local->kvec[].
+ */
+static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size)
+{
+ struct rxrpc_wire_header *whdr;
+ struct rxrpc_acktrailer *trailer;
+ struct rxrpc_ackpacket *ack;
+ struct kvec *kv = call->local->kvec;
+ gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
+ void *buf, *buf2 = NULL;
+ u8 *filler;
+
+ buf = page_frag_alloc(&call->local->tx_alloc,
+ sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
+ if (!buf)
+ return -ENOMEM;
+
+ if (sack_size) {
+ buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
+ if (!buf2) {
+ page_frag_free(buf);
+ return -ENOMEM;
+ }
+ }
+
+ whdr = buf;
+ ack = buf + sizeof(*whdr);
+ filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
+ trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;
+
+ kv[0].iov_base = whdr;
+ kv[0].iov_len = sizeof(*whdr) + sizeof(*ack);
+ kv[1].iov_base = buf2;
+ kv[1].iov_len = sack_size;
+ kv[2].iov_base = filler;
+ kv[2].iov_len = 3 + sizeof(*trailer);
+ return 3; /* Number of kvec[] used. */
+}
+
+static void rxrpc_free_ack(struct rxrpc_call *call)
+{
+ page_frag_free(call->local->kvec[0].iov_base);
+ if (call->local->kvec[1].iov_base)
+ page_frag_free(call->local->kvec[1].iov_base);
+}
+
+/*
+ * Record the beginning of an RTT probe.
+ */
+static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
+ ktime_t now, enum rxrpc_rtt_tx_trace why)
+{
+ unsigned long avail = call->rtt_avail;
+ int rtt_slot = 9;
+
+ if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK))
+ goto no_slot;
+
+ rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK);
+ if (!test_and_clear_bit(rtt_slot, &call->rtt_avail))
+ goto no_slot;
+
+ call->rtt_serial[rtt_slot] = serial;
+ call->rtt_sent_at[rtt_slot] = now;
+ smp_wmb(); /* Write data before avail bit */
+ set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
+
+ trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
+ return;
+
+no_slot:
+ trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
+}
+
+/*
* Fill out an ACK packet.
*/
-static void rxrpc_fill_out_ack(struct rxrpc_call *call,
- struct rxrpc_txbuf *txb,
- u8 ack_reason,
- rxrpc_serial_t serial)
+static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason,
+ rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial)
{
- struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
- struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3;
+ struct kvec *kv = call->local->kvec;
+ struct rxrpc_wire_header *whdr = kv[0].iov_base;
+ struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
- unsigned int qsize, sack, wrap, to;
+ unsigned int qsize, sack, wrap, to, max_mtu, if_mtu;
rxrpc_seq_t window, wtop;
+ ktime_t now = ktime_get_real();
int rsize;
- u32 mtu, jmax;
- u8 *filler = txb->kvec[2].iov_base;
- u8 *sackp = txb->kvec[1].iov_base;
+ u8 *filler = kv[2].iov_base;
+ u8 *sackp = kv[1].iov_base;
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
@@ -95,14 +169,25 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call,
wtop = call->ackr_wtop;
sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
+ *_ack_serial = rxrpc_get_next_serial(call->conn);
+
+ whdr->epoch = htonl(call->conn->proto.epoch);
+ whdr->cid = htonl(call->cid);
+ whdr->callNumber = htonl(call->call_id);
+ whdr->serial = htonl(*_ack_serial);
whdr->seq = 0;
whdr->type = RXRPC_PACKET_TYPE_ACK;
- txb->flags |= RXRPC_SLOW_START_OK;
+ whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK;
+ whdr->userStatus = 0;
+ whdr->securityIndex = call->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(call->dest_srx.srx_service);
+
ack->bufferSpace = 0;
ack->maxSkew = 0;
ack->firstPacket = htonl(window);
ack->previousPacket = htonl(call->rx_highest_seq);
- ack->serial = htonl(serial);
+ ack->serial = htonl(serial_to_ack);
ack->reason = ack_reason;
ack->nAcks = wtop - window;
filler[0] = 0;
@@ -110,15 +195,13 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call,
filler[2] = 0;
if (ack_reason == RXRPC_ACK_PING)
- txb->flags |= RXRPC_REQUEST_ACK;
+ whdr->flags |= RXRPC_REQUEST_ACK;
if (after(wtop, window)) {
- txb->len += ack->nAcks;
- txb->kvec[1].iov_base = sackp;
- txb->kvec[1].iov_len = ack->nAcks;
+ kv[1].iov_len = ack->nAcks;
wrap = RXRPC_SACK_SIZE - sack;
- to = min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE);
+ to = umin(ack->nAcks, RXRPC_SACK_SIZE);
if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
@@ -132,56 +215,42 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call,
ack->reason = RXRPC_ACK_IDLE;
}
- mtu = call->peer->if_mtu;
- mtu -= call->peer->hdrsize;
- jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
- txb->ack_rwind = rsize;
- trailer->maxMTU = htonl(rxrpc_rx_mtu);
- trailer->ifMTU = htonl(mtu);
- trailer->rwind = htonl(rsize);
- trailer->jumbo_max = htonl(jmax);
-}
-
-/*
- * Record the beginning of an RTT probe.
- */
-static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
- ktime_t now, enum rxrpc_rtt_tx_trace why)
-{
- unsigned long avail = call->rtt_avail;
- int rtt_slot = 9;
-
- if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK))
- goto no_slot;
-
- rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK);
- if (!test_and_clear_bit(rtt_slot, &call->rtt_avail))
- goto no_slot;
- call->rtt_serial[rtt_slot] = serial;
- call->rtt_sent_at[rtt_slot] = now;
- smp_wmb(); /* Write data before avail bit */
- set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
+ if_mtu = call->peer->if_mtu - call->peer->hdrsize;
+ if (call->peer->ackr_adv_pmtud) {
+ max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu);
+ } else {
+ if_mtu = umin(if_mtu, 1444);
+ max_mtu = if_mtu;
+ }
- trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
- return;
+ trailer->maxMTU = htonl(max_mtu);
+ trailer->ifMTU = htonl(if_mtu);
+ trailer->rwind = htonl(rsize);
+ trailer->jumbo_max = 0; /* Advertise pmtu discovery */
-no_slot:
- trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
+ if (ack_reason == RXRPC_ACK_PING)
+ rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping);
+ if (whdr->flags & RXRPC_REQUEST_ACK)
+ call->rtt_last_req = now;
+ rxrpc_set_keepalive(call, now);
+ return nr_kv;
}
/*
* Transmit an ACK packet.
*/
-static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
{
- struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct kvec *kv = call->local->kvec;
+ struct rxrpc_wire_header *whdr = kv[0].iov_base;
+ struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
struct rxrpc_connection *conn;
struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
struct msghdr msg;
- ktime_t now;
int ret;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
@@ -195,33 +264,34 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
msg.msg_controllen = 0;
msg.msg_flags = MSG_SPLICE_PAGES;
- whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
-
- txb->serial = rxrpc_get_next_serial(conn);
- whdr->serial = htonl(txb->serial);
- trace_rxrpc_tx_ack(call->debug_id, txb->serial,
+ trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(ack->firstPacket),
ntohl(ack->serial), ack->reason, ack->nAcks,
- txb->ack_rwind);
+ ntohl(trailer->rwind), why);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
- iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len);
- rxrpc_local_dont_fragment(conn->local, false);
- ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len);
+ rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe);
+
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0) {
- trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret,
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_ack);
+ if (why == rxrpc_propose_ack_ping_for_mtu_probe &&
+ ret == -EMSGSIZE)
+ rxrpc_input_probe_for_pmtud(conn, serial, true);
} else {
trace_rxrpc_tx_packet(call->debug_id, whdr,
rxrpc_tx_point_call_ack);
- now = ktime_get_real();
- if (ack->reason == RXRPC_ACK_PING)
- rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_ping);
- if (txb->flags & RXRPC_REQUEST_ACK)
- call->peer->rtt_last_req = now;
- rxrpc_set_keepalive(call, now);
+ if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
+ call->peer->pmtud_pending = false;
+ call->peer->pmtud_probing = true;
+ call->conn->pmtud_probe = serial;
+ call->conn->pmtud_call = call->debug_id;
+ trace_rxrpc_pmtud_tx(call);
+ }
}
rxrpc_tx_backoff(call, ret);
}
@@ -230,31 +300,62 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
* Queue an ACK for immediate transmission.
*/
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
- rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
+ rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why)
{
- struct rxrpc_txbuf *txb;
+ struct kvec *kv = call->local->kvec;
+ rxrpc_serial_t ack_serial;
+ size_t len;
+ int nr_kv;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return;
rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
- txb = rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window);
- if (!txb) {
+ nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window);
+ if (nr_kv < 0) {
kleave(" = -ENOMEM");
return;
}
- txb->ack_why = why;
+ nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial);
+ len = kv[0].iov_len;
+ len += kv[1].iov_len;
+ len += kv[2].iov_len;
+
+ /* Extend a path MTU probe ACK. */
+ if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
+ size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header);
+
+ if (len > probe_mtu)
+ goto skip;
+ while (len < probe_mtu) {
+ size_t part = umin(probe_mtu - len, PAGE_SIZE);
+
+ kv[nr_kv].iov_base = page_address(ZERO_PAGE(0));
+ kv[nr_kv].iov_len = part;
+ len += part;
+ nr_kv++;
+ }
+ }
- rxrpc_fill_out_ack(call, txb, ack_reason, serial);
call->ackr_nr_unacked = 0;
atomic_set(&call->ackr_nr_consumed, 0);
clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
- trace_rxrpc_send_ack(call, why, ack_reason, serial);
- rxrpc_send_ack_packet(call, txb);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
+ trace_rxrpc_send_ack(call, why, ack_reason, ack_serial);
+ rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why);
+skip:
+ rxrpc_free_ack(call);
+}
+
+/*
+ * Send an ACK probe for path MTU discovery.
+ */
+void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call)
+{
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_mtu_probe);
}
/*
@@ -324,14 +425,21 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
/*
* Prepare a (sub)packet for transmission.
*/
-static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb,
- rxrpc_serial_t serial)
+static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
+ struct rxrpc_send_data_req *req,
+ struct rxrpc_txbuf *txb,
+ rxrpc_serial_t serial, int subpkt)
{
struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo);
enum rxrpc_req_ack_trace why;
struct rxrpc_connection *conn = call->conn;
+ struct kvec *kv = &call->local->kvec[subpkt];
+ size_t len = txb->pkt_len;
+ bool last;
+ u8 flags;
- _enter("%x,{%d}", txb->seq, txb->len);
+ _enter("%x,%zd", txb->seq, len);
txb->serial = serial;
@@ -339,6 +447,15 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t
txb->seq == 1)
whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
+ txb->flags &= ~RXRPC_REQUEST_ACK;
+ flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
+ last = txb->flags & RXRPC_LAST_PACKET;
+
+ if (subpkt < req->n - 1) {
+ len = RXRPC_JUMBO_DATALEN;
+ goto dont_set_request_ack;
+ }
+
/* If our RTT cache needs working on, request an ACK. Also request
* ACKs if a DATA packet appears to have been lost.
*
@@ -346,113 +463,188 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t
* service call, lest OpenAFS incorrectly send us an ACK with some
* soft-ACKs in it and then never follow up with a proper hard ACK.
*/
- if (txb->flags & RXRPC_REQUEST_ACK)
- why = rxrpc_reqack_already_on;
- else if ((txb->flags & RXRPC_LAST_PACKET) && rxrpc_sending_to_client(txb))
+ if (last && rxrpc_sending_to_client(txb))
why = rxrpc_reqack_no_srv_last;
else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
why = rxrpc_reqack_ack_lost;
else if (txb->flags & RXRPC_TXBUF_RESENT)
why = rxrpc_reqack_retrans;
- else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2)
+ else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= RXRPC_MIN_CWND)
why = rxrpc_reqack_slow_start;
else if (call->tx_winsize <= 2)
why = rxrpc_reqack_small_txwin;
- else if (call->peer->rtt_count < 3 && txb->seq & 1)
+ else if (call->rtt_count < 3)
why = rxrpc_reqack_more_rtt;
- else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()))
+ else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real()))
why = rxrpc_reqack_old_rtt;
+ else if (!last && !after(READ_ONCE(call->send_top), txb->seq))
+ why = rxrpc_reqack_app_stall;
else
goto dont_set_request_ack;
rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
- if (why != rxrpc_reqack_no_srv_last)
- txb->flags |= RXRPC_REQUEST_ACK;
+ if (why != rxrpc_reqack_no_srv_last) {
+ flags |= RXRPC_REQUEST_ACK;
+ trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial);
+ call->rtt_last_req = req->now;
+ }
dont_set_request_ack:
- whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
- whdr->serial = htonl(txb->serial);
- whdr->cksum = txb->cksum;
+ /* The jumbo header overlays the wire header in the txbuf. */
+ if (subpkt < req->n - 1)
+ flags |= RXRPC_JUMBO_PACKET;
+ else
+ flags &= ~RXRPC_JUMBO_PACKET;
+ if (subpkt == 0) {
+ whdr->flags = flags;
+ whdr->serial = htonl(txb->serial);
+ whdr->cksum = txb->cksum;
+ whdr->serviceId = htons(conn->service_id);
+ kv->iov_base = whdr;
+ len += sizeof(*whdr);
+ } else {
+ jumbo->flags = flags;
+ jumbo->pad = 0;
+ jumbo->cksum = txb->cksum;
+ kv->iov_base = jumbo;
+ len += sizeof(*jumbo);
+ }
- trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false);
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace);
+ kv->iov_len = len;
+ return len;
}
/*
- * Prepare a packet for transmission.
+ * Prepare a transmission queue object for initial transmission. Returns the
+ * number of microseconds since the transmission queue base timestamp.
*/
-static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq,
+ struct rxrpc_send_data_req *req)
{
- rxrpc_serial_t serial;
-
- /* Each transmission of a Tx packet needs a new serial number */
- serial = rxrpc_get_next_serial(call->conn);
-
- rxrpc_prepare_data_subpacket(call, txb, serial);
-
- return txb->len;
+ if (!tq)
+ return 0;
+ if (tq->xmit_ts_base == KTIME_MIN) {
+ tq->xmit_ts_base = req->now;
+ return 0;
+ }
+ return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base));
}
/*
- * Set timeouts after transmitting a packet.
+ * Prepare a (jumbo) packet for transmission.
*/
-static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req)
{
- ktime_t now = ktime_get_real();
- bool ack_requested = txb->flags & RXRPC_REQUEST_ACK;
+ struct rxrpc_txqueue *tq = req->tq;
+ rxrpc_serial_t serial;
+ unsigned int xmit_ts;
+ rxrpc_seq_t seq = req->seq;
+ size_t len = 0;
+ bool start_tlp = false;
- call->tx_last_sent = now;
- txb->last_sent = now;
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit);
- if (ack_requested) {
- rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_data);
+ /* Each transmission of a Tx packet needs a new serial number */
+ serial = rxrpc_get_next_serials(call->conn, req->n);
+
+ call->tx_last_serial = serial + req->n - 1;
+ call->tx_last_sent = req->now;
+ xmit_ts = rxrpc_prepare_txqueue(tq, req);
+ prefetch(tq->next);
+
+ for (int i = 0;;) {
+ int ix = seq & RXRPC_TXQ_MASK;
+ struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];
+
+ _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);
+
+ /* Record (re-)transmission for RACK [RFC8985 6.1]. */
+ if (__test_and_clear_bit(ix, &tq->segment_lost))
+ call->tx_nr_lost--;
+ if (req->retrans) {
+ __set_bit(ix, &tq->ever_retransmitted);
+ __set_bit(ix, &tq->segment_retransmitted);
+ call->tx_nr_resent++;
+ } else {
+ call->tx_nr_sent++;
+ start_tlp = true;
+ }
+ tq->segment_xmit_ts[ix] = xmit_ts;
+ tq->segment_serial[ix] = serial;
+ if (i + 1 == req->n)
+ /* Only sample the last subpacket in a jumbo. */
+ __set_bit(ix, &tq->rtt_samples);
+ len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i);
+ serial++;
+ seq++;
+ i++;
+ if (i >= req->n)
+ break;
+ if (!(seq & RXRPC_TXQ_MASK)) {
+ tq = tq->next;
+ trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance);
+ xmit_ts = rxrpc_prepare_txqueue(tq, req);
+ }
+ }
- call->peer->rtt_last_req = now;
- if (call->peer->rtt_count > 1) {
- ktime_t delay = rxrpc_get_rto_backoff(call->peer, false);
+ /* Set timeouts */
+ if (req->tlp_probe) {
+ /* Sending TLP loss probe [RFC8985 7.3]. */
+ call->tlp_serial = serial - 1;
+ call->tlp_seq = seq - 1;
+ } else if (start_tlp) {
+ /* Schedule TLP loss probe [RFC8985 7.2]. */
+ ktime_t pto;
+
+ if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+ /* The first packet may take longer to elicit a response. */
+ pto = NSEC_PER_SEC;
+ else
+ pto = rxrpc_tlp_calc_pto(call, req->now);
- call->ack_lost_at = ktime_add(now, delay);
- trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack);
- }
+ call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO;
+ call->rack_timo_at = ktime_add(req->now, pto);
+ trace_rxrpc_rack_timer(call, pto, false);
+ trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto);
}
if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));
- call->expect_rx_by = ktime_add(now, delay);
+ call->expect_rx_by = ktime_add(req->now, delay);
trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
}
- rxrpc_set_keepalive(call, now);
+ rxrpc_set_keepalive(call, req->now);
+ return len;
}
/*
- * send a packet through the transport endpoint
+ * Send one or more packets through the transport endpoint
*/
-static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req)
{
- struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
struct rxrpc_connection *conn = call->conn;
enum rxrpc_tx_point frag;
+ struct rxrpc_txqueue *tq = req->tq;
+ struct rxrpc_txbuf *txb;
struct msghdr msg;
+ rxrpc_seq_t seq = req->seq;
size_t len;
- int ret;
+ bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags);
+ int ret, stat_ix;
- _enter("%x,{%d}", txb->seq, txb->len);
+ _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1);
- len = rxrpc_prepare_data_packet(call, txb);
+ stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1;
+ atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]);
- if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
- static int lose;
- if ((lose++ & 7) == 7) {
- ret = 0;
- trace_rxrpc_tx_data(call, txb->seq, txb->serial,
- txb->flags, true);
- goto done;
- }
- }
+ len = rxrpc_prepare_data_packet(call, req);
+ txb = tq->bufs[seq & RXRPC_TXQ_MASK];
- iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, req->n, len);
msg.msg_name = &call->peer->srx.transport;
msg.msg_namelen = call->peer->srx.transport_len;
@@ -460,16 +652,11 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
msg.msg_controllen = 0;
msg.msg_flags = MSG_SPLICE_PAGES;
- /* Track what we've attempted to transmit at least once so that the
- * retransmission algorithm doesn't try to resend what we haven't sent
- * yet.
+ /* Send the packet with the don't fragment bit set unless we think it's
+ * too big or if this is a retransmission.
*/
- if (txb->seq == call->tx_transmitted + 1)
- call->tx_transmitted = txb->seq;
-
- /* send the packet with the don't fragment bit set if we currently
- * think it's small enough */
- if (txb->len >= call->peer->maxdata) {
+ if (seq == call->tx_transmitted + 1 &&
+ len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
rxrpc_local_dont_fragment(conn->local, false);
frag = rxrpc_tx_point_call_data_frag;
} else {
@@ -477,7 +664,25 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
frag = rxrpc_tx_point_call_data_nofrag;
}
-retry:
+ /* Track what we've attempted to transmit at least once so that the
+ * retransmission algorithm doesn't try to resend what we haven't sent
+ * yet.
+ */
+ if (seq == call->tx_transmitted + 1)
+ call->tx_transmitted = seq + req->n - 1;
+
+ if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+ static int lose;
+
+ if ((lose++ & 7) == 7) {
+ ret = 0;
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags,
+ rxrpc_txdata_inject_loss);
+ conn->peer->last_tx_at = ktime_get_seconds();
+ goto done;
+ }
+ }
+
/* send the packet by UDP
* - returns -EMSGSIZE if UDP would have to fragment the packet
* to go out of the interface
@@ -488,36 +693,35 @@ retry:
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- if (ret < 0) {
+ if (ret == -EMSGSIZE) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize);
+ trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag);
+ ret = 0;
+ } else if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
} else {
- trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
+ trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag);
}
rxrpc_tx_backoff(call, ret);
- if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_frag) {
- rxrpc_local_dont_fragment(conn->local, false);
- frag = rxrpc_tx_point_call_data_frag;
- goto retry;
- }
-done:
- if (ret >= 0) {
- rxrpc_tstamp_data_packets(call, txb);
- } else {
- /* Cancel the call if the initial transmission fails,
- * particularly if that's due to network routing issues that
- * aren't going away anytime soon. The layer above can arrange
- * the retransmission.
+ if (ret < 0) {
+ /* Cancel the call if the initial transmission fails or if we
+ * hit an error due to network routing issues that aren't going
+ * away anytime soon. The layer above can arrange the
+ * retransmission.
*/
- if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+ if (new_call ||
+ ret == -ENETUNREACH ||
+ ret == -EHOSTUNREACH ||
+ ret == -ECONNREFUSED)
rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_USER_ABORT, ret);
}
- _leave(" = %d [%u]", ret, call->peer->maxdata);
- return ret;
+done:
+ _leave(" = %d [%u]", ret, call->peer->max_data);
}
/*
@@ -692,41 +896,3 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
peer->last_tx_at = ktime_get_seconds();
_leave("");
}
-
-/*
- * Schedule an instant Tx resend.
- */
-static inline void rxrpc_instant_resend(struct rxrpc_call *call,
- struct rxrpc_txbuf *txb)
-{
- if (!__rxrpc_call_is_complete(call))
- kdebug("resend");
-}
-
-/*
- * Transmit one packet.
- */
-void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
-{
- int ret;
-
- ret = rxrpc_send_data_packet(call, txb);
- if (ret < 0) {
- switch (ret) {
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- 0, ret);
- break;
- default:
- _debug("need instant resend %d", ret);
- rxrpc_instant_resend(call, txb);
- }
- } else {
- ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
-
- call->resend_at = ktime_add(ktime_get_real(), delay);
- trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx);
- }
-}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 552ba84a255c..d82e44a3901b 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -102,6 +102,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
*/
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
+ unsigned int max_data;
+
/* wind down the local interface MTU */
if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu)
peer->if_mtu = mtu;
@@ -120,11 +122,17 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
}
}
- if (mtu < peer->mtu) {
- spin_lock(&peer->lock);
- peer->mtu = mtu;
- peer->maxdata = peer->mtu - peer->hdrsize;
- spin_unlock(&peer->lock);
+ max_data = max_t(int, mtu - peer->hdrsize, 500);
+ if (max_data < peer->max_data) {
+ if (peer->pmtud_good > max_data)
+ peer->pmtud_good = max_data;
+ if (peer->pmtud_bad > max_data + 1)
+ peer->pmtud_bad = max_data + 1;
+
+ trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp);
+ write_seqcount_begin(&peer->mtu_lock);
+ peer->max_data = max_data;
+ write_seqcount_end(&peer->mtu_lock);
}
}
@@ -205,23 +213,23 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb,
struct rxrpc_call *call;
HLIST_HEAD(error_targets);
- spin_lock(&peer->lock);
+ spin_lock_irq(&peer->lock);
hlist_move_list(&peer->error_targets, &error_targets);
while (!hlist_empty(&error_targets)) {
call = hlist_entry(error_targets.first,
struct rxrpc_call, error_link);
hlist_del_init(&call->error_link);
- spin_unlock(&peer->lock);
+ spin_unlock_irq(&peer->lock);
rxrpc_see_call(call, rxrpc_call_see_distribute_error);
rxrpc_set_call_completion(call, compl, 0, -err);
- rxrpc_input_call_event(call, skb);
+ rxrpc_input_call_event(call);
- spin_lock(&peer->lock);
+ spin_lock_irq(&peer->lock);
}
- spin_unlock(&peer->lock);
+ spin_unlock_irq(&peer->lock);
}
/*
@@ -347,3 +355,89 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
_leave("");
}
+
+/*
+ * Do path MTU probing.
+ *
+ * Advance the peer's path-MTU search using the outcome of the last probe.
+ * The search state is the triple (pmtud_good, pmtud_trial, pmtud_bad) held on
+ * the peer; the search is considered finished once good >= bad - 1.
+ *
+ * @conn: Connection whose peer is being probed; conn->pmtud_probe is compared
+ *        against @acked_serial and is cleared once a probe has been resolved.
+ * @acked_serial: Serial number to compare against the outstanding probe.
+ * @sendmsg_fail: True to treat the probe as not having got through.
+ *
+ * Nothing is transmitted from here; when another probe is wanted,
+ * peer->pmtud_pending is set.
+ */
+void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial,
+ bool sendmsg_fail)
+{
+ struct rxrpc_peer *peer = conn->peer;
+ unsigned int max_data = peer->max_data;
+ int good, trial, bad, jumbo;
+
+ good = peer->pmtud_good;
+ trial = peer->pmtud_trial;
+ bad = peer->pmtud_bad;
+ /* Converged: there is no size left to try between good and bad. */
+ if (good >= bad - 1) {
+ conn->pmtud_probe = 0;
+ peer->pmtud_lost = false;
+ return;
+ }
+
+ /* Not currently probing (peer->pmtud_probing clear), so there is no
+ * result to evaluate: just ask for a probe to be sent.
+ */
+ if (!peer->pmtud_probing)
+ goto send_probe;
+
+ if (sendmsg_fail || after(acked_serial, conn->pmtud_probe)) {
+ /* Retry a lost probe. */
+ /* pmtud_lost gives each trial size one retry before the size is
+ * recorded as bad.
+ */
+ if (!peer->pmtud_lost) {
+ trace_rxrpc_pmtud_lost(conn, acked_serial);
+ conn->pmtud_probe = 0;
+ peer->pmtud_lost = true;
+ goto send_probe;
+ }
+
+ /* The probed size didn't seem to get through. */
+ bad = trial;
+ peer->pmtud_bad = bad;
+ if (bad <= max_data)
+ max_data = bad - 1;
+ } else {
+ /* It did get through. */
+ good = trial;
+ peer->pmtud_good = good;
+ if (good > max_data)
+ max_data = good;
+ }
+
+ /* Never exceed peer->ackr_max_data.
+ * NOTE(review): presumably the limit the peer advertised in its ACKs -
+ * confirm against where ackr_max_data is set.
+ */
+ max_data = umin(max_data, peer->ackr_max_data);
+ if (max_data != peer->max_data) {
+ /* Publish the new value inside an mtu_lock write section. */
+ preempt_disable();
+ write_seqcount_begin(&peer->mtu_lock);
+ peer->max_data = max_data;
+ write_seqcount_end(&peer->mtu_lock);
+ preempt_enable();
+ }
+
+ jumbo = max_data + sizeof(struct rxrpc_jumbo_header);
+ jumbo /= RXRPC_JUMBO_SUBPKTLEN;
+ peer->pmtud_jumbo = jumbo;
+
+ trace_rxrpc_pmtud_rx(conn, acked_serial);
+ conn->pmtud_probe = 0;
+ peer->pmtud_lost = false;
+
+ /* Choose the next trial size.  The RXRPC_JUMBO(n) sizes that still lie
+ * strictly between good and bad are tried first, checked in the order
+ * n = 2, 4, 3, 6, 5, 8, 7; otherwise the remaining range is bisected.
+ */
+ if (good < RXRPC_JUMBO(2) && bad > RXRPC_JUMBO(2))
+ trial = RXRPC_JUMBO(2);
+ else if (good < RXRPC_JUMBO(4) && bad > RXRPC_JUMBO(4))
+ trial = RXRPC_JUMBO(4);
+ else if (good < RXRPC_JUMBO(3) && bad > RXRPC_JUMBO(3))
+ trial = RXRPC_JUMBO(3);
+ else if (good < RXRPC_JUMBO(6) && bad > RXRPC_JUMBO(6))
+ trial = RXRPC_JUMBO(6);
+ else if (good < RXRPC_JUMBO(5) && bad > RXRPC_JUMBO(5))
+ trial = RXRPC_JUMBO(5);
+ else if (good < RXRPC_JUMBO(8) && bad > RXRPC_JUMBO(8))
+ trial = RXRPC_JUMBO(8);
+ else if (good < RXRPC_JUMBO(7) && bad > RXRPC_JUMBO(7))
+ trial = RXRPC_JUMBO(7);
+ else
+ trial = (good + bad) / 2;
+ peer->pmtud_trial = trial;
+
+ /* The range may have collapsed after the update above. */
+ if (good >= bad)
+ return;
+
+send_probe:
+ peer->pmtud_pending = true;
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 49dcda67a0d5..e1c63129586b 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -162,6 +162,11 @@ static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
#endif
peer->if_mtu = 1500;
+ if (peer->max_data < peer->if_mtu - peer->hdrsize) {
+ trace_rxrpc_pmtud_reduce(peer, 0, peer->if_mtu - peer->hdrsize,
+ rxrpc_pmtud_reduce_route);
+ peer->max_data = peer->if_mtu - peer->hdrsize;
+ }
memset(&fl, 0, sizeof(fl));
switch (peer->srx.transport.family) {
@@ -199,8 +204,16 @@ static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
}
peer->if_mtu = dst_mtu(dst);
+ peer->hdrsize += dst->header_len + dst->trailer_len;
+ peer->tx_seg_max = dst->dev->gso_max_segs;
dst_release(dst);
+ peer->max_data = umin(RXRPC_JUMBO(1), peer->if_mtu - peer->hdrsize);
+ peer->pmtud_good = 500;
+ peer->pmtud_bad = peer->if_mtu - peer->hdrsize + 1;
+ peer->pmtud_trial = umin(peer->max_data, peer->pmtud_bad - 1);
+ peer->pmtud_pending = true;
+
_leave(" [if_mtu %u]", peer->if_mtu);
}
@@ -222,11 +235,9 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
peer->service_conns = RB_ROOT;
seqlock_init(&peer->service_conn_lock);
spin_lock_init(&peer->lock);
- spin_lock_init(&peer->rtt_input_lock);
+ seqcount_init(&peer->mtu_lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
-
- rxrpc_peer_init_rtt(peer);
-
+ peer->recent_srtt_us = UINT_MAX;
peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
trace_rxrpc_peer(peer->debug_id, 1, why);
}
@@ -242,9 +253,7 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
unsigned long hash_key)
{
peer->hash_key = hash_key;
- rxrpc_assess_MTU_size(local, peer);
- peer->mtu = peer->if_mtu;
- peer->rtt_last_req = ktime_get_real();
+
switch (peer->srx.transport.family) {
case AF_INET:
@@ -268,7 +277,9 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
}
peer->hdrsize += sizeof(struct rxrpc_wire_header);
- peer->maxdata = peer->mtu - peer->hdrsize;
+ peer->max_data = peer->if_mtu - peer->hdrsize;
+
+ rxrpc_assess_MTU_size(local, peer);
}
/*
@@ -304,6 +315,7 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer)
* Set up a new incoming peer. There shouldn't be any other matching peers
* since we've already done a search in the list from the non-reentrant context
* (the data_ready handler) that is the only place we can add new peers.
+ * Called with interrupts disabled.
*/
void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
{
@@ -479,7 +491,7 @@ EXPORT_SYMBOL(rxrpc_kernel_get_call_peer);
*/
unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer)
{
- return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX;
+ return READ_ONCE(peer->recent_srtt_us);
}
EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 263a2251e3d2..d803562ca0ac 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -52,7 +52,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
enum rxrpc_call_state state;
- rxrpc_seq_t acks_hard_ack;
+ rxrpc_seq_t tx_bottom;
char lbuff[50], rbuff[50];
long timeout = 0;
@@ -79,7 +79,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
if (state != RXRPC_CALL_SERVER_PREALLOC)
timeout = ktime_ms_delta(READ_ONCE(call->expect_rx_by), ktime_get_real());
- acks_hard_ack = READ_ONCE(call->acks_hard_ack);
+ tx_bottom = READ_ONCE(call->tx_bottom);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
@@ -93,7 +93,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
rxrpc_call_states[state],
call->abort_code,
call->debug_id,
- acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
+ tx_bottom, READ_ONCE(call->tx_top) - tx_bottom,
call->ackr_window, call->ackr_wtop - call->ackr_window,
call->rx_serial,
call->cong_cwnd,
@@ -283,9 +283,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
- "Proto Local "
- " Remote "
- " Use SST MTU LastUse RTT RTO\n"
+ "Proto Local Remote Use SST Maxd LastUse RTT RTO\n"
);
return 0;
}
@@ -298,16 +296,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
now = ktime_get_seconds();
seq_printf(seq,
- "UDP %-47.47s %-47.47s %3u"
- " %3u %5u %6llus %8u %8u\n",
+ "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8d %8d\n",
lbuff,
rbuff,
refcount_read(&peer->ref),
peer->cong_ssthresh,
- peer->mtu,
+ peer->max_data,
now - peer->last_tx_at,
- peer->srtt_us >> 3,
- peer->rto_us);
+ READ_ONCE(peer->recent_srtt_us),
+ READ_ONCE(peer->recent_rto_us));
return 0;
}
@@ -476,10 +473,11 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
seq_printf(seq,
- "Data : send=%u sendf=%u fail=%u\n",
+ "Data : send=%u sendf=%u fail=%u emsz=%u\n",
atomic_read(&rxnet->stat_tx_data_send),
atomic_read(&rxnet->stat_tx_data_send_frag),
- atomic_read(&rxnet->stat_tx_data_send_fail));
+ atomic_read(&rxnet->stat_tx_data_send_fail),
+ atomic_read(&rxnet->stat_tx_data_send_msgsize));
seq_printf(seq,
"Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n",
atomic_read(&rxnet->stat_tx_data),
@@ -508,7 +506,7 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DELAY]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_IDLE]));
seq_printf(seq,
- "Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n",
+ "Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u z=%u\n",
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_REQUESTED]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DUPLICATE]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]),
@@ -517,13 +515,14 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING_RESPONSE]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]),
- atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]));
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]),
+ atomic_read(&rxnet->stat_rx_acks[0]));
seq_printf(seq,
- "Why-Req-A: acklost=%u already=%u mrtt=%u ortt=%u\n",
+ "Why-Req-A: acklost=%u mrtt=%u ortt=%u stall=%u\n",
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]),
- atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_already_on]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]),
- atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]));
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_app_stall]));
seq_printf(seq,
"Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n",
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]),
@@ -531,6 +530,30 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_slow_start]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_small_txwin]));
seq_printf(seq,
+ "Jumbo-Tx : %u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n",
+ atomic_read(&rxnet->stat_tx_jumbo[0]),
+ atomic_read(&rxnet->stat_tx_jumbo[1]),
+ atomic_read(&rxnet->stat_tx_jumbo[2]),
+ atomic_read(&rxnet->stat_tx_jumbo[3]),
+ atomic_read(&rxnet->stat_tx_jumbo[4]),
+ atomic_read(&rxnet->stat_tx_jumbo[5]),
+ atomic_read(&rxnet->stat_tx_jumbo[6]),
+ atomic_read(&rxnet->stat_tx_jumbo[7]),
+ atomic_read(&rxnet->stat_tx_jumbo[8]),
+ atomic_read(&rxnet->stat_tx_jumbo[9]));
+ seq_printf(seq,
+ "Jumbo-Rx : %u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n",
+ atomic_read(&rxnet->stat_rx_jumbo[0]),
+ atomic_read(&rxnet->stat_rx_jumbo[1]),
+ atomic_read(&rxnet->stat_rx_jumbo[2]),
+ atomic_read(&rxnet->stat_rx_jumbo[3]),
+ atomic_read(&rxnet->stat_rx_jumbo[4]),
+ atomic_read(&rxnet->stat_rx_jumbo[5]),
+ atomic_read(&rxnet->stat_rx_jumbo[6]),
+ atomic_read(&rxnet->stat_rx_jumbo[7]),
+ atomic_read(&rxnet->stat_rx_jumbo[8]),
+ atomic_read(&rxnet->stat_rx_jumbo[9]));
+ seq_printf(seq,
"Buffers : txb=%u rxb=%u\n",
atomic_read(&rxrpc_nr_txbuf),
atomic_read(&rxrpc_n_rx_skbs));
@@ -567,6 +590,8 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
atomic_set(&rxnet->stat_tx_ack_skip, 0);
memset(&rxnet->stat_tx_acks, 0, sizeof(rxnet->stat_tx_acks));
memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks));
+ memset(&rxnet->stat_tx_jumbo, 0, sizeof(rxnet->stat_tx_jumbo));
+ memset(&rxnet->stat_rx_jumbo, 0, sizeof(rxnet->stat_rx_jumbo));
memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack));
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index 4fe6b4d20ada..42f70e4636f8 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -92,11 +92,16 @@ struct rxrpc_jumbo_header {
/*
* The maximum number of subpackets that can possibly fit in a UDP packet is:
*
- * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1
- * = ((65535 - 28 - 28) / 1416) + 1
- * = 46 non-terminal packets and 1 terminal packet.
+ * (max_UDP - wirehdr + jumbohdr) / (jumbohdr + 1412)
+ * = ((65535 - 28 + 4) / 1416)
+ * = 45 non-terminal packets and 1 terminal packet.
*/
-#define RXRPC_MAX_NR_JUMBO 47
+#define RXRPC_MAX_NR_JUMBO 46
+
+/* Size of a jumbo packet with N subpackets, excluding UDP+IP */
+#define RXRPC_JUMBO(N) ((int)sizeof(struct rxrpc_wire_header) + \
+ RXRPC_JUMBO_DATALEN + \
+ ((N) - 1) * RXRPC_JUMBO_SUBPKTLEN)
/*****************************************************************************/
/*
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a482f88c5fc5..32cd5f1d541d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -36,16 +36,16 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
sk = &rx->sk;
if (rx && sk->sk_state < RXRPC_CLOSE) {
if (call->notify_rx) {
- spin_lock(&call->notify_lock);
+ spin_lock_irq(&call->notify_lock);
call->notify_rx(sk, call, call->user_call_ID);
- spin_unlock(&call->notify_lock);
+ spin_unlock_irq(&call->notify_lock);
} else {
- spin_lock(&rx->recvmsg_lock);
+ spin_lock_irq(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
rxrpc_get_call(call, rxrpc_call_get_notify_socket);
list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
}
- spin_unlock(&rx->recvmsg_lock);
+ spin_unlock_irq(&rx->recvmsg_lock);
if (!sock_flag(sk, SOCK_DEAD)) {
_debug("call %ps", sk->sk_data_ready);
@@ -337,14 +337,14 @@ try_again:
* We also want to weed out calls that got requeued whilst we were
* shovelling data out.
*/
- spin_lock(&rx->recvmsg_lock);
+ spin_lock_irq(&rx->recvmsg_lock);
l = rx->recvmsg_q.next;
call = list_entry(l, struct rxrpc_call, recvmsg_link);
if (!rxrpc_call_is_complete(call) &&
skb_queue_empty(&call->recvmsg_queue)) {
list_del_init(&call->recvmsg_link);
- spin_unlock(&rx->recvmsg_lock);
+ spin_unlock_irq(&rx->recvmsg_lock);
release_sock(&rx->sk);
trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0);
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
@@ -355,7 +355,7 @@ try_again:
list_del_init(&call->recvmsg_link);
else
rxrpc_get_call(call, rxrpc_call_get_recvmsg);
- spin_unlock(&rx->recvmsg_lock);
+ spin_unlock_irq(&rx->recvmsg_lock);
call_debug_id = call->debug_id;
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
@@ -445,9 +445,9 @@ error_unlock_call:
error_requeue_call:
if (!(flags & MSG_PEEK)) {
- spin_lock(&rx->recvmsg_lock);
+ spin_lock_irq(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
- spin_unlock(&rx->recvmsg_lock);
+ spin_unlock_irq(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
index cdab7b7d08a0..7474f88d7b18 100644
--- a/net/rxrpc/rtt.c
+++ b/net/rxrpc/rtt.c
@@ -12,22 +12,22 @@
#include "ar-internal.h"
#define RXRPC_RTO_MAX (120 * USEC_PER_SEC)
-#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */
+#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * USEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */
#define rxrpc_jiffies32 ((u32)jiffies) /* As tcp_jiffies32 */
-static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
+static u32 rxrpc_rto_min_us(struct rxrpc_call *call)
{
return 200;
}
-static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
+static u32 __rxrpc_set_rto(const struct rxrpc_call *call)
{
- return (peer->srtt_us >> 3) + peer->rttvar_us;
+ return (call->srtt_us >> 3) + call->rttvar_us;
}
+/*
+ * Bound an RTO value given in microseconds: add 100ms of slack and keep the
+ * result within the range 200ms to RXRPC_RTO_MAX.
+ */
static u32 rxrpc_bound_rto(u32 rto)
{
- return min(rto, RXRPC_RTO_MAX);
+ /* clamp() takes (val, lo, hi): the value being bounded must come first,
+ * otherwise the RXRPC_RTO_MAX ceiling is not applied to it.
+ */
+ return clamp(rto + 100000, 200000, RXRPC_RTO_MAX);
}
/*
@@ -40,10 +40,10 @@ static u32 rxrpc_bound_rto(u32 rto)
* To save cycles in the RFC 1323 implementation it was better to break
* it up into three procedures. -- erics
*/
-static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
+static void rxrpc_rtt_estimator(struct rxrpc_call *call, long sample_rtt_us)
{
long m = sample_rtt_us; /* RTT */
- u32 srtt = peer->srtt_us;
+ u32 srtt = call->srtt_us;
/* The following amusing code comes from Jacobson's
* article in SIGCOMM '88. Note that rtt and mdev
@@ -66,7 +66,7 @@ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
srtt += m; /* rtt = 7/8 rtt + 1/8 new */
if (m < 0) {
m = -m; /* m is now abs(error) */
- m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ m -= (call->mdev_us >> 2); /* similar update on mdev */
/* This is similar to one of Eifel findings.
* Eifel blocks mdev updates when rtt decreases.
* This solution is a bit different: we use finer gain
@@ -78,31 +78,31 @@ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
if (m > 0)
m >>= 3;
} else {
- m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ m -= (call->mdev_us >> 2); /* similar update on mdev */
}
- peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
- if (peer->mdev_us > peer->mdev_max_us) {
- peer->mdev_max_us = peer->mdev_us;
- if (peer->mdev_max_us > peer->rttvar_us)
- peer->rttvar_us = peer->mdev_max_us;
+ call->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
+ if (call->mdev_us > call->mdev_max_us) {
+ call->mdev_max_us = call->mdev_us;
+ if (call->mdev_max_us > call->rttvar_us)
+ call->rttvar_us = call->mdev_max_us;
}
} else {
/* no previous measure. */
srtt = m << 3; /* take the measured time to be rtt */
- peer->mdev_us = m << 1; /* make sure rto = 3*rtt */
- peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
- peer->mdev_max_us = peer->rttvar_us;
+ call->mdev_us = m << 1; /* make sure rto = 3*rtt */
+ call->rttvar_us = umax(call->mdev_us, rxrpc_rto_min_us(call));
+ call->mdev_max_us = call->rttvar_us;
}
- peer->srtt_us = max(1U, srtt);
+ call->srtt_us = umax(srtt, 1);
}
/*
* Calculate rto without backoff. This is the second half of Van Jacobson's
* routine referred to above.
*/
-static void rxrpc_set_rto(struct rxrpc_peer *peer)
+static void rxrpc_set_rto(struct rxrpc_call *call)
{
u32 rto;
@@ -113,7 +113,7 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer)
* is invisible. Actually, Linux-2.4 also generates erratic
* ACKs in some circumstances.
*/
- rto = __rxrpc_set_rto(peer);
+ rto = __rxrpc_set_rto(call);
/* 2. Fixups made earlier cannot be right.
* If we do not estimate RTO correctly without them,
@@ -124,61 +124,73 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer)
/* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
* guarantees that rto is higher.
*/
- peer->rto_us = rxrpc_bound_rto(rto);
+ call->rto_us = rxrpc_bound_rto(rto);
}
-static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
+/*
+ * Feed an RTT sample into the call's windowed running minimum
+ * (call->min_rtt), using resp_time / 1024 as the sample's timestamp.
+ */
+static void rxrpc_update_rtt_min(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
+{
+ /* Window size 5mins in approx usec (ipv4.sysctl_tcp_min_rtt_wlen) */
+ /* NOTE(review): 5ULL * NSEC_PER_SEC / 1024 works out to roughly 5 seconds
+ * in these units rather than 5 minutes - confirm which is intended.
+ */
+ u32 wlen_us = 5ULL * NSEC_PER_SEC / 1024;
+
+ /* A sample of zero is replaced by jiffies_to_usecs(1). */
+ minmax_running_min(&call->min_rtt, wlen_us, resp_time / 1024,
+ (u32)rtt_us ? : jiffies_to_usecs(1));
+}
+
+static void rxrpc_ack_update_rtt(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
{
if (rtt_us < 0)
return;
- //rxrpc_update_rtt_min(peer, rtt_us);
- rxrpc_rtt_estimator(peer, rtt_us);
- rxrpc_set_rto(peer);
+ /* Update RACK min RTT [RFC8985 6.1 Step 1]. */
+ rxrpc_update_rtt_min(call, resp_time, rtt_us);
+
+ rxrpc_rtt_estimator(call, rtt_us);
+ rxrpc_set_rto(call);
- /* RFC6298: only reset backoff on valid RTT measurement. */
- peer->backoff = 0;
+ /* Only reset backoff on valid RTT measurement [RFC6298]. */
+ call->backoff = 0;
}
/*
* Add RTT information to cache. This is called in softirq mode and has
- * exclusive access to the peer RTT data.
+ * exclusive access to the call RTT data.
*/
-void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
int rtt_slot,
rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
ktime_t send_time, ktime_t resp_time)
{
- struct rxrpc_peer *peer = call->peer;
s64 rtt_us;
rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
if (rtt_us < 0)
return;
- spin_lock(&peer->rtt_input_lock);
- rxrpc_ack_update_rtt(peer, rtt_us);
- if (peer->rtt_count < 3)
- peer->rtt_count++;
- spin_unlock(&peer->rtt_input_lock);
+ rxrpc_ack_update_rtt(call, resp_time, rtt_us);
+ if (call->rtt_count < 3)
+ call->rtt_count++;
+ call->rtt_taken++;
+
+ WRITE_ONCE(call->peer->recent_srtt_us, call->srtt_us / 8);
+ WRITE_ONCE(call->peer->recent_rto_us, call->rto_us);
trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
- peer->srtt_us >> 3, peer->rto_us);
+ rtt_us, call->srtt_us, call->rto_us);
}
/*
* Get the retransmission timeout to set in nanoseconds, backing it off each
* time we retransmit.
*/
-ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans)
{
u64 timo_us;
- u32 backoff = READ_ONCE(peer->backoff);
+ u32 backoff = READ_ONCE(call->backoff);
- timo_us = peer->rto_us;
+ timo_us = call->rto_us;
timo_us <<= backoff;
if (retrans && timo_us * 2 <= RXRPC_RTO_MAX)
- WRITE_ONCE(peer->backoff, backoff + 1);
+ WRITE_ONCE(call->backoff, backoff + 1);
if (timo_us < 1)
timo_us = 1;
@@ -186,10 +198,11 @@ ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
return ns_to_ktime(timo_us * NSEC_PER_USEC);
}
-void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
+void rxrpc_call_init_rtt(struct rxrpc_call *call)
{
- peer->rto_us = RXRPC_TIMEOUT_INIT;
- peer->mdev_us = RXRPC_TIMEOUT_INIT;
- peer->backoff = 0;
- //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
+ call->rtt_last_req = KTIME_MIN;
+ call->rto_us = RXRPC_TIMEOUT_INIT;
+ call->mdev_us = RXRPC_TIMEOUT_INIT;
+ call->backoff = 0;
+ //minmax_reset(&call->rtt_min, rxrpc_jiffies32, ~0U);
}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 48a1475e6b06..62b09d23ec08 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -148,14 +148,14 @@ error:
static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
struct rxrpc_txbuf *txb;
- size_t shdr, space;
+ size_t shdr, alloc, limit, part;
- remain = min(remain, 65535 - sizeof(struct rxrpc_wire_header));
+ remain = umin(remain, 65535 - sizeof(struct rxrpc_wire_header));
switch (call->conn->security_level) {
default:
- space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- return rxrpc_alloc_data_txbuf(call, space, 1, gfp);
+ alloc = umin(remain, RXRPC_JUMBO_DATALEN);
+ return rxrpc_alloc_data_txbuf(call, alloc, 1, gfp);
case RXRPC_SECURITY_AUTH:
shdr = sizeof(struct rxkad_level1_hdr);
break;
@@ -164,15 +164,21 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem
break;
}
- space = min_t(size_t, round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr);
- space = round_up(space, RXKAD_ALIGN);
+ limit = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN) - shdr;
+ if (remain < limit) {
+ part = remain;
+ alloc = round_up(shdr + part, RXKAD_ALIGN);
+ } else {
+ part = limit;
+ alloc = RXRPC_JUMBO_DATALEN;
+ }
- txb = rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, gfp);
+ txb = rxrpc_alloc_data_txbuf(call, alloc, RXKAD_ALIGN, gfp);
if (!txb)
return NULL;
txb->offset += shdr;
- txb->space -= shdr;
+ txb->space = part;
return txb;
}
@@ -263,13 +269,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
check = txb->seq ^ call->call_id;
hdr->data_size = htonl((u32)check << 16 | txb->len);
- txb->len += sizeof(struct rxkad_level1_hdr);
- pad = txb->len;
+ txb->pkt_len = sizeof(struct rxkad_level1_hdr) + txb->len;
+ pad = txb->pkt_len;
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
if (pad) {
memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
- txb->len += pad;
+ txb->pkt_len += pad;
}
/* start the encryption afresh */
@@ -298,7 +304,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1);
struct rxrpc_crypt iv;
struct scatterlist sg;
- size_t pad;
+ size_t content, pad;
u16 check;
int ret;
@@ -309,23 +315,20 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
rxkhdr->data_size = htonl(txb->len | (u32)check << 16);
rxkhdr->checksum = 0;
- txb->len += sizeof(struct rxkad_level2_hdr);
- pad = txb->len;
- pad = RXKAD_ALIGN - pad;
- pad &= RXKAD_ALIGN - 1;
- if (pad) {
+ content = sizeof(struct rxkad_level2_hdr) + txb->len;
+ txb->pkt_len = round_up(content, RXKAD_ALIGN);
+ pad = txb->pkt_len - content;
+ if (pad)
memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
- txb->len += pad;
- }
/* encrypt from the session key */
token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- sg_init_one(&sg, rxkhdr, txb->len);
+ sg_init_one(&sg, rxkhdr, txb->pkt_len);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x);
+ skcipher_request_set_crypt(req, &sg, &sg, txb->pkt_len, iv.x);
ret = crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
return ret;
@@ -384,19 +387,33 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
+ txb->pkt_len = txb->len;
ret = 0;
break;
case RXRPC_SECURITY_AUTH:
ret = rxkad_secure_packet_auth(call, txb, req);
+ if (txb->alloc_size == RXRPC_JUMBO_DATALEN)
+ txb->jumboable = true;
break;
case RXRPC_SECURITY_ENCRYPT:
ret = rxkad_secure_packet_encrypt(call, txb, req);
+ if (txb->alloc_size == RXRPC_JUMBO_DATALEN)
+ txb->jumboable = true;
break;
default:
ret = -EPERM;
break;
}
+ /* Clear excess space in the packet */
+ if (txb->pkt_len < txb->alloc_size) {
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ size_t gap = txb->alloc_size - txb->pkt_len;
+ void *p = whdr + 1;
+
+ memset(p + txb->pkt_len, 0, gap);
+ }
+
skcipher_request_free(req);
_leave(" = %d [set %x]", ret, y);
return ret;
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index 085e7892d310..7ef93407be83 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -503,7 +503,7 @@ static int rxperf_process_call(struct rxperf_call *call)
reply_len + sizeof(rxperf_magic_cookie));
while (reply_len > 0) {
- len = min_t(size_t, reply_len, PAGE_SIZE);
+ len = umin(reply_len, PAGE_SIZE);
bvec_set_page(&bv, ZERO_PAGE(0), len, 0);
iov_iter_bvec(&msg.msg_iter, WRITE, &bv, 1, len);
msg.msg_flags = MSG_MORE;
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index cb8dd1d3b1d4..9784adc8f275 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -114,10 +114,10 @@ found:
if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
ret = conn->security->init_connection_security(conn, token);
if (ret == 0) {
- spin_lock(&conn->state_lock);
+ spin_lock_irq(&conn->state_lock);
if (conn->state == RXRPC_CONN_CLIENT_UNSECURED)
conn->state = RXRPC_CONN_CLIENT;
- spin_unlock(&conn->state_lock);
+ spin_unlock_irq(&conn->state_lock);
}
}
mutex_unlock(&conn->security_lock);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 6abb8eec1b2b..c4c8b718cafa 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -94,9 +94,11 @@ no_wait:
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
{
+ rxrpc_seq_t tx_bottom = READ_ONCE(call->tx_bottom);
+
if (_tx_win)
- *_tx_win = call->tx_bottom;
- return call->tx_prepared - call->tx_bottom < 256;
+ *_tx_win = tx_bottom;
+ return call->send_top - tx_bottom < 256;
}
/*
@@ -132,13 +134,13 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
rxrpc_seq_t tx_start, tx_win;
signed long rtt, timeout;
- rtt = READ_ONCE(call->peer->srtt_us) >> 3;
+ rtt = READ_ONCE(call->srtt_us) >> 3;
rtt = usecs_to_jiffies(rtt) * 2;
if (rtt < 2)
rtt = 2;
timeout = rtt;
- tx_start = smp_load_acquire(&call->acks_hard_ack);
+ tx_start = READ_ONCE(call->tx_bottom);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
@@ -195,8 +197,8 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
DECLARE_WAITQUEUE(myself, current);
int ret;
- _enter(",{%u,%u,%u,%u}",
- call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize);
+ _enter(",{%u,%u,%u}",
+ call->tx_bottom, call->tx_top, call->tx_winsize);
add_wait_queue(&call->waitq, &myself);
@@ -240,37 +242,76 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
struct rxrpc_txbuf *txb,
rxrpc_notify_end_tx_t notify_end_tx)
{
+ struct rxrpc_txqueue *sq = call->send_queue;
rxrpc_seq_t seq = txb->seq;
bool poke, last = txb->flags & RXRPC_LAST_PACKET;
-
+ int ix = seq & RXRPC_TXQ_MASK;
rxrpc_inc_stat(call->rxnet, stat_tx_data);
- ASSERTCMP(txb->seq, ==, call->tx_prepared + 1);
-
- /* We have to set the timestamp before queueing as the retransmit
- * algorithm can see the packet as soon as we queue it.
- */
- txb->last_sent = ktime_get_real();
+ ASSERTCMP(txb->seq, ==, call->send_top + 1);
if (last)
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last);
else
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue);
+ if (WARN_ON_ONCE(sq->bufs[ix]))
+ trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue_dup);
+ else
+ trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue);
+
/* Add the packet to the call's output buffer */
- spin_lock(&call->tx_lock);
- poke = list_empty(&call->tx_sendmsg);
- list_add_tail(&txb->call_link, &call->tx_sendmsg);
- call->tx_prepared = seq;
- if (last)
+ poke = (READ_ONCE(call->tx_bottom) == call->send_top);
+ sq->bufs[ix] = txb;
+ /* Order send_top after the queue->next pointer and txb content. */
+ smp_store_release(&call->send_top, seq);
+ if (last) {
rxrpc_notify_end_tx(rx, call, notify_end_tx);
- spin_unlock(&call->tx_lock);
+ call->send_queue = NULL;
+ }
if (poke)
rxrpc_poke_call(call, rxrpc_call_poke_start);
}
/*
+ * Allocate a new txqueue unit and add it to the transmission queue.
+ *
+ * The unit is appended after call->send_queue if there is one; otherwise it
+ * becomes the first unit of the call's queue.
+ *
+ * Returns 0 on success, or -ENOMEM if the allocation fails or if the call
+ * unexpectedly already has a tx_queue while send_queue is unset.
+ */
+static int rxrpc_alloc_txqueue(struct sock *sk, struct rxrpc_call *call)
+{
+ struct rxrpc_txqueue *tq;
+
+ /* Zeroed allocation using the socket's allocation mode. */
+ tq = kzalloc(sizeof(*tq), sk->sk_allocation);
+ if (!tq)
+ return -ENOMEM;
+
+ /* NOTE(review): KTIME_MIN / UINT_MAX look like "not yet transmitted"
+ * sentinels - confirm against the code that fills these in.
+ */
+ tq->xmit_ts_base = KTIME_MIN;
+ for (int i = 0; i < RXRPC_NR_TXQUEUE; i++)
+ tq->segment_xmit_ts[i] = UINT_MAX;
+
+ if (call->send_queue) {
+ /* Extend the queue: the new unit starts at the sequence number
+ * following send_top and is linked after the current unit.
+ */
+ tq->qbase = call->send_top + 1;
+ call->send_queue->next = tq;
+ call->send_queue = tq;
+ } else if (WARN_ON(call->tx_queue)) {
+ /* No send queue but a Tx queue already exists: unexpected, so
+ * warn and discard the new unit.
+ */
+ kfree(tq);
+ return -ENOMEM;
+ } else {
+ /* We start at seq 1, so pretend seq 0 is hard-acked. */
+ tq->nr_reported_acks = 1;
+ tq->segment_acked = 1UL;
+ tq->qbase = 0;
+ call->tx_qbase = 0;
+ call->send_queue = tq;
+ call->tx_qtail = tq;
+ call->tx_queue = tq;
+ }
+
+ trace_rxrpc_tq(call, tq, call->send_top, rxrpc_tq_alloc);
+ return 0;
+}
+
+/*
* send data through a socket
* - must be called in process context
* - The caller holds the call user access mutex, but not the socket lock.
@@ -344,6 +385,13 @@ reload:
if (!rxrpc_check_tx_space(call, NULL))
goto wait_for_space;
+ /* See if we need to begin/extend the Tx queue. */
+ if (!call->send_queue || !((call->send_top + 1) & RXRPC_TXQ_MASK)) {
+ ret = rxrpc_alloc_txqueue(sk, call);
+ if (ret < 0)
+ goto maybe_error;
+ }
+
/* Work out the maximum size of a packet. Assume that
* the security header is going to be in the padded
* region (enc blocksize), but the trailer is not.
@@ -360,7 +408,7 @@ reload:
/* append next segment of data to the current buffer */
if (msg_data_left(msg) > 0) {
- size_t copy = min_t(size_t, txb->space, msg_data_left(msg));
+ size_t copy = umin(txb->space, msg_data_left(msg));
_debug("add %zu", copy);
if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset,
@@ -385,16 +433,12 @@ reload:
(msg_data_left(msg) == 0 && !more)) {
if (msg_data_left(msg) == 0 && !more)
txb->flags |= RXRPC_LAST_PACKET;
- else if (call->tx_top - call->acks_hard_ack <
- call->tx_winsize)
- txb->flags |= RXRPC_MORE_PACKETS;
ret = call->security->secure_packet(call, txb);
if (ret < 0)
goto out;
txb->kvec[0].iov_len += txb->len;
- txb->len = txb->kvec[0].iov_len;
rxrpc_queue_packet(rx, call, txb, notify_end_tx);
txb = NULL;
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 9bf9a1f6e4cb..46a20cf4c402 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -11,6 +11,8 @@
#include "ar-internal.h"
static struct ctl_table_header *rxrpc_sysctl_reg_table;
+static const unsigned int rxrpc_rx_mtu_min = 500;
+static const unsigned int rxrpc_jumbo_max = RXRPC_MAX_NR_JUMBO;
static const unsigned int four = 4;
static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
@@ -115,7 +117,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)SYSCTL_ONE,
+ .extra1 = (void *)&rxrpc_rx_mtu_min,
.extra2 = (void *)&n_65535,
},
{
@@ -125,7 +127,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)SYSCTL_ONE,
- .extra2 = (void *)&four,
+ .extra2 = (void *)&rxrpc_jumbo_max,
},
};
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index c3913d8a50d3..131d9e55c8e9 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -24,7 +24,7 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_
size_t total, hoff;
void *buf;
- txb = kmalloc(sizeof(*txb), gfp);
+ txb = kzalloc(sizeof(*txb), gfp);
if (!txb)
return NULL;
@@ -43,20 +43,14 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_
whdr = buf + hoff;
- INIT_LIST_HEAD(&txb->call_link);
- INIT_LIST_HEAD(&txb->tx_link);
refcount_set(&txb->ref, 1);
- txb->last_sent = KTIME_MIN;
txb->call_debug_id = call->debug_id;
txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->alloc_size = data_size;
txb->space = data_size;
- txb->len = 0;
txb->offset = sizeof(*whdr);
txb->flags = call->conn->out_clientflag;
- txb->ack_why = 0;
- txb->seq = call->tx_prepared + 1;
- txb->serial = 0;
- txb->cksum = 0;
+ txb->seq = call->send_top + 1;
txb->nr_kvec = 1;
txb->kvec[0].iov_base = whdr;
txb->kvec[0].iov_len = sizeof(*whdr);
@@ -79,84 +73,6 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_
return txb;
}
-/*
- * Allocate and partially initialise an ACK packet.
- */
-struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size)
-{
- struct rxrpc_wire_header *whdr;
- struct rxrpc_acktrailer *trailer;
- struct rxrpc_ackpacket *ack;
- struct rxrpc_txbuf *txb;
- gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
- void *buf, *buf2 = NULL;
- u8 *filler;
-
- txb = kmalloc(sizeof(*txb), gfp);
- if (!txb)
- return NULL;
-
- buf = page_frag_alloc(&call->local->tx_alloc,
- sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
- if (!buf) {
- kfree(txb);
- return NULL;
- }
-
- if (sack_size) {
- buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
- if (!buf2) {
- page_frag_free(buf);
- kfree(txb);
- return NULL;
- }
- }
-
- whdr = buf;
- ack = buf + sizeof(*whdr);
- filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
- trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;
-
- INIT_LIST_HEAD(&txb->call_link);
- INIT_LIST_HEAD(&txb->tx_link);
- refcount_set(&txb->ref, 1);
- txb->call_debug_id = call->debug_id;
- txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
- txb->space = 0;
- txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer);
- txb->offset = 0;
- txb->flags = call->conn->out_clientflag;
- txb->ack_rwind = 0;
- txb->seq = 0;
- txb->serial = 0;
- txb->cksum = 0;
- txb->nr_kvec = 3;
- txb->kvec[0].iov_base = whdr;
- txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack);
- txb->kvec[1].iov_base = buf2;
- txb->kvec[1].iov_len = sack_size;
- txb->kvec[2].iov_base = filler;
- txb->kvec[2].iov_len = 3 + sizeof(*trailer);
-
- whdr->epoch = htonl(call->conn->proto.epoch);
- whdr->cid = htonl(call->cid);
- whdr->callNumber = htonl(call->call_id);
- whdr->seq = 0;
- whdr->type = RXRPC_PACKET_TYPE_ACK;
- whdr->flags = 0;
- whdr->userStatus = 0;
- whdr->securityIndex = call->security_ix;
- whdr->_rsvd = 0;
- whdr->serviceId = htons(call->dest_srx.srx_service);
-
- get_page(virt_to_head_page(trailer));
-
- trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1,
- rxrpc_txbuf_alloc_ack);
- atomic_inc(&rxrpc_nr_txbuf);
- return txb;
-}
-
void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
{
int r;
@@ -179,7 +95,8 @@ static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb)
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0,
rxrpc_txbuf_free);
for (i = 0; i < txb->nr_kvec; i++)
- if (txb->kvec[i].iov_base)
+ if (txb->kvec[i].iov_base &&
+ !is_zero_pfn(page_to_pfn(virt_to_page(txb->kvec[i].iov_base))))
page_frag_free(txb->kvec[i].iov_base);
kfree(txb);
atomic_dec(&rxrpc_nr_txbuf);
@@ -202,37 +119,3 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
rxrpc_free_txbuf(txb);
}
}
-
-/*
- * Shrink the transmit buffer.
- */
-void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
-{
- struct rxrpc_txbuf *txb;
- rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack);
- bool wake = false;
-
- _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
-
- while ((txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link))) {
- hard_ack = smp_load_acquire(&call->acks_hard_ack);
- if (before(hard_ack, txb->seq))
- break;
-
- if (txb->seq != call->tx_bottom + 1)
- rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step);
- ASSERTCMP(txb->seq, ==, call->tx_bottom + 1);
- smp_store_release(&call->tx_bottom, call->tx_bottom + 1);
- list_del_rcu(&txb->call_link);
-
- trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
-
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
- if (after(call->acks_hard_ack, call->tx_bottom + 128))
- wake = true;
- }
-
- if (wake)
- wake_up(&call->waitq);
-}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a5e87f9ea986..2ca5332cfcc5 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -537,6 +537,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
return unlikely((s64)skb->tstamp > (s64)(now + q->horizon));
}
+#define FQDR(reason) SKB_DROP_REASON_FQ_##reason
+
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -548,7 +550,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX);
if (unlikely(q->band_pkt_count[band] >= sch->limit)) {
q->stat_band_drops[band]++;
- return qdisc_drop(skb, sch, to_free);
+ return qdisc_drop_reason(skb, sch, to_free,
+ FQDR(BAND_LIMIT));
}
now = ktime_get_ns();
@@ -558,8 +561,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Check if packet timestamp is too far in the future. */
if (fq_packet_beyond_horizon(skb, q, now)) {
if (q->horizon_drop) {
- q->stat_horizon_drops++;
- return qdisc_drop(skb, sch, to_free);
+ q->stat_horizon_drops++;
+ return qdisc_drop_reason(skb, sch, to_free,
+ FQDR(HORIZON_LIMIT));
}
q->stat_horizon_caps++;
skb->tstamp = now + q->horizon;
@@ -572,7 +576,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (f != &q->internal) {
if (unlikely(f->qlen >= q->flow_plimit)) {
q->stat_flows_plimit++;
- return qdisc_drop(skb, sch, to_free);
+ return qdisc_drop_reason(skb, sch, to_free,
+ FQDR(FLOW_LIMIT));
}
if (fq_flow_is_detached(f)) {
@@ -597,6 +602,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_SUCCESS;
}
+#undef FQDR
static void fq_check_throttled(struct fq_sched_data *q, u64 now)
{
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index a4b8296a2fa1..65d5b59da583 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -652,6 +652,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
if (!p)
return -ENOMEM;
}
+ if (ctl->limit == 1) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+ return -EINVAL;
+ }
sch_tree_lock(sch);
if (ctl->quantum)
q->quantum = ctl->quantum;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9e6c69d18581..19ebff1c2579 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1117,7 +1117,10 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
ini->check_smcrv2 = true;
ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
if (!(ini->smcr_version & SMC_V2) ||
- smc->clcsock->sk->sk_family != AF_INET ||
+#if IS_ENABLED(CONFIG_IPV6)
+ (smc->clcsock->sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) ||
+#endif
!smc_clc_ueid_count() ||
smc_find_rdma_device(smc, ini))
ini->smcr_version &= ~SMC_V2;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 500952c2e67b..ede4d5f3111b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -795,9 +795,14 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
if (lgr->smc_version == SMC_V2) {
lnk->smcibdev = ini->smcrv2.ib_dev_v2;
lnk->ibport = ini->smcrv2.ib_port_v2;
+ lnk->wr_rx_sge_cnt = lnk->smcibdev->ibdev->attrs.max_recv_sge < 2 ? 1 : 2;
+ lnk->wr_rx_buflen = smc_link_shared_v2_rxbuf(lnk) ?
+ SMC_WR_BUF_SIZE : SMC_WR_BUF_V2_SIZE;
} else {
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
+ lnk->wr_rx_sge_cnt = 1;
+ lnk->wr_rx_buflen = SMC_WR_BUF_SIZE;
}
get_device(&lnk->smcibdev->ibdev->dev);
atomic_inc(&lnk->smcibdev->lnk_cnt);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 69b54ecd6503..48a1b1dcb576 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -122,10 +122,14 @@ struct smc_link {
} ____cacheline_aligned_in_smp;
struct completion tx_ref_comp;
- struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
+ u8 *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */
/* above three vectors have wr_rx_cnt elements and use the same index */
+ int wr_rx_sge_cnt; /* rx sge, V1 is 1, V2 is either 2 or 1 */
+ int wr_rx_buflen; /* buffer len for the first sge, len for the
+ * second sge is lgr shared if rx sge is 2.
+ */
dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/
u64 wr_rx_id; /* seq # of last recv WR */
@@ -506,6 +510,11 @@ static inline bool smc_link_active(struct smc_link *lnk)
return lnk->state == SMC_LNK_ACTIVE;
}
+static inline bool smc_link_shared_v2_rxbuf(struct smc_link *lnk)
+{
+ return lnk->wr_rx_sge_cnt > 1;
+}
+
static inline void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
{
sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9c563cdbea90..53828833a3f7 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -662,7 +662,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk)
/* create a queue pair within the protection domain for a link */
int smc_ib_create_queue_pair(struct smc_link *lnk)
{
- int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
struct ib_qp_init_attr qp_attr = {
.event_handler = smc_ib_qp_event_handler,
.qp_context = lnk,
@@ -676,7 +675,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_send_wr = SMC_WR_BUF_CNT * 3,
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
- .max_recv_sge = sges_per_buf,
+ .max_recv_sge = lnk->wr_rx_sge_cnt,
.max_inline_data = 0,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 018ce8133b02..f865c58c3aa7 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -997,13 +997,14 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
}
static void smc_llc_save_add_link_rkeys(struct smc_link *link,
- struct smc_link *link_new)
+ struct smc_link *link_new,
+ u8 *llc_msg)
{
struct smc_llc_msg_add_link_v2_ext *ext;
struct smc_link_group *lgr = link->lgr;
int max, i;
- ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
+ ext = (struct smc_llc_msg_add_link_v2_ext *)(llc_msg +
SMC_WR_TX_SIZE);
max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
down_write(&lgr->rmbs_lock);
@@ -1098,7 +1099,9 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
if (rc)
goto out_clear_lnk;
if (lgr->smc_version == SMC_V2) {
- smc_llc_save_add_link_rkeys(link, lnk_new);
+ u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ?
+ (u8 *)lgr->wr_rx_buf_v2 : (u8 *)llc;
+ smc_llc_save_add_link_rkeys(link, lnk_new, llc_msg);
} else {
rc = smc_llc_cli_rkey_exchange(link, lnk_new);
if (rc) {
@@ -1498,7 +1501,9 @@ int smc_llc_srv_add_link(struct smc_link *link,
if (rc)
goto out_err;
if (lgr->smc_version == SMC_V2) {
- smc_llc_save_add_link_rkeys(link, link_new);
+ u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ?
+ (u8 *)lgr->wr_rx_buf_v2 : (u8 *)add_llc;
+ smc_llc_save_add_link_rkeys(link, link_new, llc_msg);
} else {
rc = smc_llc_srv_rkey_exchange(link, link_new);
if (rc)
@@ -1807,8 +1812,12 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
if (lgr->smc_version == SMC_V2) {
struct smc_llc_msg_delete_rkey_v2 *llcv2;
- memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
- llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
+ if (smc_link_shared_v2_rxbuf(link)) {
+ memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
+ llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
+ } else {
+ llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)llc;
+ }
llcv2->num_inval_rkeys = 0;
max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 994c0cd4fddb..b04a21b8c511 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -439,7 +439,7 @@ static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
return; /* short message */
temp_wr_id = wc->wr_id;
index = do_div(temp_wr_id, link->wr_rx_cnt);
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
+ wr_rx = (struct smc_wr_rx_hdr *)(link->wr_rx_bufs + index * link->wr_rx_buflen);
hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
if (handler->type == wr_rx->type)
handler->handler(wc, wr_rx);
@@ -555,7 +555,6 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
static void smc_wr_init_sge(struct smc_link *lnk)
{
- int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE);
u32 i;
@@ -608,13 +607,14 @@ static void smc_wr_init_sge(struct smc_link *lnk)
* the larger spillover buffer, allowing easy data mapping.
*/
for (i = 0; i < lnk->wr_rx_cnt; i++) {
- int x = i * sges_per_buf;
+ int x = i * lnk->wr_rx_sge_cnt;
lnk->wr_rx_sges[x].addr =
- lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
- lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE;
+ lnk->wr_rx_dma_addr + i * lnk->wr_rx_buflen;
+ lnk->wr_rx_sges[x].length = smc_link_shared_v2_rxbuf(lnk) ?
+ SMC_WR_TX_SIZE : lnk->wr_rx_buflen;
lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey;
- if (lnk->lgr->smc_version == SMC_V2) {
+ if (lnk->lgr->smc_version == SMC_V2 && smc_link_shared_v2_rxbuf(lnk)) {
lnk->wr_rx_sges[x + 1].addr =
lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE;
lnk->wr_rx_sges[x + 1].length =
@@ -624,7 +624,7 @@ static void smc_wr_init_sge(struct smc_link *lnk)
}
lnk->wr_rx_ibs[i].next = NULL;
lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x];
- lnk->wr_rx_ibs[i].num_sge = sges_per_buf;
+ lnk->wr_rx_ibs[i].num_sge = lnk->wr_rx_sge_cnt;
}
lnk->wr_reg.wr.next = NULL;
lnk->wr_reg.wr.num_sge = 0;
@@ -655,7 +655,7 @@ void smc_wr_free_link(struct smc_link *lnk)
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
- SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
+ lnk->wr_rx_buflen * lnk->wr_rx_cnt,
DMA_FROM_DEVICE);
lnk->wr_rx_dma_addr = 0;
}
@@ -740,13 +740,11 @@ int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr)
int smc_wr_alloc_link_mem(struct smc_link *link)
{
- int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1;
-
/* allocate link related memory */
link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
if (!link->wr_tx_bufs)
goto no_mem;
- link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
+ link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, link->wr_rx_buflen,
GFP_KERNEL);
if (!link->wr_rx_bufs)
goto no_mem_wr_tx_bufs;
@@ -774,7 +772,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
if (!link->wr_tx_sges)
goto no_mem_wr_tx_rdma_sges;
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
- sizeof(link->wr_rx_sges[0]) * sges_per_buf,
+ sizeof(link->wr_rx_sges[0]) * link->wr_rx_sge_cnt,
GFP_KERNEL);
if (!link->wr_rx_sges)
goto no_mem_wr_tx_sges;
@@ -872,7 +870,7 @@ int smc_wr_create_link(struct smc_link *lnk)
smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
lnk->wr_rx_id = 0;
lnk->wr_rx_dma_addr = ib_dma_map_single(
- ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
+ ibdev, lnk->wr_rx_bufs, lnk->wr_rx_buflen * lnk->wr_rx_cnt,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
lnk->wr_rx_dma_addr = 0;
@@ -880,13 +878,15 @@ int smc_wr_create_link(struct smc_link *lnk)
goto out;
}
if (lnk->lgr->smc_version == SMC_V2) {
- lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev,
- lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE,
- DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) {
- lnk->wr_rx_v2_dma_addr = 0;
- rc = -EIO;
- goto dma_unmap;
+ if (smc_link_shared_v2_rxbuf(lnk)) {
+ lnk->wr_rx_v2_dma_addr =
+ ib_dma_map_single(ibdev, lnk->lgr->wr_rx_buf_v2,
+ SMC_WR_BUF_V2_SIZE, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) {
+ lnk->wr_rx_v2_dma_addr = 0;
+ rc = -EIO;
+ goto dma_unmap;
+ }
}
lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev,
lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE,
@@ -935,7 +935,7 @@ dma_unmap:
lnk->wr_tx_v2_dma_addr = 0;
}
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
- SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
+ lnk->wr_rx_buflen * lnk->wr_rx_cnt,
DMA_FROM_DEVICE);
lnk->wr_rx_dma_addr = 0;
out:
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index d1180370fdf4..e74940eab3a4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -949,8 +949,8 @@ void tipc_nametbl_stop(struct net *net)
}
spin_unlock_bh(&tn->nametbl_lock);
- synchronize_net();
- kfree(nt);
+ /* TODO: clear tn->nametbl, implement proper RCU rules ? */
+ kfree_rcu(nt, rcu);
}
static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 3bcd9ef8cee3..7ff6eeebaae6 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -90,6 +90,7 @@ struct publication {
/**
* struct name_table - table containing all existing port name publications
+ * @rcu: RCU callback head used for deferred freeing
* @services: name sequence hash lists
* @node_scope: all local publications with node scope
* - used by name_distr during re-init of name table
@@ -102,6 +103,7 @@ struct publication {
* @snd_nxt: next sequence number to be used
*/
struct name_table {
+ struct rcu_head rcu;
struct hlist_head services[TIPC_NAMETBL_SIZE];
struct list_head node_scope;
struct list_head cluster_scope;
diff --git a/scripts/.gitignore b/scripts/.gitignore
index 3dbb8bb2457b..c2ef68848da5 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
/asn1_compiler
+/gen_packed_field_checks
/generate_rust_target
/insert-sys-cert
/kallsyms
diff --git a/scripts/Makefile b/scripts/Makefile
index 6bcda4b9d054..546e8175e1c4 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -47,7 +47,7 @@ HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED
endif
# The following programs are only built on demand
-hostprogs += unifdef
+hostprogs += unifdef gen_packed_field_checks
# The module linker script is preprocessed on demand
targets += module.lds
diff --git a/scripts/gen_packed_field_checks.c b/scripts/gen_packed_field_checks.c
new file mode 100644
index 000000000000..60042b7616ee
--- /dev/null
+++ b/scripts/gen_packed_field_checks.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024, Intel Corporation
+#include <stdbool.h>
+#include <stdio.h>
+
+#define MAX_PACKED_FIELD_SIZE 50
+
+int main(int argc, char **argv)
+{
+ /* The first macro doesn't need a 'do {} while(0)' loop */
+ printf("#define CHECK_PACKED_FIELDS_1(fields) \\\n");
+ printf("\tCHECK_PACKED_FIELD(fields, 0)\n\n");
+
+ /* Remaining macros require a do/while loop, and are implemented
+ * recursively by calling the previous iteration's macro.
+ */
+ for (int i = 2; i <= MAX_PACKED_FIELD_SIZE; i++) {
+ printf("#define CHECK_PACKED_FIELDS_%d(fields) do { \\\n", i);
+ printf("\tCHECK_PACKED_FIELDS_%d(fields); \\\n", i - 1);
+ printf("\tCHECK_PACKED_FIELD(fields, %d); \\\n", i - 1);
+ printf("} while (0)\n\n");
+ }
+
+ printf("#define CHECK_PACKED_FIELDS(fields) \\\n");
+
+ for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++)
+ printf("\t__builtin_choose_expr(ARRAY_SIZE(fields) == %d, ({ CHECK_PACKED_FIELDS_%d(fields); }), \\\n",
+ i, i);
+
+ printf("\t({ BUILD_BUG_ON_MSG(1, \"CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than %d.\"); }) \\\n",
+ MAX_PACKED_FIELD_SIZE);
+
+ for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++)
+ printf(")");
+
+ printf("\n");
+}
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index d8201c4b1520..ec2288948795 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -801,6 +801,8 @@ class EnumSet(SpecEnumSet):
self.user_type = 'int'
self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-")
+ self.header = yaml.get('header', None)
+ self.enum_cnt_name = yaml.get('enum-cnt-name', None)
super().__init__(family, yaml)
@@ -2417,6 +2419,87 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'):
cw.block_start(line=start_line)
+def render_uapi_unified(family, cw, max_by_define, separate_ntf):
+ max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX"))
+ cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX"))
+ max_value = f"({cnt_name} - 1)"
+
+ uapi_enum_start(family, cw, family['operations'], 'enum-name')
+ val = 0
+ for op in family.msgs.values():
+ if separate_ntf and ('notify' in op or 'event' in op):
+ continue
+
+ suffix = ','
+ if op.value != val:
+ suffix = f" = {op.value},"
+ val = op.value
+ cw.p(op.enum_name + suffix)
+ val += 1
+ cw.nl()
+ cw.p(cnt_name + ('' if max_by_define else ','))
+ if not max_by_define:
+ cw.p(f"{max_name} = {max_value}")
+ cw.block_end(line=';')
+ if max_by_define:
+ cw.p(f"#define {max_name} {max_value}")
+ cw.nl()
+
+
+def render_uapi_directional(family, cw, max_by_define):
+ max_name = f"{family.op_prefix}USER_MAX"
+ cnt_name = f"__{family.op_prefix}USER_CNT"
+ max_value = f"({cnt_name} - 1)"
+
+ cw.block_start(line='enum')
+ cw.p(c_upper(f'{family.name}_MSG_USER_NONE = 0,'))
+ val = 0
+ for op in family.msgs.values():
+ if 'do' in op and 'event' not in op:
+ suffix = ','
+ if op.value and op.value != val:
+ suffix = f" = {op.value},"
+ val = op.value
+ cw.p(op.enum_name + suffix)
+ val += 1
+ cw.nl()
+ cw.p(cnt_name + ('' if max_by_define else ','))
+ if not max_by_define:
+ cw.p(f"{max_name} = {max_value}")
+ cw.block_end(line=';')
+ if max_by_define:
+ cw.p(f"#define {max_name} {max_value}")
+ cw.nl()
+
+ max_name = f"{family.op_prefix}KERNEL_MAX"
+ cnt_name = f"__{family.op_prefix}KERNEL_CNT"
+ max_value = f"({cnt_name} - 1)"
+
+ cw.block_start(line='enum')
+ cw.p(c_upper(f'{family.name}_MSG_KERNEL_NONE = 0,'))
+ val = 0
+ for op in family.msgs.values():
+ if ('do' in op and 'reply' in op['do']) or 'notify' in op or 'event' in op:
+ enum_name = op.enum_name
+ if 'event' not in op and 'notify' not in op:
+ enum_name = f'{enum_name}_REPLY'
+
+ suffix = ','
+ if op.value and op.value != val:
+ suffix = f" = {op.value},"
+ val = op.value
+ cw.p(enum_name + suffix)
+ val += 1
+ cw.nl()
+ cw.p(cnt_name + ('' if max_by_define else ','))
+ if not max_by_define:
+ cw.p(f"{max_name} = {max_value}")
+ cw.block_end(line=';')
+ if max_by_define:
+ cw.p(f"#define {max_name} {max_value}")
+ cw.nl()
+
+
def render_uapi(family, cw):
hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H"
hdr_prot = hdr_prot.replace('/', '_')
@@ -2440,6 +2523,9 @@ def render_uapi(family, cw):
if const['type'] == 'enum' or const['type'] == 'flags':
enum = family.consts[const['name']]
+ if enum.header:
+ continue
+
if enum.has_doc():
if enum.has_entry_doc():
cw.p('/**')
@@ -2472,9 +2558,12 @@ def render_uapi(family, cw):
max_val = f' = {enum.get_mask()},'
cw.p(max_name + max_val)
else:
+ cnt_name = enum.enum_cnt_name
max_name = c_upper(name_pfx + 'max')
- cw.p('__' + max_name + ',')
- cw.p(max_name + ' = (__' + max_name + ' - 1)')
+ if not cnt_name:
+ cnt_name = '__' + name_pfx + 'max'
+ cw.p(c_upper(cnt_name) + ',')
+ cw.p(max_name + ' = (' + c_upper(cnt_name) + ' - 1)')
cw.block_end(line=';')
cw.nl()
elif const['type'] == 'const':
@@ -2515,30 +2604,12 @@ def render_uapi(family, cw):
# Commands
separate_ntf = 'async-prefix' in family['operations']
- max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX"))
- cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX"))
- max_value = f"({cnt_name} - 1)"
-
- uapi_enum_start(family, cw, family['operations'], 'enum-name')
- val = 0
- for op in family.msgs.values():
- if separate_ntf and ('notify' in op or 'event' in op):
- continue
-
- suffix = ','
- if op.value != val:
- suffix = f" = {op.value},"
- val = op.value
- cw.p(op.enum_name + suffix)
- val += 1
- cw.nl()
- cw.p(cnt_name + ('' if max_by_define else ','))
- if not max_by_define:
- cw.p(f"{max_name} = {max_value}")
- cw.block_end(line=';')
- if max_by_define:
- cw.p(f"#define {max_name} {max_value}")
- cw.nl()
+ if family.msg_id_model == 'unified':
+ render_uapi_unified(family, cw, max_by_define, separate_ntf)
+ elif family.msg_id_model == 'directional':
+ render_uapi_directional(family, cw, max_by_define)
+ else:
+ raise Exception(f'Unsupported message enum-model {family.msg_id_model}')
if separate_ntf:
uapi_enum_start(family, cw, family['operations'], enum_name='async-enum')
@@ -2635,7 +2706,8 @@ def find_kernel_root(full_path):
def main():
parser = argparse.ArgumentParser(description='Netlink simple parsing generator')
- parser.add_argument('--mode', dest='mode', type=str, required=True)
+ parser.add_argument('--mode', dest='mode', type=str, required=True,
+ choices=('user', 'kernel', 'uapi'))
parser.add_argument('--spec', dest='spec', type=str, required=True)
parser.add_argument('--header', dest='header', action='store_true', default=None)
parser.add_argument('--source', dest='header', action='store_false')
@@ -2662,13 +2734,6 @@ def main():
os.sys.exit(1)
return
- supported_models = ['unified']
- if args.mode in ['user', 'kernel']:
- supported_models += ['directional']
- if parsed.msg_id_model not in supported_models:
- print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation')
- os.sys.exit(1)
-
cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out))
_, spec_kernel = find_kernel_root(args.spec)
@@ -2696,7 +2761,10 @@ def main():
cw.p('#define ' + hdr_prot)
cw.nl()
- hdr_file=os.path.basename(args.out_file[:-2]) + ".h"
+ if args.out_file:
+ hdr_file = os.path.basename(args.out_file[:-2]) + ".h"
+ else:
+ hdr_file = "generated_header_file.h"
if args.mode == 'kernel':
cw.p('#include <net/netlink.h>')
@@ -2718,12 +2786,17 @@ def main():
else:
cw.p(f'#include "{hdr_file}"')
cw.p('#include "ynl.h"')
- headers = [parsed.uapi_header]
+ headers = []
for definition in parsed['definitions']:
if 'header' in definition:
headers.append(definition['header'])
+ if args.mode == 'user':
+ headers.append(parsed.uapi_header)
+ seen_header = []
for one in headers:
- cw.p(f"#include <{one}>")
+ if one not in seen_header:
+ cw.p(f"#include <{one}>")
+ seen_header.append(one)
cw.nl()
if args.mode == "user":
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
index 99b0e8c17fca..04c7ff577bb8 100644
--- a/tools/testing/selftests/net/busy_poller.c
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -54,16 +54,16 @@ struct epoll_params {
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif
-static uint32_t cfg_port = 8000;
+static uint16_t cfg_port = 8000;
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
static char *cfg_outfile;
static int cfg_max_events = 8;
-static int cfg_ifindex;
+static uint32_t cfg_ifindex;
/* busy poll params */
static uint32_t cfg_busy_poll_usecs;
-static uint32_t cfg_busy_poll_budget;
-static uint32_t cfg_prefer_busy_poll;
+static uint16_t cfg_busy_poll_budget;
+static uint8_t cfg_prefer_busy_poll;
/* IRQ params */
static uint32_t cfg_defer_hard_irqs;
@@ -79,6 +79,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
+ unsigned long long tmp;
int ret;
int c;
@@ -86,31 +87,40 @@ static void parse_opts(int argc, char **argv)
usage(argv[0]);
while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
+ /* most options take integer values, except o and b, so reduce
+ * code duplication a bit for the common case by calling
+ * strtoull here and leave bounds checking and casting per
+ * option below.
+ */
+ if (c != 'o' && c != 'b')
+ tmp = strtoull(optarg, NULL, 0);
+
switch (c) {
case 'u':
- cfg_busy_poll_usecs = strtoul(optarg, NULL, 0);
- if (cfg_busy_poll_usecs == ULONG_MAX ||
- cfg_busy_poll_usecs > UINT32_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT32_MAX)
error(1, ERANGE, "busy_poll_usecs too large");
+
+ cfg_busy_poll_usecs = (uint32_t)tmp;
break;
case 'P':
- cfg_prefer_busy_poll = strtoul(optarg, NULL, 0);
- if (cfg_prefer_busy_poll == ULONG_MAX ||
- cfg_prefer_busy_poll > 1)
+ if (tmp == ULLONG_MAX || tmp > 1)
error(1, ERANGE,
"prefer busy poll should be 0 or 1");
+
+ cfg_prefer_busy_poll = (uint8_t)tmp;
break;
case 'g':
- cfg_busy_poll_budget = strtoul(optarg, NULL, 0);
- if (cfg_busy_poll_budget == ULONG_MAX ||
- cfg_busy_poll_budget > UINT16_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
error(1, ERANGE,
"busy poll budget must be [0, UINT16_MAX]");
+
+ cfg_busy_poll_budget = (uint16_t)tmp;
break;
case 'p':
- cfg_port = strtoul(optarg, NULL, 0);
- if (cfg_port > UINT16_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
error(1, ERANGE, "port must be <= 65535");
+
+ cfg_port = (uint16_t)tmp;
break;
case 'b':
ret = inet_aton(optarg, &cfg_bind_addr);
@@ -124,41 +134,39 @@ static void parse_opts(int argc, char **argv)
error(1, 0, "outfile invalid");
break;
case 'm':
- cfg_max_events = strtol(optarg, NULL, 0);
-
- if (cfg_max_events == LONG_MIN ||
- cfg_max_events == LONG_MAX ||
- cfg_max_events <= 0)
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
error(1, ERANGE,
- "max events must be > 0 and < LONG_MAX");
+ "max events must be > 0 and <= INT_MAX");
+
+ cfg_max_events = (int)tmp;
break;
case 'd':
- cfg_defer_hard_irqs = strtoul(optarg, NULL, 0);
-
- if (cfg_defer_hard_irqs == ULONG_MAX ||
- cfg_defer_hard_irqs > INT32_MAX)
+ if (tmp == ULLONG_MAX || tmp > INT32_MAX)
error(1, ERANGE,
"defer_hard_irqs must be <= INT32_MAX");
+
+ cfg_defer_hard_irqs = (uint32_t)tmp;
break;
case 'r':
- cfg_gro_flush_timeout = strtoull(optarg, NULL, 0);
-
- if (cfg_gro_flush_timeout == ULLONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
error(1, ERANGE,
- "gro_flush_timeout must be < ULLONG_MAX");
+ "gro_flush_timeout must be < UINT64_MAX");
+
+ cfg_gro_flush_timeout = (uint64_t)tmp;
break;
case 's':
- cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0);
-
- if (cfg_irq_suspend_timeout == ULLONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
error(1, ERANGE,
"irq_suspend_timeout must be < ULLONG_MAX");
+
+ cfg_irq_suspend_timeout = (uint64_t)tmp;
break;
case 'i':
- cfg_ifindex = strtoul(optarg, NULL, 0);
- if (cfg_ifindex == ULONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
error(1, ERANGE,
- "ifindex must be < ULONG_MAX");
+ "ifindex must be <= INT_MAX");
+
+ cfg_ifindex = (int)tmp;
break;
}
}
@@ -215,7 +223,7 @@ static void setup_queue(void)
struct netdev_napi_set_req *set_req = NULL;
struct ynl_sock *ys;
struct ynl_error yerr;
- uint32_t napi_id;
+ uint32_t napi_id = 0;
ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!ys)
@@ -277,8 +285,8 @@ static void run_poller(void)
* here
*/
epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
- epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget;
- epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll;
+ epoll_params.busy_poll_budget = cfg_busy_poll_budget;
+ epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
epoll_params.__pad = 0;
val = 1;
@@ -342,5 +350,9 @@ int main(int argc, char *argv[])
parse_opts(argc, argv);
setup_queue();
run_poller();
+
+ if (cfg_outfile)
+ free(cfg_outfile);
+
return 0;
}
diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh
index c03151e7791c..c159230c9b62 100755
--- a/tools/testing/selftests/net/fdb_notify.sh
+++ b/tools/testing/selftests/net/fdb_notify.sh
@@ -49,7 +49,7 @@ test_dup_vxlan_self()
{
ip_link_add br up type bridge vlan_filtering 1
ip_link_add vx up type vxlan id 2000 dstport 4789
- ip_link_master vx br
+ ip_link_set_master vx br
do_test_dup add "vxlan" dev vx self dst 192.0.2.1
do_test_dup del "vxlan" dev vx self dst 192.0.2.1
@@ -59,7 +59,7 @@ test_dup_vxlan_master()
{
ip_link_add br up type bridge vlan_filtering 1
ip_link_add vx up type vxlan id 2000 dstport 4789
- ip_link_master vx br
+ ip_link_set_master vx br
do_test_dup add "vxlan master" dev vx master
do_test_dup del "vxlan master" dev vx master
@@ -79,7 +79,7 @@ test_dup_macvlan_master()
ip_link_add br up type bridge vlan_filtering 1
ip_link_add dd up type dummy
ip_link_add mv up link dd type macvlan mode passthru
- ip_link_master mv br
+ ip_link_set_master mv br
do_test_dup add "macvlan master" dev mv self
do_test_dup del "macvlan master" dev mv self
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 7d885cff8d79..00bde7b6f39e 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -105,6 +105,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
+ vxlan_reserved.sh \
vxlan_symmetric_ipv6.sh \
vxlan_symmetric.sh
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1c8a26046589..2b5700b61ffa 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change"
NUM_NETIFS=4
source lib.sh
@@ -77,12 +77,16 @@ cleanup()
ping_ipv4()
{
- ping_test $h1 192.0.2.2
+ local msg=$1
+
+ ping_test $h1 192.0.2.2 "$msg"
}
ping_ipv6()
{
- ping6_test $h1 2001:db8:1::2
+ local msg=$1
+
+ ping6_test $h1 2001:db8:1::2 "$msg"
}
learning()
@@ -95,6 +99,21 @@ flooding()
flood_test $swp2 $h1 $h2
}
+pvid_change()
+{
+ # Check that changing, and then deleting, the VLAN-aware PVID of a
+ # bridge port does not affect VLAN-unaware forwarding.
+ bridge vlan add vid 3 dev $swp1 pvid untagged
+
+ ping_ipv4 " with bridge port $swp1 PVID changed"
+ ping_ipv6 " with bridge port $swp1 PVID changed"
+
+ bridge vlan del vid 3 dev $swp1 # removes the PVID installed above
+
+ ping_ipv4 " with bridge port $swp1 PVID deleted"
+ ping_ipv6 " with bridge port $swp1 PVID deleted"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7337f398f9cc..1fd40bada694 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -932,13 +932,6 @@ packets_rate()
echo $(((t1 - t0) / interval))
}
-mac_get()
-{
- local if_name=$1
-
- ip -j link show dev $if_name | jq -r '.[]["address"]'
-}
-
ether_addr_to_u64()
{
local addr="$1"
diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
new file mode 100755
index 000000000000..46c31794b91b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
@@ -0,0 +1,352 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|---------------+
+# |
+# +----|--------------------------------+
+# | SW | |
+# | +--|------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 192.0.2.17 | |
+# | | id 1000 dstport $VXPORT | |
+# | +---------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +--|----------------------------------+
+# |
+# +--|----------------------------------+
+# | | |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | |
+# | VRP2 (vrf) |
+# +-------------------------------------+
+
+: ${VXPORT:=4789} # VXLAN UDP destination port; a value preset by the caller is kept
+: ${ALL_TESTS:="
+ default_test
+ plain_test
+ reserved_0_test
+ reserved_10_test
+ reserved_31_test
+ reserved_56_test
+ reserved_63_test
+ "} # default test list; only used when ALL_TESTS is not already set
+
+NUM_NETIFS=4 # setup_prepare reads NETIFS[p1] .. NETIFS[p4]
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+ defer simple_if_fini $h1 192.0.2.1/28
+ # Add a clsact qdisc so that an ingress filter can be attached to $h1.
+ tc qdisc add dev $h1 clsact
+ defer tc qdisc del dev $h1 clsact
+ # Drop ICMP arriving at $h1; the tests read this pref 77 rule's stats.
+ tc filter add dev $h1 ingress pref 77 \
+ prot ip flower skip_hw ip_proto icmp action drop
+ defer tc filter del dev $h1 ingress pref 77
+}
+
+switch_create()
+{
+ ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip_link_set_addr br1 $(mac_get $swp1)
+ ip_link_set_up br1
+ # Underlay: address $rp1 and route 192.0.2.32/28 via the peer 192.0.2.18.
+ ip_link_set_up $rp1
+ ip_addr_add $rp1 192.0.2.17/28
+ ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18
+ # Enslave the host-facing port to the bridge and bring it up.
+ ip_link_set_master $swp1 br1
+ ip_link_set_up $swp1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 192.0.2.18/28 # VRP2 in the diagram; next hop of the switch's route
+ defer simple_if_fini $rp2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+ # $rp1 (switch side) and $rp2 (VRP2 side) carry the VXLAN underlay.
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ defer vrf_cleanup
+
+ forwarding_enable
+ defer forwarding_restore
+ # Build the topology drawn at the top of this file.
+ h1_create
+ switch_create
+
+ vrp2_create
+}
+
+vxlan_header_bytes()
+{
+	# Print the 8-byte VXLAN header for VNI $1 as colon-separated hex.
+	# Any further arguments are header bit numbers to set, where bit 0
+	# is the most significant bit of the first byte.
+	local vni=$1; shift
+	local -a extra_bits=("$@")
+	local -a hdr
+	local pos
+
+	# Start from an all-zero header with only the I flag (bit 4) set.
+	for ((pos=0; pos < 64; pos++)); do
+		hdr[pos]=$((pos == 4))
+	done
+
+	for pos in ${extra_bits[@]}; do
+		hdr[pos]=1
+	done
+
+	# Bits 32..55 carry the VNI, most significant bit first. They are
+	# written last, so they override any extra bit in that range.
+	for ((pos=0; pos < 24; pos++)); do
+		hdr[32+pos]=$(((vni >> (23 - pos)) & 1))
+	done
+
+	# Fold each group of eight bits into one hex byte.
+	local out
+	local octet idx
+	for ((pos=0; pos < 64; pos += 8)); do
+		octet=0
+		for ((idx=pos; idx < pos + 8; idx++)); do
+			((octet = (octet << 1) | hdr[idx]))
+		done
+		out+=$(printf %02x $octet):
+	done
+
+	echo ${out%:}
+}
+
+neg_bytes()
+{
+	# Print $1 with each lowercase hex digit replaced by its complement
+	# (0 <-> f, 1 <-> e, ...). Colons are kept, other characters dropped.
+	local bytes=$1; shift
+	local -A flip=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8
+		       [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:)
+	local out c
+	while [[ -n $bytes ]]; do
+		c=${bytes:0:1}
+		out+=${flip[$c]}
+		bytes=${bytes:1}
+	done
+	echo $out
+}
+
+vxlan_ping_do()
+{
+ local count=$1; shift # number of packets to send
+ local dev=$1; shift # interface to send from
+ local next_hop_mac=$1; shift # outer destination MAC
+ local dest_ip=$1; shift # outer destination IP
+ local dest_mac=$1; shift # inner Ethernet destination MAC
+ local vni=$1; shift # VNI to encode in the VXLAN header
+ local reserved_bits=$1; shift # extra header bit number(s) to set; may be empty
+ # Send $count hand-built VXLAN-encapsulated ICMP echo requests with $MZ.
+ local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits)
+ # Each $( : ... ) below expands to nothing and only labels a packet field.
+ $MZ $dev -c $count -d 100msec -q \
+ -b $next_hop_mac -B $dest_ip \
+ -t udp sp=23456,dp=$VXPORT,p=$(:
+ )"$vxlan_header:"$( : VXLAN
+ )"$dest_mac:"$( : ETH daddr
+ )"00:11:22:33:44:55:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"99:83:"$( : IP identification
+ )"40:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"00:00:"$( : IP header csum
+ )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr
+ )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr
+ )"08:"$( : ICMP type
+ )"00:"$( : ICMP code
+ )"8b:f2:"$( : ICMP csum
+ )"1f:6a:"$( : ICMP request identifier
+ )"00:01:"$( : ICMP request seq. number
+ )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload
+ )"6d:74:0b:00:00:00:00:00:"$( :
+ )"10:11:12:13:14:15:16:17:"$( :
+ )"18:19:1a:1b:1c:1d:1e:1f:"$( :
+ )"20:21:22:23:24:25:26:27:"$( :
+ )"28:29:2a:2b:2c:2d:2e:2f:"$( :
+ )"30:31:32:33:34:35:36:37"
+}
+
+vxlan_device_add()
+{
+	# Create vx1 and enslave it to br1; "$@" is passed on to ip_link_add.
+	ip_link_add vx1 up type vxlan id 1000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 "$@"
+	ip_link_set_master vx1 br1
+}
+
+vxlan_all_reserved_bits()
+{
+	# Print, one per line, every VXLAN header bit number that is
+	# reserved, i.e. neither the I flag (bit 4) nor part of the VNI
+	# field (bits 32..55).
+	local bit
+
+	for ((bit=0; bit < 64; bit++)); do
+		((bit == 4 || (bit >= 32 && bit < 56))) || echo $bit
+	done
+}
+
+vxlan_ping_vanilla()
+{ # Send 10 pings for VNI 1000 with no extra header bits set.
+ vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000
+}
+
+vxlan_ping_reserved()
+{
+	# One ping per reserved bit; returns the status of the last ping.
+	local bit nh_mac=$(mac_get $rp1) dst_mac=$(mac_get $h1)
+	for bit in $(vxlan_all_reserved_bits); do
+		vxlan_ping_do 1 $rp2 $nh_mac 192.0.2.17 $dst_mac 1000 "$bit"
+	done
+}
+
+vxlan_ping_test()
+{
+	# Run the command given after the first three arguments and check
+	# that the counter printed by $get_stat grew by exactly $expect.
+	local what=$1; shift
+	local get_stat=$1; shift
+	local expect=$1; shift
+	RET=0
+	local t0=$($get_stat)
+
+	# "$*" rather than "$@" in the message: it must reach check_err as
+	# one word even when the command has arguments.
+	"$@"
+	check_err $? "Failure when running $*"
+
+	local t1=$($get_stat)
+	local delta=$((t1 - t0))
+	((expect == delta))
+	check_err $? "Expected to capture $expect packets, got $delta."
+	log_test "$what"
+}
+
+__default_test_do()
+{
+	# $1: number of reserved bits vx1 should accept; $2: test name prefix.
+	local n_allowed_bits=$1; shift
+	local what=$1; shift
+	local h1_icmp="tc_rule_stats_get $h1 77 ingress"
+
+	vxlan_ping_test "$what: clean packets" "$h1_icmp" \
+			10 vxlan_ping_vanilla
+	local err0=$(link_stats_get vx1 rx errors)
+	vxlan_ping_test "$what: mangled packets" "$h1_icmp" \
+			$n_allowed_bits vxlan_ping_reserved
+	local err1=$(link_stats_get vx1 rx errors)
+
+	# 39 bits are probed: 64 minus the I flag and the 24 VNI bits.
+	RET=0
+	local expect=$((39 - n_allowed_bits))
+	local delta=$((err1 - err0))
+	((expect == delta))
+	check_err $? "Expected $expect error packets, got $delta."
+	log_test "$what: drops reported"
+}
+
+default_test_do()
+{
+ vxlan_device_add # no reserved_bits option given
+ __default_test_do 0 "Default" # expect none of the reserved bits to be accepted
+}
+
+default_test()
+{
+	# Presumably in_defer_scope runs the deferred cleanups on return.
+	in_defer_scope default_test_do
+}
+
+plain_test_do()
+{ # The mask below covers every header bit except the I flag and the VNI.
+ vxlan_device_add reserved_bits 0xf7ffffff000000ff
+ __default_test_do 0 "reserved_bits 0xf7ffffff000000ff"
+}
+
+plain_test()
+{
+	# Presumably in_defer_scope runs the deferred cleanups on return.
+	in_defer_scope plain_test_do
+}
+
+reserved_test()
+{
+	# Allow only reserved bit $1: the reserved_bits mask is the complement
+	# of a header that has the I flag, a full VNI and that one bit set.
+	local bit=$1; shift
+	local mask=$(neg_bytes $(vxlan_header_bytes 0xffffff $bit))
+
+	mask=0x${mask//:/}
+	vxlan_device_add reserved_bits $mask
+	__default_test_do 1 "reserved_bits $mask"
+}
+
+reserved_0_test()
+{
+	# Bit 0 is the most significant bit of the first header byte.
+	in_defer_scope reserved_test 0
+}
+
+reserved_10_test()
+{
+	# Bit 10 lies in the second header byte (bits 8..15).
+	in_defer_scope reserved_test 10
+}
+
+reserved_31_test()
+{
+	# Bit 31 is the last bit before the VNI field (bits 32..55).
+	in_defer_scope reserved_test 31
+}
+
+reserved_56_test()
+{
+	# Bit 56 is the first bit after the VNI field (bits 32..55).
+	in_defer_scope reserved_test 56
+}
+
+reserved_63_test()
+{
+	# Bit 63 is the last bit of the 8-byte header.
+	in_defer_scope reserved_test 63
+}
+
+trap cleanup EXIT # cleanup is not defined in this file; presumably provided by lib.sh
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS # EXIT_STATUS is not set in this file; presumably maintained by lib.sh
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 8994fec1c38f..2cd5c743b2d9 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -435,6 +435,13 @@ xfail_on_veth()
fi
}
+mac_get()
+{
+	local if_name=$1
+	# Print the MAC address of $if_name, taken from ip's JSON output.
+	ip -j link show dev "$if_name" | jq -r '.[]["address"]'
+}
+
kill_process()
{
local pid=$1; shift
@@ -451,7 +458,7 @@ ip_link_add()
defer ip link del dev "$name"
}
-ip_link_master()
+ip_link_set_master()
{
local member=$1; shift
local master=$1; shift
@@ -459,3 +466,35 @@ ip_link_master()
ip link set dev "$member" master "$master"
defer ip link set dev "$member" nomaster
}
+
+ip_link_set_addr()
+{
+	# Set the MAC address of link $1 to $2 and defer restoring the old one.
+	local name=$1 addr=$2
+	local old_addr
+	old_addr=$(mac_get "$name")
+	ip link set dev "$name" address "$addr"
+	defer ip link set dev "$name" address "$old_addr"
+}
+
+ip_link_set_up()
+{
+ local name=$1; shift
+ # Bring $name up now; the deferred command sets it down unconditionally.
+ ip link set dev "$name" up
+ defer ip link set dev "$name" down
+}
+
+ip_addr_add()
+{
+ local name=$1; shift
+ # Add the address given by the remaining arguments; defer its removal.
+ ip addr add dev "$name" "$@"
+ defer ip addr del dev "$name" "$@"
+}
+
+ip_route_add()
+{
+ ip route add "$@" # all arguments are passed through unchanged
+ defer ip route del "$@" # the same arguments are reused to delete the route
+}
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 640bc43452fa..88fa1d53ba2b 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -92,6 +92,9 @@ run_udp() {
echo "udp"
run_in_netns ${args}
+ echo "udp sendmmsg"
+ run_in_netns ${args} -m
+
echo "udp gso"
run_in_netns ${args} -S 0
diff --git a/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py
new file mode 100755
index 000000000000..0f44a6199495
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Script that checks that SFQ rejects a limit of 1 at the kernel
+# level. We can't use iproute2's tc because it does not accept a limit
+# of 1.
+
+import sys
+import os
+
+from pyroute2 import IPRoute
+from pyroute2.netlink.exceptions import NetlinkError
+
+ip = IPRoute()
+ifidx = ip.link_lookup(ifname=sys.argv[1])
+
+try:
+ ip.tc('add', 'sfq', ifidx, limit=1)
+ sys.exit(1)
+except NetlinkError:
+ sys.exit(0)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 16d51936b385..50e8d72781cb 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -208,5 +208,25 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "4d6f",
+ "name": "Check that limit of 1 is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "./scripts/sfq_rejects_limit_1.py $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": [
+ ]
}
]