From a07484c083eabc809e45a198bd639bfd37ac70b6 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Wed, 22 Feb 2023 15:35:30 +0700
Subject: bpf, docs: Fix link to BTF doc

Ross reported broken link to BTF documentation
(Documentation/bpf/btf.rst) in Documentation/bpf/bpf_devel_QA.rst. The
link in question is written using external link syntax, with the target
refers to BTF doc in reST source (btf.rst), which doesn't exist in
resulting HTML output.

Fix the link by replacing external link syntax with simply writing out
the target doc, which the link will be generated to the correct HTML doc
target.

Fixes: 6736aa793c2b5f ("selftests/bpf: Add general instructions for test execution")
Reported-by: Ross Zwisler <zwisler@google.com>
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Ross Zwisler <zwisler@google.com>
Link: https://lore.kernel.org/linux-doc/Y++09LKx25dtR4Ow@google.com/
Link: https://lore.kernel.org/bpf/20230222083530.26136-1-bagasdotme@gmail.com
---
 Documentation/bpf/bpf_devel_QA.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 03d4993eda6f..715f7321020f 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -469,7 +469,7 @@ under test should match the config file fragment in
 tools/testing/selftests/bpf as closely as possible.
 
 Finally to ensure support for latest BPF Type Format features -
-discussed in `Documentation/bpf/btf.rst`_ - pahole version 1.16
+discussed in Documentation/bpf/btf.rst - pahole version 1.16
 is required for kernels built with CONFIG_DEBUG_INFO_BTF=y.
 pahole is delivered in the dwarves package or can be built
 from source at
@@ -690,6 +690,5 @@ when:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
 .. _Documentation/dev-tools/kselftest.rst:
    https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
-.. _Documentation/bpf/btf.rst: btf.rst
 
 Happy BPF hacking!
-- 
cgit 


From 2d311f480b52eeb2e1fd432d64b78d82952c3808 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 26 Feb 2023 23:20:16 -0800
Subject: riscv, bpf: Fix patch_text implicit declaration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

bpf_jit_comp64.c uses patch_text(), so add <asm/patch.h> to it
to prevent the build error:

../arch/riscv/net/bpf_jit_comp64.c: In function 'bpf_arch_text_poke':
../arch/riscv/net/bpf_jit_comp64.c:691:23: error: implicit declaration of function 'patch_text'; did you mean 'path_get'? [-Werror=implicit-function-declaration]
  691 |                 ret = patch_text(ip, new_insns, ninsns);
      |                       ^~~~~~~~~~

Fixes: 596f2e6f9cf4 ("riscv, bpf: Add bpf_arch_text_poke support for RV64")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Pu Lehui <pulehui@huawei.com>
Acked-by: Björn Töpel <bjorn@rivosinc.com>
Link: https://lore.kernel.org/r/202302271000.Aj4nMXbZ-lkp@intel.com
Link: https://lore.kernel.org/bpf/20230227072016.14618-1-rdunlap@infradead.org
---
 arch/riscv/net/bpf_jit_comp64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index f5a668736c79..acdc3f040195 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -10,6 +10,7 @@
 #include <linux/filter.h>
 #include <linux/memory.h>
 #include <linux/stop_machine.h>
+#include <asm/patch.h>
 #include "bpf_jit.h"
 
 #define RV_REG_TCC RV_REG_A6
-- 
cgit 


From f99e6d7c4ed3be2531bd576425a5bd07fb133bd7 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Mon, 27 Feb 2023 10:11:56 +0100
Subject: bgmac: fix *initial* chip reset to support BCM5358
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While bringing hardware up we should perform a full reset including the
switch bit (BGMAC_BCMA_IOCTL_SW_RESET aka SICF_SWRST). It's what
specification says and what reference driver does.

This seems to be critical for the BCM5358. Without this hardware doesn't
get initialized properly and doesn't seem to transmit or receive any
packets.

Originally bgmac was calling bgmac_chip_reset() before setting
"has_robosw" property which resulted in expected behaviour. That has
changed as a side effect of adding platform device support which
regressed BCM5358 support.

Fixes: f6a95a24957a ("net: ethernet: bgmac: Add platform device support")
Cc: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20230227091156.19509-1-zajec5@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/broadcom/bgmac.c | 8 ++++++--
 drivers/net/ethernet/broadcom/bgmac.h | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 3038386a5afd..1761df8fb7f9 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -890,13 +890,13 @@ static void bgmac_chip_reset_idm_config(struct bgmac *bgmac)
 
 		if (iost & BGMAC_BCMA_IOST_ATTACHED) {
 			flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
-			if (!bgmac->has_robosw)
+			if (bgmac->in_init || !bgmac->has_robosw)
 				flags |= BGMAC_BCMA_IOCTL_SW_RESET;
 		}
 		bgmac_clk_enable(bgmac, flags);
 	}
 
-	if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
+	if (iost & BGMAC_BCMA_IOST_ATTACHED && (bgmac->in_init || !bgmac->has_robosw))
 		bgmac_idm_write(bgmac, BCMA_IOCTL,
 				bgmac_idm_read(bgmac, BCMA_IOCTL) &
 				~BGMAC_BCMA_IOCTL_SW_RESET);
@@ -1490,6 +1490,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
 	struct net_device *net_dev = bgmac->net_dev;
 	int err;
 
+	bgmac->in_init = true;
+
 	bgmac_chip_intrs_off(bgmac);
 
 	net_dev->irq = bgmac->irq;
@@ -1542,6 +1544,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
 	/* Omit FCS from max MTU size */
 	net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN;
 
+	bgmac->in_init = false;
+
 	err = register_netdev(bgmac->net_dev);
 	if (err) {
 		dev_err(bgmac->dev, "Cannot register net device\n");
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index e05ac92c0650..d73ef262991d 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -472,6 +472,8 @@ struct bgmac {
 	int irq;
 	u32 int_mask;
 
+	bool in_init;
+
 	/* Current MAC state */
 	int mac_speed;
 	int mac_duplex;
-- 
cgit 


From 11f180a5d62a51b484e9648f9b310e1bd50b1a57 Mon Sep 17 00:00:00 2001
From: Kang Chen <void0red@gmail.com>
Date: Mon, 27 Feb 2023 17:30:37 +0800
Subject: nfc: fdp: add null check of devm_kmalloc_array in
 fdp_nci_i2c_read_device_properties

devm_kmalloc_array may fails, *fw_vsc_cfg might be null and cause
out-of-bounds write in device_property_read_u8_array later.

Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver")
Signed-off-by: Kang Chen <void0red@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230227093037.907654-1-void0red@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/nfc/fdp/i2c.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index 2d53e0f88d2f..1e0f2297f9c6 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -247,6 +247,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
 					   len, sizeof(**fw_vsc_cfg),
 					   GFP_KERNEL);
 
+		if (!*fw_vsc_cfg)
+			goto alloc_err;
+
 		r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME,
 						  *fw_vsc_cfg, len);
 
@@ -260,6 +263,7 @@ vsc_read_err:
 		*fw_vsc_cfg = NULL;
 	}
 
+alloc_err:
 	dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s",
 		*clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no");
 }
-- 
cgit 


From 8f9850dd8d23c1290cb642ce9548a440da5771ec Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 27 Feb 2023 13:03:22 +0300
Subject: net: phy: unlock on error in phy_probe()

If genphy_c45_read_eee_adv() fails then we need to do a reset and unlock
the &phydev->lock mutex before returning.

Fixes: 3eeca4e199ce ("net: phy: do not force EEE support")
Signed-off-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://lore.kernel.org/r/Y/x/6kHCjnQHqOpF@kili
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/phy/phy_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 3f8a64fb9d71..9e9fd8ff00f6 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3146,7 +3146,7 @@ static int phy_probe(struct device *dev)
 	 */
 	err = genphy_c45_read_eee_adv(phydev, phydev->advertising_eee);
 	if (err)
-		return err;
+		goto out;
 
 	/* There is no "enabled" flag. If PHY is advertising, assume it is
 	 * kind of enabled.
-- 
cgit 


From ae44f1c9d1fc54aeceb335fedb1e73b2c3ee4561 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 24 Feb 2023 17:59:39 +0200
Subject: powerpc: dts: t1040rdb: fix compatible string for Rev A boards

It looks like U-Boot fails to start the kernel properly when the
compatible string of the board isn't fsl,T1040RDB, so stop overriding it
from the rev-a.dts.

Fixes: 5ebb74749202 ("powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
index 73f8c998c64d..d4f5f159d6f2 100644
--- a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
@@ -10,7 +10,6 @@
 
 / {
 	model = "fsl,T1040RDB-REV-A";
-	compatible = "fsl,T1040RDB-REV-A";
 };
 
 &seville_port0 {
-- 
cgit 


From 8b322f9fdb35ecee0d4a40de8af923444bd624ea Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 24 Feb 2023 17:59:40 +0200
Subject: powerpc: dts: t1040rdb: enable both CPU ports

Since commit eca70102cfb1 ("net: dsa: felix: add support for changing
DSA master") included in kernel v6.1, the driver supports 2 CPU ports,
and they can be put in a LAG, for example (see
Documentation/networking/dsa/configuration.rst for more details).

Defining the second CPU port in the device tree should not cause any
compatibility issue, because the default CPU port was &seville_port8
before this change, and still is &seville_port8 now (the numerically
first CPU port is used by default).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 arch/powerpc/boot/dts/fsl/t1040rdb.dts      | 5 ++++-
 arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
index b6733e7e6580..dd3aab81e9de 100644
--- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -180,6 +180,9 @@
 };
 
 &seville_port8 {
-	ethernet = <&enet0>;
+	status = "okay";
+};
+
+&seville_port9 {
 	status = "okay";
 };
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index f58eb820eb5e..ad0ab33336b8 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -686,6 +686,7 @@
 			seville_port8: port@8 {
 				reg = <8>;
 				phy-mode = "internal";
+				ethernet = <&enet0>;
 				status = "disabled";
 
 				fixed-link {
@@ -697,6 +698,7 @@
 			seville_port9: port@9 {
 				reg = <9>;
 				phy-mode = "internal";
+				ethernet = <&enet1>;
 				status = "disabled";
 
 				fixed-link {
-- 
cgit 


From 4d42cd6bc2ac1b9be50ade13771daec90c9d18b1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Feb 2023 10:12:01 -0800
Subject: tls: rx: fix return value for async crypto

Gaurav reports that TLS Rx is broken with async crypto
accelerators. The commit under fixes missed updating
the retval byte counting logic when updating how records
are stored. Even tho both before and after the change
'decrypted' was updated inside the main loop, it was
completely overwritten when processing the async
completions. Now that the rx_list only holds
non-zero-copy records we need to add, not overwrite.

Reported-and-bisected-by: Gaurav Jain <gaurav.jain@nxp.com>
Fixes: cbbdee9918a2 ("tls: rx: async: don't put async zc on the list")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217064
Tested-by: Gaurav Jain <gaurav.jain@nxp.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230227181201.1793772-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_sw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 782d3701b86f..021d760f9133 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2127,7 +2127,7 @@ recv_end:
 		else
 			err = process_rx_list(ctx, msg, &control, 0,
 					      async_copy_bytes, is_peek);
-		decrypted = max(err, 0);
+		decrypted += max(err, 0);
 	}
 
 	copied += decrypted;
-- 
cgit 


From 880ce5f20033cd6ecb2c0edfe0376c9e45220012 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 27 Feb 2023 14:17:06 +0000
Subject: net: avoid skb end_offset change in __skb_unclone_keeptruesize()

Once initial skb->head has been allocated from skb_small_head_cache,
we need to make sure to use the same strategy whenever skb->head
has to be re-allocated, as found by syzbot [1]

This means kmalloc_reserve() can not fallback from using
skb_small_head_cache to generic (power-of-two) kmem caches.

It seems that we probably want to rework things in the future,
to partially revert following patch, because we no longer use
ksize() for skb allocated in TX path.

2b88cba55883 ("net: preserve skb_end_offset() in skb_unclone_keeptruesize()")

Ideally, TCP stack should never put payload in skb->head,
this effort has to be completed.

In the mean time, add a sanity check.

[1]
BUG: KASAN: invalid-free in slab_free mm/slub.c:3787 [inline]
BUG: KASAN: invalid-free in kmem_cache_free+0xee/0x5c0 mm/slub.c:3809
Free of addr ffff88806cdee800 by task syz-executor239/5189

CPU: 0 PID: 5189 Comm: syz-executor239 Not tainted 6.2.0-rc8-syzkaller-02400-gd1fabc68f8e0 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106
print_address_description mm/kasan/report.c:306 [inline]
print_report+0x15e/0x45d mm/kasan/report.c:417
kasan_report_invalid_free+0x9b/0x1b0 mm/kasan/report.c:482
____kasan_slab_free+0x1a5/0x1c0 mm/kasan/common.c:216
kasan_slab_free include/linux/kasan.h:177 [inline]
slab_free_hook mm/slub.c:1781 [inline]
slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1807
slab_free mm/slub.c:3787 [inline]
kmem_cache_free+0xee/0x5c0 mm/slub.c:3809
skb_kfree_head net/core/skbuff.c:857 [inline]
skb_kfree_head net/core/skbuff.c:853 [inline]
skb_free_head+0x16f/0x1a0 net/core/skbuff.c:872
skb_release_data+0x57a/0x820 net/core/skbuff.c:901
skb_release_all net/core/skbuff.c:966 [inline]
__kfree_skb+0x4f/0x70 net/core/skbuff.c:980
tcp_wmem_free_skb include/net/tcp.h:302 [inline]
tcp_rtx_queue_purge net/ipv4/tcp.c:3061 [inline]
tcp_write_queue_purge+0x617/0xcf0 net/ipv4/tcp.c:3074
tcp_v4_destroy_sock+0x125/0x810 net/ipv4/tcp_ipv4.c:2302
inet_csk_destroy_sock+0x19a/0x440 net/ipv4/inet_connection_sock.c:1195
__tcp_close+0xb96/0xf50 net/ipv4/tcp.c:3021
tcp_close+0x2d/0xc0 net/ipv4/tcp.c:3033
inet_release+0x132/0x270 net/ipv4/af_inet.c:426
__sock_release+0xcd/0x280 net/socket.c:651
sock_close+0x1c/0x20 net/socket.c:1393
__fput+0x27c/0xa90 fs/file_table.c:320
task_work_run+0x16f/0x270 kernel/task_work.c:179
resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
exit_to_user_mode_prepare+0x23c/0x250 kernel/entry/common.c:203
__syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296
do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f2511f546c3
Code: c7 c2 c0 ff ff ff f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8
RSP: 002b:00007ffef0103d48 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f2511f546c3
RDX: 0000000000000978 RSI: 00000000200000c0 RDI: 0000000000000003
RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000003434
R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffef0103d6c
R13: 00007ffef0103d80 R14: 00007ffef0103dc0 R15: 0000000000000003
</TASK>

Allocated by task 5189:
kasan_save_stack+0x22/0x40 mm/kasan/common.c:45
kasan_set_track+0x25/0x30 mm/kasan/common.c:52
____kasan_kmalloc mm/kasan/common.c:374 [inline]
____kasan_kmalloc mm/kasan/common.c:333 [inline]
__kasan_kmalloc+0xa5/0xb0 mm/kasan/common.c:383
kasan_kmalloc include/linux/kasan.h:211 [inline]
__do_kmalloc_node mm/slab_common.c:968 [inline]
__kmalloc_node_track_caller+0x5b/0xc0 mm/slab_common.c:988
kmalloc_reserve+0xf1/0x230 net/core/skbuff.c:539
pskb_expand_head+0x237/0x1160 net/core/skbuff.c:1995
__skb_unclone_keeptruesize+0x93/0x220 net/core/skbuff.c:2094
skb_unclone_keeptruesize include/linux/skbuff.h:1910 [inline]
skb_prepare_for_shift net/core/skbuff.c:3804 [inline]
skb_shift+0xef8/0x1e20 net/core/skbuff.c:3877
tcp_skb_shift net/ipv4/tcp_input.c:1538 [inline]
tcp_shift_skb_data net/ipv4/tcp_input.c:1646 [inline]
tcp_sacktag_walk+0x93b/0x18a0 net/ipv4/tcp_input.c:1713
tcp_sacktag_write_queue+0x1599/0x31d0 net/ipv4/tcp_input.c:1974
tcp_ack+0x2e9f/0x5a10 net/ipv4/tcp_input.c:3847
tcp_rcv_established+0x667/0x2230 net/ipv4/tcp_input.c:6006
tcp_v4_do_rcv+0x670/0x9b0 net/ipv4/tcp_ipv4.c:1721
sk_backlog_rcv include/net/sock.h:1113 [inline]
__release_sock+0x133/0x3b0 net/core/sock.c:2921
release_sock+0x58/0x1b0 net/core/sock.c:3488
tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1485
inet_sendmsg+0x9d/0xe0 net/ipv4/af_inet.c:825
sock_sendmsg_nosec net/socket.c:722 [inline]
sock_sendmsg+0xde/0x190 net/socket.c:745
sock_write_iter+0x295/0x3d0 net/socket.c:1136
call_write_iter include/linux/fs.h:2189 [inline]
new_sync_write fs/read_write.c:491 [inline]
vfs_write+0x9ed/0xdd0 fs/read_write.c:584
ksys_write+0x1ec/0x250 fs/read_write.c:637
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

The buggy address belongs to the object at ffff88806cdee800
which belongs to the cache kmalloc-1k of size 1024
The buggy address is located 0 bytes inside of
1024-byte region [ffff88806cdee800, ffff88806cdeec00)

The buggy address belongs to the physical page:
page:ffffea0001b37a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x6cde8
head:ffffea0001b37a00 order:3 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0
flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff)
raw: 00fff00000010200 ffff888012441dc0 dead000000000122 0000000000000000
raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1f2a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_MEMALLOC|__GFP_HARDWALL), pid 75, tgid 75 (kworker/u4:4), ts 96369578780, free_ts 26734162530
prep_new_page mm/page_alloc.c:2531 [inline]
get_page_from_freelist+0x119c/0x2ce0 mm/page_alloc.c:4283
__alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5549
alloc_pages+0x1aa/0x270 mm/mempolicy.c:2287
alloc_slab_page mm/slub.c:1851 [inline]
allocate_slab+0x25f/0x350 mm/slub.c:1998
new_slab mm/slub.c:2051 [inline]
___slab_alloc+0xa91/0x1400 mm/slub.c:3193
__slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3292
__slab_alloc_node mm/slub.c:3345 [inline]
slab_alloc_node mm/slub.c:3442 [inline]
__kmem_cache_alloc_node+0x1a4/0x430 mm/slub.c:3491
__do_kmalloc_node mm/slab_common.c:967 [inline]
__kmalloc_node_track_caller+0x4b/0xc0 mm/slab_common.c:988
kmalloc_reserve+0xf1/0x230 net/core/skbuff.c:539
__alloc_skb+0x129/0x330 net/core/skbuff.c:608
__netdev_alloc_skb+0x74/0x410 net/core/skbuff.c:672
__netdev_alloc_skb_ip_align include/linux/skbuff.h:3203 [inline]
netdev_alloc_skb_ip_align include/linux/skbuff.h:3213 [inline]
batadv_iv_ogm_aggregate_new+0x106/0x4e0 net/batman-adv/bat_iv_ogm.c:558
batadv_iv_ogm_queue_add net/batman-adv/bat_iv_ogm.c:670 [inline]
batadv_iv_ogm_schedule_buff+0xe6b/0x1450 net/batman-adv/bat_iv_ogm.c:849
batadv_iv_ogm_schedule net/batman-adv/bat_iv_ogm.c:868 [inline]
batadv_iv_ogm_schedule net/batman-adv/bat_iv_ogm.c:861 [inline]
batadv_iv_send_outstanding_bat_ogm_packet+0x744/0x910 net/batman-adv/bat_iv_ogm.c:1712
process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289
worker_thread+0x669/0x1090 kernel/workqueue.c:2436
page last free stack trace:
reset_page_owner include/linux/page_owner.h:24 [inline]
free_pages_prepare mm/page_alloc.c:1446 [inline]
free_pcp_prepare+0x66a/0xc20 mm/page_alloc.c:1496
free_unref_page_prepare mm/page_alloc.c:3369 [inline]
free_unref_page+0x1d/0x490 mm/page_alloc.c:3464
free_contig_range+0xb5/0x180 mm/page_alloc.c:9488
destroy_args+0xa8/0x64c mm/debug_vm_pgtable.c:998
debug_vm_pgtable+0x28de/0x296f mm/debug_vm_pgtable.c:1318
do_one_initcall+0x141/0x790 init/main.c:1306
do_initcall_level init/main.c:1379 [inline]
do_initcalls init/main.c:1395 [inline]
do_basic_setup init/main.c:1414 [inline]
kernel_init_freeable+0x6f9/0x782 init/main.c:1634
kernel_init+0x1e/0x1d0 init/main.c:1522
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308

Memory state around the buggy address:
ffff88806cdee700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff88806cdee780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88806cdee800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
ffff88806cdee880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Fixes: bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eb7d33b41e71..1a31815104d6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -517,18 +517,16 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
 #ifdef HAVE_SKB_SMALL_HEAD_CACHE
 	if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
 	    !(flags & KMALLOC_NOT_NORMAL_BITS)) {
-
-		/* skb_small_head_cache has non power of two size,
-		 * likely forcing SLUB to use order-3 pages.
-		 * We deliberately attempt a NOMEMALLOC allocation only.
-		 */
 		obj = kmem_cache_alloc_node(skb_small_head_cache,
 				flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
 				node);
-		if (obj) {
-			*size = SKB_SMALL_HEAD_CACHE_SIZE;
+		*size = SKB_SMALL_HEAD_CACHE_SIZE;
+		if (obj || !(gfp_pfmemalloc_allowed(flags)))
 			goto out;
-		}
+		/* Try again but now we are using pfmemalloc reserves */
+		ret_pfmemalloc = true;
+		obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
+		goto out;
 	}
 #endif
 	*size = obj_size = kmalloc_size_roundup(obj_size);
@@ -2082,6 +2080,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 }
 EXPORT_SYMBOL(skb_realloc_headroom);
 
+/* Note: We plan to rework this in linux-6.4 */
 int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
 {
 	unsigned int saved_end_offset, saved_truesize;
@@ -2100,6 +2099,22 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
 	if (likely(skb_end_offset(skb) == saved_end_offset))
 		return 0;
 
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+	/* We can not change skb->end if the original or new value
+	 * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head().
+	 */
+	if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM ||
+	    skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) {
+		/* We think this path should not be taken.
+		 * Add a temporary trace to warn us just in case.
+		 */
+		pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n",
+			    saved_end_offset, skb_end_offset(skb));
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+#endif
+
 	shinfo = skb_shinfo(skb);
 
 	/* We are about to change back skb->end,
-- 
cgit 


From fb07390463c95e6eef254044d6dde050bfb9807a Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@mojatatu.com>
Date: Mon, 27 Feb 2023 12:23:52 -0300
Subject: net/sched: act_connmark: handle errno on tcf_idr_check_alloc

Smatch reports that 'ci' can be used uninitialized.
The current code ignores errno coming from tcf_idr_check_alloc, which
will lead to the incorrect usage of 'ci'. Handle the errno as it should.

Fixes: 288864effe33 ("net/sched: act_connmark: transition to percpu stats and rcu")
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_connmark.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 8dabfb52ea3d..0d7aee8933c5 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -158,6 +158,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 		nparms->zone = parm->zone;
 
 		ret = 0;
+	} else {
+		err = ret;
+		goto out_free;
 	}
 
 	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
-- 
cgit 


From 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 27 Feb 2023 15:30:24 +0000
Subject: ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping()

ila_xlat_nl_cmd_get_mapping() generates an empty skb,
triggerring a recent sanity check [1].

Instead, return an error code, so that user space
can get it.

[1]
skb_assert_len
WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline]
WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
Modules linked in:
CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023
pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : skb_assert_len include/linux/skbuff.h:2527 [inline]
pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
lr : skb_assert_len include/linux/skbuff.h:2527 [inline]
lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
sp : ffff80001e0d6c40
x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0
x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00
x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10
x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0
x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000
x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001
x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600
x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001
x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744
x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e
Call trace:
skb_assert_len include/linux/skbuff.h:2527 [inline]
__dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
dev_queue_xmit include/linux/netdevice.h:3033 [inline]
__netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline]
__netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325
netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338
__netlink_sendskb net/netlink/af_netlink.c:1283 [inline]
netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292
netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380
nlmsg_unicast include/net/netlink.h:1099 [inline]
genlmsg_unicast include/net/genetlink.h:433 [inline]
genlmsg_reply include/net/genetlink.h:443 [inline]
ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493
genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline]
genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline]
genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065
netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574
genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076
netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365
netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942
sock_sendmsg_nosec net/socket.c:714 [inline]
sock_sendmsg net/socket.c:734 [inline]
____sys_sendmsg+0x558/0x844 net/socket.c:2479
___sys_sendmsg net/socket.c:2533 [inline]
__sys_sendmsg+0x26c/0x33c net/socket.c:2562
__do_sys_sendmsg net/socket.c:2571 [inline]
__se_sys_sendmsg net/socket.c:2569 [inline]
__arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569
__invoke_syscall arch/arm64/kernel/syscall.c:38 [inline]
invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52
el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142
do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193
el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637
el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655
el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591
irq event stamp: 136484
hardirqs last enabled at (136483): [<ffff800008350244>] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345
hardirqs last disabled at (136484): [<ffff800012358d60>] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405
softirqs last enabled at (136418): [<ffff800008020ea8>] softirq_handle_end kernel/softirq.c:414 [inline]
softirqs last enabled at (136418): [<ffff800008020ea8>] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600
softirqs last disabled at (136371): [<ffff80000802b4a4>] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80
---[ end trace 0000000000000000 ]---
skb len=0 headroom=0 headlen=0 tailroom=192
mac=(0,0) net=(0,-1) trans=-1
shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0))
csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0)
hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0
dev name=nlmon0 feat=0x0000000000005861

Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ila/ila_xlat.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 47447f0241df..bee45dfeb187 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
 
 	rcu_read_lock();
 
+	ret = -ESRCH;
 	ila = ila_lookup_by_params(&xp, ilan);
 	if (ila) {
 		ret = ila_dump_info(ila,
-- 
cgit 


From dfd2f0eb2347dbdf391fd5b8255fefc58a745472 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 27 Feb 2023 18:44:36 +0000
Subject: net/sched: flower: fix fl_change() error recovery path

The two "goto errout;" paths in fl_change() became wrong
after cited commit.

Indeed we only must not call __fl_put() until the net pointer
has been set in tcf_exts_init_ex()

This is a minimal fix. We might in the future validate TCA_FLOWER_FLAGS
before we allocate @fnew.

BUG: KASAN: null-ptr-deref in instrument_atomic_read include/linux/instrumented.h:72 [inline]
BUG: KASAN: null-ptr-deref in atomic_read include/linux/atomic/atomic-instrumented.h:27 [inline]
BUG: KASAN: null-ptr-deref in refcount_read include/linux/refcount.h:147 [inline]
BUG: KASAN: null-ptr-deref in __refcount_add_not_zero include/linux/refcount.h:152 [inline]
BUG: KASAN: null-ptr-deref in __refcount_inc_not_zero include/linux/refcount.h:227 [inline]
BUG: KASAN: null-ptr-deref in refcount_inc_not_zero include/linux/refcount.h:245 [inline]
BUG: KASAN: null-ptr-deref in maybe_get_net include/net/net_namespace.h:269 [inline]
BUG: KASAN: null-ptr-deref in tcf_exts_get_net include/net/pkt_cls.h:260 [inline]
BUG: KASAN: null-ptr-deref in __fl_put net/sched/cls_flower.c:513 [inline]
BUG: KASAN: null-ptr-deref in __fl_put+0x13e/0x3b0 net/sched/cls_flower.c:508
Read of size 4 at addr 000000000000014c by task syz-executor548/5082

CPU: 0 PID: 5082 Comm: syz-executor548 Not tainted 6.2.0-syzkaller-05251-g5b7c4cabbb65 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106
print_report mm/kasan/report.c:420 [inline]
kasan_report+0xec/0x130 mm/kasan/report.c:517
check_region_inline mm/kasan/generic.c:183 [inline]
kasan_check_range+0x141/0x190 mm/kasan/generic.c:189
instrument_atomic_read include/linux/instrumented.h:72 [inline]
atomic_read include/linux/atomic/atomic-instrumented.h:27 [inline]
refcount_read include/linux/refcount.h:147 [inline]
__refcount_add_not_zero include/linux/refcount.h:152 [inline]
__refcount_inc_not_zero include/linux/refcount.h:227 [inline]
refcount_inc_not_zero include/linux/refcount.h:245 [inline]
maybe_get_net include/net/net_namespace.h:269 [inline]
tcf_exts_get_net include/net/pkt_cls.h:260 [inline]
__fl_put net/sched/cls_flower.c:513 [inline]
__fl_put+0x13e/0x3b0 net/sched/cls_flower.c:508
fl_change+0x101b/0x4ab0 net/sched/cls_flower.c:2341
tc_new_tfilter+0x97c/0x2290 net/sched/cls_api.c:2310
rtnetlink_rcv_msg+0x996/0xd50 net/core/rtnetlink.c:6165
netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2574
netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1365
netlink_sendmsg+0x925/0xe30 net/netlink/af_netlink.c:1942
sock_sendmsg_nosec net/socket.c:722 [inline]
sock_sendmsg+0xde/0x190 net/socket.c:745
____sys_sendmsg+0x334/0x900 net/socket.c:2504
___sys_sendmsg+0x110/0x1b0 net/socket.c:2558
__sys_sendmmsg+0x18f/0x460 net/socket.c:2644
__do_sys_sendmmsg net/socket.c:2673 [inline]
__se_sys_sendmmsg net/socket.c:2670 [inline]
__x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2670

Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier")
Reported-by: syzbot+baabf3efa7c1e57d28b2@syzkaller.appspotmail.com
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e960a46b0520..475fe222a855 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2200,8 +2200,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
 
 		if (!tc_flags_valid(fnew->flags)) {
+			kfree(fnew);
 			err = -EINVAL;
-			goto errout;
+			goto errout_tb;
 		}
 	}
 
@@ -2226,8 +2227,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		}
 		spin_unlock(&tp->lock);
 
-		if (err)
-			goto errout;
+		if (err) {
+			kfree(fnew);
+			goto errout_tb;
+		}
 	}
 	fnew->handle = handle;
 
@@ -2337,7 +2340,6 @@ errout_mask:
 	fl_mask_put(head, fnew->mask);
 errout_idr:
 	idr_remove(&head->handle_idr, fnew->handle);
-errout:
 	__fl_put(fnew);
 errout_tb:
 	kfree(tb);
-- 
cgit 


From 81563d8548b0478075c720666be348d4199b8591 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 28 Feb 2023 21:47:42 +0100
Subject: net: lan966x: Fix port police support using tc-matchall

When the police was removed from the port, then it was trying to
remove the police from the police id and not from the actual
police index.
The police id represents the id of the police and police index
represents the position in HW where the police is situated.
The port police id can be any number while the port police index
is a number based on the port chip port.
Fix this by deleting the police from HW that is situated at the
police index and not police id.

Fixes: 5390334b59a3 ("net: lan966x: Add port police support using tc-matchall")
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan966x/lan966x_police.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c
index a9aec900d608..7d66fe75cd3b 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c
@@ -194,7 +194,7 @@ int lan966x_police_port_del(struct lan966x_port *port,
 		return -EINVAL;
 	}
 
-	err = lan966x_police_del(port, port->tc.police_id);
+	err = lan966x_police_del(port, POL_IDX_PORT + port->chip_port);
 	if (err) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Failed to add policer to port");
-- 
cgit 


From 2067e7a00aa604b94de31d64f29b8893b1696f26 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 27 Feb 2023 17:36:46 +0800
Subject: selftests: nft_nat: ensuring the listening side is up before starting
 the client

The test_local_dnat_portonly() function initiates the client-side as
soon as it sets the listening side to the background. This could lead to
a race condition where the server may not be ready to listen. To ensure
that the server-side is up and running before initiating the
client-side, a delay is introduced to the test_local_dnat_portonly()
function.

Before the fix:
  # ./nft_nat.sh
  PASS: netns routing/connectivity: ns0-rthlYrBU can reach ns1-rthlYrBU and ns2-rthlYrBU
  PASS: ping to ns1-rthlYrBU was ip NATted to ns2-rthlYrBU
  PASS: ping to ns1-rthlYrBU OK after ip nat output chain flush
  PASS: ipv6 ping to ns1-rthlYrBU was ip6 NATted to ns2-rthlYrBU
  2023/02/27 04:11:03 socat[6055] E connect(5, AF=2 10.0.1.99:2000, 16): Connection refused
  ERROR: inet port rewrite

After the fix:
  # ./nft_nat.sh
  PASS: netns routing/connectivity: ns0-9sPJV6JJ can reach ns1-9sPJV6JJ and ns2-9sPJV6JJ
  PASS: ping to ns1-9sPJV6JJ was ip NATted to ns2-9sPJV6JJ
  PASS: ping to ns1-9sPJV6JJ OK after ip nat output chain flush
  PASS: ipv6 ping to ns1-9sPJV6JJ was ip6 NATted to ns2-9sPJV6JJ
  PASS: inet port rewrite without l3 address

Fixes: 282e5f8fe907 ("netfilter: nat: really support inet nat without l3 address")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/nft_nat.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 924ecb3f1f73..dd40d9f6f259 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -404,6 +404,8 @@ EOF
 	echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
 	sc_s=$!
 
+	sleep 1
+
 	result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
 
 	if [ "$result" = "SERVER-inet" ];then
-- 
cgit 


From 860e874290fb3be08e966c9c8ffc510c5b0f2bd8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 28 Feb 2023 17:09:03 +0100
Subject: netfilter: nft_last: copy content when cloning expression

If the ruleset contains last timestamps, restore them accordingly.
Otherwise, listing after restoration shows never used items.

Fixes: 33a24de37e81 ("netfilter: nft_last: move stateful fields out of expression data")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_last.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 7f2bda6641bd..8e6d7eaf9dc8 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -105,11 +105,15 @@ static void nft_last_destroy(const struct nft_ctx *ctx,
 static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
 {
 	struct nft_last_priv *priv_dst = nft_expr_priv(dst);
+	struct nft_last_priv *priv_src = nft_expr_priv(src);
 
 	priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
 	if (!priv_dst->last)
 		return -ENOMEM;
 
+	priv_dst->last->set = priv_src->last->set;
+	priv_dst->last->jiffies = priv_src->last->jiffies;
+
 	return 0;
 }
 
-- 
cgit 


From aabef97a35160461e9c576848ded737558d89055 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 28 Feb 2023 20:43:02 +0100
Subject: netfilter: nft_quota: copy content when cloning expression

If the ruleset contains consumed quota, restore them accordingly.
Otherwise, listing after restoration shows never used items.

Restore the user-defined quota and flags too.

Fixes: ed0a0c60f0e5 ("netfilter: nft_quota: move stateful fields out of expression data")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_quota.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 123578e28917..3ba12a7471b0 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -236,12 +236,16 @@ static void nft_quota_destroy(const struct nft_ctx *ctx,
 static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
 {
 	struct nft_quota *priv_dst = nft_expr_priv(dst);
+	struct nft_quota *priv_src = nft_expr_priv(src);
+
+	priv_dst->quota = priv_src->quota;
+	priv_dst->flags = priv_src->flags;
 
 	priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
 	if (!priv_dst->consumed)
 		return -ENOMEM;
 
-	atomic64_set(priv_dst->consumed, 0);
+	*priv_dst->consumed = *priv_src->consumed;
 
 	return 0;
 }
-- 
cgit 


From 49c47cc21b5b7a3d8deb18fc57b0aa2ab1286962 Mon Sep 17 00:00:00 2001
From: Hangyu Hua <hbh25y@gmail.com>
Date: Tue, 28 Feb 2023 10:33:44 +0800
Subject: net: tls: fix possible race condition between
 do_tls_getsockopt_conf() and do_tls_setsockopt_conf()

ctx->crypto_send.info is not protected by lock_sock in
do_tls_getsockopt_conf(). A race condition between do_tls_getsockopt_conf()
and error paths of do_tls_setsockopt_conf() may lead to a use-after-free
or null-deref.

More discussion:  https://lore.kernel.org/all/Y/ht6gQL+u6fj3dG@hog/

Fixes: 3c4d7559159b ("tls: kernel TLS support")
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Link: https://lore.kernel.org/r/20230228023344.9623-1-hbh25y@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_main.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 3735cb00905d..b32c112984dd 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -405,13 +405,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(crypto_info_aes_gcm_128->iv,
 		       cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 		       TLS_CIPHER_AES_GCM_128_IV_SIZE);
 		memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval,
 				 crypto_info_aes_gcm_128,
 				 sizeof(*crypto_info_aes_gcm_128)))
@@ -429,13 +427,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(crypto_info_aes_gcm_256->iv,
 		       cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
 		       TLS_CIPHER_AES_GCM_256_IV_SIZE);
 		memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval,
 				 crypto_info_aes_gcm_256,
 				 sizeof(*crypto_info_aes_gcm_256)))
@@ -451,13 +447,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(aes_ccm_128->iv,
 		       cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE,
 		       TLS_CIPHER_AES_CCM_128_IV_SIZE);
 		memcpy(aes_ccm_128->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128)))
 			rc = -EFAULT;
 		break;
@@ -472,13 +466,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(chacha20_poly1305->iv,
 		       cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE,
 		       TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE);
 		memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval, chacha20_poly1305,
 				sizeof(*chacha20_poly1305)))
 			rc = -EFAULT;
@@ -493,13 +485,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(sm4_gcm_info->iv,
 		       cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE,
 		       TLS_CIPHER_SM4_GCM_IV_SIZE);
 		memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info)))
 			rc = -EFAULT;
 		break;
@@ -513,13 +503,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(sm4_ccm_info->iv,
 		       cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE,
 		       TLS_CIPHER_SM4_CCM_IV_SIZE);
 		memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info)))
 			rc = -EFAULT;
 		break;
@@ -535,13 +523,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(crypto_info_aria_gcm_128->iv,
 		       cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE,
 		       TLS_CIPHER_ARIA_GCM_128_IV_SIZE);
 		memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval,
 				 crypto_info_aria_gcm_128,
 				 sizeof(*crypto_info_aria_gcm_128)))
@@ -559,13 +545,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(crypto_info_aria_gcm_256->iv,
 		       cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE,
 		       TLS_CIPHER_ARIA_GCM_256_IV_SIZE);
 		memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval,
 				 crypto_info_aria_gcm_256,
 				 sizeof(*crypto_info_aria_gcm_256)))
@@ -614,11 +598,9 @@ static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval,
 	if (len < sizeof(value))
 		return -EINVAL;
 
-	lock_sock(sk);
 	value = -EINVAL;
 	if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW)
 		value = ctx->rx_no_pad;
-	release_sock(sk);
 	if (value < 0)
 		return value;
 
@@ -635,6 +617,8 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
 {
 	int rc = 0;
 
+	lock_sock(sk);
+
 	switch (optname) {
 	case TLS_TX:
 	case TLS_RX:
@@ -651,6 +635,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
 		rc = -ENOPROTOOPT;
 		break;
 	}
+
+	release_sock(sk);
+
 	return rc;
 }
 
-- 
cgit 


From f3221361dc85d4de22586ce8441ec2c67b454f5d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 28 Feb 2023 16:28:57 -0800
Subject: net: tls: avoid hanging tasks on the tx_lock

syzbot sent a hung task report and Eric explains that adversarial
receiver may keep RWIN at 0 for a long time, so we are not guaranteed
to make forward progress. Thread which took tx_lock and went to sleep
may not release tx_lock for hours. Use interruptible sleep where
possible and reschedule the work if it can't take the lock.

Testing: existing selftest passes

Reported-by: syzbot+9c0268252b8ef967c62e@syzkaller.appspotmail.com
Fixes: 79ffe6087e91 ("net/tls: add a TX lock")
Link: https://lore.kernel.org/all/000000000000e412e905f5b46201@google.com/
Cc: stable@vger.kernel.org # wait 4 weeks
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230301002857.2101894-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_sw.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 021d760f9133..635b8bf6b937 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -956,7 +956,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 			       MSG_CMSG_COMPAT))
 		return -EOPNOTSUPP;
 
-	mutex_lock(&tls_ctx->tx_lock);
+	ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
+	if (ret)
+		return ret;
 	lock_sock(sk);
 
 	if (unlikely(msg->msg_controllen)) {
@@ -1290,7 +1292,9 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
 		return -EOPNOTSUPP;
 
-	mutex_lock(&tls_ctx->tx_lock);
+	ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
+	if (ret)
+		return ret;
 	lock_sock(sk);
 	ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
 	release_sock(sk);
@@ -2435,11 +2439,19 @@ static void tx_work_handler(struct work_struct *work)
 
 	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
 		return;
-	mutex_lock(&tls_ctx->tx_lock);
-	lock_sock(sk);
-	tls_tx_records(sk, -1);
-	release_sock(sk);
-	mutex_unlock(&tls_ctx->tx_lock);
+
+	if (mutex_trylock(&tls_ctx->tx_lock)) {
+		lock_sock(sk);
+		tls_tx_records(sk, -1);
+		release_sock(sk);
+		mutex_unlock(&tls_ctx->tx_lock);
+	} else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+		/* Someone is holding the tx_lock, they will likely run Tx
+		 * and cancel the work on their way out of the lock section.
+		 * Schedule a long delay just in case.
+		 */
+		schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10));
+	}
 }
 
 static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
-- 
cgit 


From 5c1ebbfabcd61142a4551bfc0e51840f9bdae7af Mon Sep 17 00:00:00 2001
From: Brian Vazquez <brianvv@google.com>
Date: Wed, 1 Mar 2023 13:32:47 +0000
Subject: net: use indirect calls helpers for sk_exit_memory_pressure()

Florian reported a regression and sent a patch with the following
changelog:

<quote>
 There is a noticeable tcp performance regression (loopback or cross-netns),
 seen with iperf3 -Z (sendfile mode) when generic retpolines are needed.

 With SK_RECLAIM_THRESHOLD checks gone number of calls to enter/leave
 memory pressure happen much more often. For TCP indirect calls are
 used.

 We can't remove the if-set-return short-circuit check in
 tcp_enter_memory_pressure because there are callers other than
 sk_enter_memory_pressure.  Doing a check in the sk wrapper too
 reduces the indirect calls enough to recover some performance.

 Before,
 0.00-60.00  sec   322 GBytes  46.1 Gbits/sec                  receiver

 After:
 0.00-60.04  sec   359 GBytes  51.4 Gbits/sec                  receiver

 "iperf3 -c $peer -t 60 -Z -f g", connected via veth in another netns.
</quote>

It seems we forgot to upstream this indirect call mitigation we
had for years, lets do this instead.

[edumazet] - It seems we forgot to upstream this indirect call
             mitigation we had for years, let's do this instead.
           - Changed to INDIRECT_CALL_INET_1() to avoid bots reports.

Fixes: 4890b686f408 ("net: keep sk->sk_forward_alloc as small as possible")
Reported-by: Florian Westphal <fw@strlen.de>
Link: https://lore.kernel.org/netdev/20230227152741.4a53634b@kernel.org/T/
Signed-off-by: Brian Vazquez <brianvv@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230301133247.2346111-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/core/sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 341c565dbc26..c25888795390 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2818,7 +2818,8 @@ static void sk_enter_memory_pressure(struct sock *sk)
 static void sk_leave_memory_pressure(struct sock *sk)
 {
 	if (sk->sk_prot->leave_memory_pressure) {
-		sk->sk_prot->leave_memory_pressure(sk);
+		INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
+				     tcp_leave_memory_pressure, sk);
 	} else {
 		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
 
-- 
cgit 


From 6c993779ea1d0cccdb3a5d7d45446dd229e610a3 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 16 Feb 2023 23:25:04 -0500
Subject: ca8210: fix mac_len negative array access

This patch fixes a buffer overflow access of skb->data if
ieee802154_hdr_peek_addrs() fails.

Reported-by: lianhui tang <bluetlh@gmail.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230217042504.3303396-1-aahringo@redhat.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 drivers/net/ieee802154/ca8210.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index e1a569b99e4a..0b0c6c0764fe 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -1913,6 +1913,8 @@ static int ca8210_skb_tx(
 	 * packet
 	 */
 	mac_len = ieee802154_hdr_peek_addrs(skb, &header);
+	if (mac_len < 0)
+		return mac_len;
 
 	secspec.security_level = header.sec.level;
 	secspec.key_id_mode = header.sec.key_id_mode;
-- 
cgit 


From 02f18662f6c671382345fcb696e808d78f4c194a Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 1 Mar 2023 16:44:50 +0100
Subject: ieee802154: Prevent user from crashing the host

Avoid crashing the machine by checking
info->attrs[NL802154_ATTR_SCAN_TYPE] presence before de-referencing it,
which was the primary intend of the blamed patch.

Reported-by: Sanan Hasanov <sanan.hasanov@Knights.ucf.edu>
Suggested-by: Eric Dumazet <edumazet@google.com>
Fixes: a0b6106672b5 ("ieee802154: Convert scan error messages to extack")
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/r/20230301154450.547716-1-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 net/ieee802154/nl802154.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 2215f576ee37..d8f4379d4fa6 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1412,7 +1412,7 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 		return -EOPNOTSUPP;
 	}
 
-	if (!nla_get_u8(info->attrs[NL802154_ATTR_SCAN_TYPE])) {
+	if (!info->attrs[NL802154_ATTR_SCAN_TYPE]) {
 		NL_SET_ERR_MSG(info->extack, "Malformed request, missing scan type");
 		return -EINVAL;
 	}
-- 
cgit 


From e57cf3639c323eeed05d3725fd82f91b349adca8 Mon Sep 17 00:00:00 2001
From: Yuiko Oshino <yuiko.oshino@microchip.com>
Date: Wed, 1 Mar 2023 08:43:07 -0700
Subject: net: lan78xx: fix accessing the LAN7800's internal phy specific
 registers from the MAC driver

Move the LAN7800 internal phy (phy ID  0x0007c132) specific register
accesses to the phy driver (microchip.c).

Fix the error reported by Enguerrand de Ribaucourt in December 2022,
"Some operations during the cable switch workaround modify the register
LAN88XX_INT_MASK of the PHY. However, this register is specific to the
LAN8835 PHY. For instance, if a DP8322I PHY is connected to the LAN7801,
that register (0x19), corresponds to the LED and MAC address
configuration, resulting in unapropriate behavior."

I did not test with the DP8322I PHY, but I tested with an EVB-LAN7800
with the internal PHY.

Fixes: 14437e3fa284 ("lan78xx: workaround of forced 100 Full/Half duplex mode error")
Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20230301154307.30438-1-yuiko.oshino@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/microchip.c | 32 ++++++++++++++++++++++++++++++++
 drivers/net/usb/lan78xx.c   | 27 +--------------------------
 2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index ccecee2524ce..0b88635f4fbc 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -342,6 +342,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev)
 	return genphy_config_aneg(phydev);
 }
 
+static void lan88xx_link_change_notify(struct phy_device *phydev)
+{
+	int temp;
+
+	/* At forced 100 F/H mode, chip may fail to set mode correctly
+	 * when cable is switched between long(~50+m) and short one.
+	 * As workaround, set to 10 before setting to 100
+	 * at forced 100 F/H mode.
+	 */
+	if (!phydev->autoneg && phydev->speed == 100) {
+		/* disable phy interrupt */
+		temp = phy_read(phydev, LAN88XX_INT_MASK);
+		temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
+		phy_write(phydev, LAN88XX_INT_MASK, temp);
+
+		temp = phy_read(phydev, MII_BMCR);
+		temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
+		phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
+		temp |= BMCR_SPEED100;
+		phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
+
+		/* clear pending interrupt generated while workaround */
+		temp = phy_read(phydev, LAN88XX_INT_STS);
+
+		/* enable phy interrupt back */
+		temp = phy_read(phydev, LAN88XX_INT_MASK);
+		temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
+		phy_write(phydev, LAN88XX_INT_MASK, temp);
+	}
+}
+
 static struct phy_driver microchip_phy_driver[] = {
 {
 	.phy_id		= 0x0007c132,
@@ -359,6 +390,7 @@ static struct phy_driver microchip_phy_driver[] = {
 
 	.config_init	= lan88xx_config_init,
 	.config_aneg	= lan88xx_config_aneg,
+	.link_change_notify = lan88xx_link_change_notify,
 
 	.config_intr	= lan88xx_phy_config_intr,
 	.handle_interrupt = lan88xx_handle_interrupt,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index f18ab8e220db..068488890d57 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2115,33 +2115,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev)
 static void lan78xx_link_status_change(struct net_device *net)
 {
 	struct phy_device *phydev = net->phydev;
-	int temp;
-
-	/* At forced 100 F/H mode, chip may fail to set mode correctly
-	 * when cable is switched between long(~50+m) and short one.
-	 * As workaround, set to 10 before setting to 100
-	 * at forced 100 F/H mode.
-	 */
-	if (!phydev->autoneg && (phydev->speed == 100)) {
-		/* disable phy interrupt */
-		temp = phy_read(phydev, LAN88XX_INT_MASK);
-		temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
-		phy_write(phydev, LAN88XX_INT_MASK, temp);
 
-		temp = phy_read(phydev, MII_BMCR);
-		temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
-		phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
-		temp |= BMCR_SPEED100;
-		phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
-
-		/* clear pending interrupt generated while workaround */
-		temp = phy_read(phydev, LAN88XX_INT_STS);
-
-		/* enable phy interrupt back */
-		temp = phy_read(phydev, LAN88XX_INT_MASK);
-		temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
-		phy_write(phydev, LAN88XX_INT_MASK, temp);
-	}
+	phy_print_status(phydev);
 }
 
 static int irq_map(struct irq_domain *d, unsigned int irq,
-- 
cgit 


From 9781e98a97110f5e76999058368b4be76a788484 Mon Sep 17 00:00:00 2001
From: Shigeru Yoshida <syoshida@redhat.com>
Date: Thu, 2 Mar 2023 01:39:13 +0900
Subject: net: caif: Fix use-after-free in cfusbl_device_notify()

syzbot reported use-after-free in cfusbl_device_notify() [1].  This
causes a stack trace like below:

BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138
Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214

CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: netns cleanup_net
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313
 print_report mm/kasan/report.c:429 [inline]
 kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
 cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138
 notifier_call_chain+0xb5/0x200 kernel/notifier.c:87
 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945
 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline]
 call_netdevice_notifiers net/core/dev.c:1997 [inline]
 netdev_wait_allrefs_any net/core/dev.c:10227 [inline]
 netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341
 default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334
 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167
 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594
 process_one_work+0x996/0x1610 kernel/workqueue.c:2289
 worker_thread+0x665/0x1080 kernel/workqueue.c:2436
 kthread+0x2e9/0x3a0 kernel/kthread.c:376
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302
 </TASK>

When unregistering a net device, unregister_netdevice_many_notify()
sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers
with NETDEV_UNREGISTER, and adds the device to the todo list.

Later on, devices in the todo list are processed by netdev_run_todo().
netdev_run_todo() waits devices' reference count become 1 while
rebdoadcasting NETDEV_UNREGISTER notification.

When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple
times, the parent device might be freed.  This could cause UAF.
Processing NETDEV_UNREGISTER multiple times also causes inbalance of
reference count for the module.

This patch fixes the issue by accepting only first NETDEV_UNREGISTER
notification.

Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface")
CC: sjur.brandeland@stericsson.com <sjur.brandeland@stericsson.com>
Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1]
Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/caif/caif_usb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index ebc202ffdd8d..bf61ea4b8132 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
 	struct usb_device *usbdev;
 	int res;
 
+	if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED)
+		return 0;
+
 	/* Check whether we have a NCM device, and find its VID/PID. */
 	if (!(dev->dev.parent && dev->dev.parent->driver &&
 	      strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0))
-- 
cgit 


From 7cf93538e087a792cb476008a757ab7c1c23b68c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 1 Mar 2023 10:36:40 -0800
Subject: tools: ynl: fully inherit attrs in subsets

To avoid having to repeat the entire definition of an attribute
(including the value) use the Attr object from the original set.
In fact this is already the documented expectation.

Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/userspace-api/netlink/specs.rst |  3 ++-
 tools/net/ynl/lib/nlspec.py                   | 23 +++++++++++++++--------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index 6ffe8137cd90..1424ab1b9b33 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -199,7 +199,8 @@ The ``value`` property can be skipped, in which case the attribute ID
 will be the value of the previous attribute plus one (recursively)
 and ``0`` for the first attribute in the attribute set.
 
-Note that the ``value`` of an attribute is defined only in its main set.
+Note that the ``value`` of an attribute is defined only in its main set
+(not in subsets).
 
 enum
 ~~~~
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 71da568e2c28..dff31dad36c5 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -95,15 +95,22 @@ class SpecAttrSet(SpecElement):
         self.attrs = collections.OrderedDict()
         self.attrs_by_val = collections.OrderedDict()
 
-        val = 0
-        for elem in self.yaml['attributes']:
-            if 'value' in elem:
-                val = elem['value']
+        if self.subset_of is None:
+            val = 0
+            for elem in self.yaml['attributes']:
+                if 'value' in elem:
+                    val = elem['value']
 
-            attr = self.new_attr(elem, val)
-            self.attrs[attr.name] = attr
-            self.attrs_by_val[attr.value] = attr
-            val += 1
+                attr = self.new_attr(elem, val)
+                self.attrs[attr.name] = attr
+                self.attrs_by_val[attr.value] = attr
+                val += 1
+        else:
+            real_set = family.attr_sets[self.subset_of]
+            for elem in self.yaml['attributes']:
+                attr = real_set[elem['name']]
+                self.attrs[attr.name] = attr
+                self.attrs_by_val[attr.value] = attr
 
     def new_attr(self, elem, value):
         return SpecAttr(self.family, self, elem, value)
-- 
cgit 


From ad4fafcde5bc1badb8fc2c7f260a5d6b83a038e4 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 1 Mar 2023 10:36:41 -0800
Subject: tools: ynl: use 1 as the default for first entry in attrs/ops

Pretty much all families use value: 1 or reserve as unspec
the first entry in attribute set and the first operation.
Make this the default. Update documentation (the doc for
values of operations just refers back to doc for attrs
so updating only attrs).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/userspace-api/netlink/specs.rst | 7 ++++++-
 tools/net/ynl/lib/nlspec.py                   | 6 +++---
 tools/net/ynl/ynl-gen-c.py                    | 7 +++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index 1424ab1b9b33..32e53328d113 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -197,7 +197,12 @@ value
 Numerical attribute ID, used in serialized Netlink messages.
 The ``value`` property can be skipped, in which case the attribute ID
 will be the value of the previous attribute plus one (recursively)
-and ``0`` for the first attribute in the attribute set.
+and ``1`` for the first attribute in the attribute set.
+
+Attributes (and operations) use ``1`` as the default value for the first
+entry (unlike enums in definitions which start from ``0``) because
+entry ``0`` is almost always reserved as undefined. Spec can explicitly
+set value to ``0`` if needed.
 
 Note that the ``value`` of an attribute is defined only in its main set
 (not in subsets).
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index dff31dad36c5..9d394e50de23 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -96,7 +96,7 @@ class SpecAttrSet(SpecElement):
         self.attrs_by_val = collections.OrderedDict()
 
         if self.subset_of is None:
-            val = 0
+            val = 1
             for elem in self.yaml['attributes']:
                 if 'value' in elem:
                     val = elem['value']
@@ -252,7 +252,7 @@ class SpecFamily(SpecElement):
         self._resolution_list.append(elem)
 
     def _dictify_ops_unified(self):
-        val = 0
+        val = 1
         for elem in self.yaml['operations']['list']:
             if 'value' in elem:
                 val = elem['value']
@@ -263,7 +263,7 @@ class SpecFamily(SpecElement):
             self.msgs[op.name] = op
 
     def _dictify_ops_directional(self):
-        req_val = rsp_val = 0
+        req_val = rsp_val = 1
         for elem in self.yaml['operations']['list']:
             if 'notify' in elem:
                 if 'value' in elem:
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 274e9c566f61..62f8f2c3c56c 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -2044,14 +2044,17 @@ def render_uapi(family, cw):
     max_value = f"({cnt_name} - 1)"
 
     uapi_enum_start(family, cw, family['operations'], 'enum-name')
+    val = 0
     for op in family.msgs.values():
         if separate_ntf and ('notify' in op or 'event' in op):
             continue
 
         suffix = ','
-        if 'value' in op:
-            suffix = f" = {op['value']},"
+        if op.value != val:
+            suffix = f" = {op.value},"
+            val = op.value
         cw.p(op.enum_name + suffix)
+        val += 1
     cw.nl()
     cw.p(cnt_name + ('' if max_by_define else ','))
     if not max_by_define:
-- 
cgit 


From bcec7171eba901cc5f427ebbad9fe17ed4749b8f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 1 Mar 2023 10:36:42 -0800
Subject: netlink: specs: update for codegen enumerating from 1

Now that the codegen rules had been changed we can update
the specs to reflect the new default.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/netlink/specs/ethtool.yaml | 15 ---------------
 Documentation/netlink/specs/fou.yaml     |  2 ++
 Documentation/netlink/specs/netdev.yaml  |  2 --
 3 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 08b776908d15..35c462bce56f 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -11,7 +11,6 @@ attribute-sets:
       -
         name: dev-index
         type: u32
-        value: 1
       -
         name: dev-name
         type: string
@@ -25,7 +24,6 @@ attribute-sets:
       -
         name: index
         type: u32
-        value: 1
       -
         name: name
         type: string
@@ -39,14 +37,12 @@ attribute-sets:
         name: bit
         type: nest
         nested-attributes: bitset-bit
-        value: 1
   -
     name: bitset
     attributes:
       -
         name: nomask
         type: flag
-        value: 1
       -
         name: size
         type: u32
@@ -61,7 +57,6 @@ attribute-sets:
       -
         name: index
         type: u32
-        value: 1
       -
         name: value
         type: string
@@ -71,7 +66,6 @@ attribute-sets:
       -
         name: string
         type: nest
-        value: 1
         multi-attr: true
         nested-attributes: string
   -
@@ -80,7 +74,6 @@ attribute-sets:
       -
         name: id
         type: u32
-        value: 1
       -
         name: count
         type: u32
@@ -96,14 +89,12 @@ attribute-sets:
         name: stringset
         type: nest
         multi-attr: true
-        value: 1
         nested-attributes: stringset
   -
     name: strset
     attributes:
       -
         name: header
-        value: 1
         type: nest
         nested-attributes: header
       -
@@ -119,7 +110,6 @@ attribute-sets:
     attributes:
       -
         name: header
-        value: 1
         type: nest
         nested-attributes: header
       -
@@ -132,7 +122,6 @@ attribute-sets:
     attributes:
       -
         name: header
-        value: 1
         type: nest
         nested-attributes: header
       -
@@ -180,7 +169,6 @@ attribute-sets:
     attributes:
       -
         name: pad
-        value: 1
         type: pad
       -
         name: reassembly-errors
@@ -205,7 +193,6 @@ attribute-sets:
     attributes:
       -
         name: header
-        value: 1
         type: nest
         nested-attributes: header
       -
@@ -251,13 +238,11 @@ operations:
 
       do: &strset-get-op
         request:
-          value: 1
           attributes:
             - header
             - stringsets
             - counts-only
         reply:
-          value: 1
           attributes:
             - header
             - stringsets
diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml
index 266c386eedf3..cca4cf98f03a 100644
--- a/Documentation/netlink/specs/fou.yaml
+++ b/Documentation/netlink/specs/fou.yaml
@@ -26,6 +26,7 @@ attribute-sets:
       -
         name: unspec
         type: unused
+        value: 0
       -
         name: port
         type: u16
@@ -71,6 +72,7 @@ operations:
     -
       name: unspec
       doc: unused
+      value: 0
 
     -
       name: add
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index cffef09729f1..ba9ee13cf729 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -48,7 +48,6 @@ attribute-sets:
         name: ifindex
         doc: netdev ifindex
         type: u32
-        value: 1
         checks:
           min: 1
       -
@@ -66,7 +65,6 @@ operations:
     -
       name: dev-get
       doc: Get / dump information about a netdev.
-      value: 1
       attribute-set: dev
       do:
         request:
-- 
cgit 


From 84cba1840e68430325ac133a11be06bfb2f7acd8 Mon Sep 17 00:00:00 2001
From: Petr Oros <poros@redhat.com>
Date: Wed, 1 Mar 2023 21:47:07 +0100
Subject: ice: copy last block omitted in ice_get_module_eeprom()

ice_get_module_eeprom() is broken since commit e9c9692c8a81 ("ice:
Reimplement module reads used by ethtool") In this refactor,
ice_get_module_eeprom() reads the eeprom in blocks of size 8.
But the condition that should protect the buffer overflow
ignores the last block. The last block always contains zeros.

Bug uncovered by ethtool upstream commit 9538f384b535
("netlink: eeprom: Defer page requests to individual parsers")
After this commit, ethtool reads a block with length = 1;
to read the SFF-8024 identifier value.

unpatched driver:
$ ethtool -m enp65s0f0np0 offset 0x90 length 8
Offset          Values
------          ------
0x0090:         00 00 00 00 00 00 00 00
$ ethtool -m enp65s0f0np0 offset 0x90 length 12
Offset          Values
------          ------
0x0090:         00 00 01 a0 4d 65 6c 6c 00 00 00 00
$

$ ethtool -m enp65s0f0np0
Offset          Values
------          ------
0x0000:         11 06 06 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0010:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0020:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0030:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0040:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0050:         00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0060:         00 00 00 00 00 00 00 00 00 00 00 00 00 01 08 00
0x0070:         00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00

patched driver:
$ ethtool -m enp65s0f0np0 offset 0x90 length 8
Offset          Values
------          ------
0x0090:         00 00 01 a0 4d 65 6c 6c
$ ethtool -m enp65s0f0np0 offset 0x90 length 12
Offset          Values
------          ------
0x0090:         00 00 01 a0 4d 65 6c 6c 61 6e 6f 78
$ ethtool -m enp65s0f0np0
    Identifier                                : 0x11 (QSFP28)
    Extended identifier                       : 0x00
    Extended identifier description           : 1.5W max. Power consumption
    Extended identifier description           : No CDR in TX, No CDR in RX
    Extended identifier description           : High Power Class (> 3.5 W) not enabled
    Connector                                 : 0x23 (No separable connector)
    Transceiver codes                         : 0x88 0x00 0x00 0x00 0x00 0x00 0x00 0x00
    Transceiver type                          : 40G Ethernet: 40G Base-CR4
    Transceiver type                          : 25G Ethernet: 25G Base-CR CA-N
    Encoding                                  : 0x05 (64B/66B)
    BR, Nominal                               : 25500Mbps
    Rate identifier                           : 0x00
    Length (SMF,km)                           : 0km
    Length (OM3 50um)                         : 0m
    Length (OM2 50um)                         : 0m
    Length (OM1 62.5um)                       : 0m
    Length (Copper or Active cable)           : 1m
    Transmitter technology                    : 0xa0 (Copper cable unequalized)
    Attenuation at 2.5GHz                     : 4db
    Attenuation at 5.0GHz                     : 5db
    Attenuation at 7.0GHz                     : 7db
    Attenuation at 12.9GHz                    : 10db
    ........
    ....

Fixes: e9c9692c8a81 ("ice: Reimplement module reads used by ethtool")
Signed-off-by: Petr Oros <poros@redhat.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index b360bd8f1599..f86e814354a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -4331,6 +4331,8 @@ ice_get_module_eeprom(struct net_device *netdev,
 		 * SFP modules only ever use page 0.
 		 */
 		if (page == 0 || !(data[0x2] & 0x4)) {
+			u32 copy_len;
+
 			/* If i2c bus is busy due to slow page change or
 			 * link management access, call can fail. This is normal.
 			 * So we retry this a few times.
@@ -4354,8 +4356,8 @@ ice_get_module_eeprom(struct net_device *netdev,
 			}
 
 			/* Make sure we have enough room for the new block */
-			if ((i + SFF_READ_BLOCK_SIZE) < ee->len)
-				memcpy(data + i, value, SFF_READ_BLOCK_SIZE);
+			copy_len = min_t(u32, SFF_READ_BLOCK_SIZE, ee->len - i);
+			memcpy(data + i, value, copy_len);
 		}
 	}
 	return 0;
-- 
cgit 


From 3e04419cbe2d0bc10ffc20c006c6513ffa4b1ca3 Mon Sep 17 00:00:00 2001
From: Huanhuan Wang <huanhuan.wang@corigine.com>
Date: Thu, 2 Mar 2023 10:58:28 +0100
Subject: nfp: fix incorrectly set csum flag for nfd3 path

The csum flag of IPsec packet are set repeatedly. Therefore, the csum
flag set of IPsec and non-IPsec packet need to be distinguished.

As the ipv6 header does not have a csum field, so l3-csum flag is not
required to be set for ipv6 case.

L4-csum flag include the tcp csum flag and udp csum flag, we shouldn't
set the udp and tcp csum flag at the same time for one packet, should
set l4-csum flag according to the transport layer is tcp or udp.

Fixes: 57f273adbcd4 ("nfp: add framework to support ipsec offloading")
Signed-off-by: Huanhuan Wang <huanhuan.wang@corigine.com>
Reviewed-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfd3/dp.c    |  7 ++++---
 drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c | 25 +++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
index 59fb0583cc08..0cc026b0aefd 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
@@ -324,14 +324,15 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev)
 
 	/* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
 	nfp_nfd3_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
-	nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb);
+	if (ipsec)
+		nfp_nfd3_ipsec_tx(txd, skb);
+	else
+		nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb);
 	if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
 		txd->flags |= NFD3_DESC_TX_VLAN;
 		txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
 	}
 
-	if (ipsec)
-		nfp_nfd3_ipsec_tx(txd, skb);
 	/* Gather DMA */
 	if (nr_frags > 0) {
 		__le64 second_half;
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c b/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c
index e90f8c975903..51087693072c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c
@@ -10,9 +10,30 @@
 void nfp_nfd3_ipsec_tx(struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct iphdr *iph = ip_hdr(skb);
+	int l4_proto;
 
 	if (x->xso.dev && (x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)) {
-		txd->flags |= NFD3_DESC_TX_CSUM | NFD3_DESC_TX_IP4_CSUM |
-			      NFD3_DESC_TX_TCP_CSUM | NFD3_DESC_TX_UDP_CSUM;
+		txd->flags |= NFD3_DESC_TX_CSUM;
+
+		if (iph->version == 4)
+			txd->flags |= NFD3_DESC_TX_IP4_CSUM;
+
+		if (x->props.mode == XFRM_MODE_TRANSPORT)
+			l4_proto = xo->proto;
+		else if (x->props.mode == XFRM_MODE_TUNNEL)
+			l4_proto = xo->inner_ipproto;
+		else
+			return;
+
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			txd->flags |= NFD3_DESC_TX_UDP_CSUM;
+			return;
+		case IPPROTO_TCP:
+			txd->flags |= NFD3_DESC_TX_TCP_CSUM;
+			return;
+		}
 	}
 }
-- 
cgit 


From 8b46168ca79c3aad04bc20605e523127266624d4 Mon Sep 17 00:00:00 2001
From: Huanhuan Wang <huanhuan.wang@corigine.com>
Date: Thu, 2 Mar 2023 10:58:29 +0100
Subject: nfp: fix incorrectly set csum flag for nfdk path

The csum flag of IPsec packet are set repeatedly. Therefore, the csum
flag set of IPsec and non-IPsec packet need to be distinguished.

As the ipv6 header does not have a csum field, so l3-csum flag is not
required to be set for ipv6 case.

Fixes: 436396f26d50 ("nfp: support IPsec offloading for NFP3800")
Signed-off-by: Huanhuan Wang <huanhuan.wang@corigine.com>
Reviewed-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfdk/dp.c    | 6 ++++--
 drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c | 8 ++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index d60c0e991a91..33b6d74adb4b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -387,7 +387,8 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
 	if (!skb_is_gso(skb)) {
 		real_len = skb->len;
 		/* Metadata desc */
-		metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
+		if (!ipsec)
+			metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
 		txd->raw = cpu_to_le64(metadata);
 		txd++;
 	} else {
@@ -395,7 +396,8 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
 		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
 		real_len = txbuf->real_len;
 		/* Metadata desc */
-		metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
+		if (!ipsec)
+			metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
 		txd->raw = cpu_to_le64(metadata);
 		txd += 2;
 		txbuf++;
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c b/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c
index 58d8f59eb885..cec199f4c852 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c
@@ -9,9 +9,13 @@
 u64 nfp_nfdk_ipsec_tx(u64 flags, struct sk_buff *skb)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
+	struct iphdr *iph = ip_hdr(skb);
 
-	if (x->xso.dev && (x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM))
-		flags |= NFDK_DESC_TX_L3_CSUM | NFDK_DESC_TX_L4_CSUM;
+	if (x->xso.dev && (x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)) {
+		if (iph->version == 4)
+			flags |= NFDK_DESC_TX_L3_CSUM;
+		flags |= NFDK_DESC_TX_L4_CSUM;
+	}
 
 	return flags;
 }
-- 
cgit 


From 1cf78d4c4144ffdbc2c9d505db482cb204bb480b Mon Sep 17 00:00:00 2001
From: Huanhuan Wang <huanhuan.wang@corigine.com>
Date: Thu, 2 Mar 2023 10:58:30 +0100
Subject: nfp: fix esp-tx-csum-offload doesn't take effect

When esp-tx-csum-offload is set to on, the protocol stack shouldn't
calculate the IPsec offload packet's csum, but it does. Because the
callback `.ndo_features_check` incorrectly masked NETIF_F_CSUM_MASK bit.

Fixes: 57f273adbcd4 ("nfp: add framework to support ipsec offloading")
Signed-off-by: Huanhuan Wang <huanhuan.wang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 81b7ca0ad222..62f0bf91d1e1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -38,6 +38,7 @@
 #include <net/tls.h>
 #include <net/vxlan.h>
 #include <net/xdp_sock_drv.h>
+#include <net/xfrm.h>
 
 #include "nfpcore/nfp_dev.h"
 #include "nfpcore/nfp_nsp.h"
@@ -1897,6 +1898,9 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
 			features &= ~NETIF_F_GSO_MASK;
 	}
 
+	if (xfrm_offload(skb))
+		return features;
+
 	/* VXLAN/GRE check */
 	switch (vlan_get_protocol(skb)) {
 	case htons(ETH_P_IP):
-- 
cgit 


From d900f3d20cc3169ce42ec72acc850e662a4d4db2 Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Fri, 3 Mar 2023 16:09:46 +0800
Subject: bpf, sockmap: Fix an infinite loop error when len is 0 in
 tcp_bpf_recvmsg_parser()

When the buffer length of the recvmsg system call is 0, we got the
flollowing soft lockup problem:

watchdog: BUG: soft lockup - CPU#3 stuck for 27s! [a.out:6149]
CPU: 3 PID: 6149 Comm: a.out Kdump: loaded Not tainted 6.2.0+ #30
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
RIP: 0010:remove_wait_queue+0xb/0xc0
Code: 5e 41 5f c3 cc cc cc cc 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 41 57 <41> 56 41 55 41 54 55 48 89 fd 53 48 89 f3 4c 8d 6b 18 4c 8d 73 20
RSP: 0018:ffff88811b5978b8 EFLAGS: 00000246
RAX: 0000000000000000 RBX: ffff88811a7d3780 RCX: ffffffffb7a4d768
RDX: dffffc0000000000 RSI: ffff88811b597908 RDI: ffff888115408040
RBP: 1ffff110236b2f1b R08: 0000000000000000 R09: ffff88811a7d37e7
R10: ffffed10234fa6fc R11: 0000000000000001 R12: ffff88811179b800
R13: 0000000000000001 R14: ffff88811a7d38a8 R15: ffff88811a7d37e0
FS:  00007f6fb5398740(0000) GS:ffff888237180000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000000 CR3: 000000010b6ba002 CR4: 0000000000370ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 tcp_msg_wait_data+0x279/0x2f0
 tcp_bpf_recvmsg_parser+0x3c6/0x490
 inet_recvmsg+0x280/0x290
 sock_recvmsg+0xfc/0x120
 ____sys_recvmsg+0x160/0x3d0
 ___sys_recvmsg+0xf0/0x180
 __sys_recvmsg+0xea/0x1a0
 do_syscall_64+0x3f/0x90
 entry_SYSCALL_64_after_hwframe+0x72/0xdc

The logic in tcp_bpf_recvmsg_parser is as follows:

msg_bytes_ready:
	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
	if (!copied) {
		wait data;
		goto msg_bytes_ready;
	}

In this case, "copied" always is 0, the infinite loop occurs.

According to the Linux system call man page, 0 should be returned in this
case. Therefore, in tcp_bpf_recvmsg_parser(), if the length is 0, directly
return. Also modify several other functions with the same problem.

Fixes: 1f5be6b3b063 ("udp: Implement udp_bpf_recvmsg() for sockmap")
Fixes: 9825d866ce0d ("af_unix: Implement unix_dgram_bpf_recvmsg()")
Fixes: c5d2177a72a1 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self")
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20230303080946.1146638-1-liujian56@huawei.com
---
 net/ipv4/tcp_bpf.c  | 6 ++++++
 net/ipv4/udp_bpf.c  | 3 +++
 net/unix/unix_bpf.c | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index cf26d65ca389..ebf917511937 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -186,6 +186,9 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
 
+	if (!len)
+		return 0;
+
 	psock = sk_psock_get(sk);
 	if (unlikely(!psock))
 		return tcp_recvmsg(sk, msg, len, flags, addr_len);
@@ -244,6 +247,9 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
 
+	if (!len)
+		return 0;
+
 	psock = sk_psock_get(sk);
 	if (unlikely(!psock))
 		return tcp_recvmsg(sk, msg, len, flags, addr_len);
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index e5dc91d0e079..0735d820e413 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -68,6 +68,9 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
 
+	if (!len)
+		return 0;
+
 	psock = sk_psock_get(sk);
 	if (unlikely(!psock))
 		return sk_udp_recvmsg(sk, msg, len, flags, addr_len);
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index e9bf15513961..2f9d8271c6ec 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -54,6 +54,9 @@ static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
 	struct sk_psock *psock;
 	int copied;
 
+	if (!len)
+		return 0;
+
 	psock = sk_psock_get(sk);
 	if (unlikely(!psock))
 		return __unix_recvmsg(sk, msg, len, flags);
-- 
cgit 


From a9334b702a03b693f54ebd3b98f67bf722b74870 Mon Sep 17 00:00:00 2001
From: Rongguang Wei <weirongguang@kylinos.cn>
Date: Thu, 2 Mar 2023 14:21:43 +0800
Subject: net: stmmac: add to set device wake up flag when stmmac init phy

When MAC is not support PMT, driver will check PHY's WoL capability
and set device wakeup capability in stmmac_init_phy(). We can enable
the WoL through ethtool, the driver would enable the device wake up
flag. Now the device_may_wakeup() return true.

But if there is a way which enable the PHY's WoL capability derectly,
like in BIOS. The driver would not know the enable thing and would not
set the device wake up flag. The phy_suspend may failed like this:

[   32.409063] PM: dpm_run_callback(): mdio_bus_phy_suspend+0x0/0x50 returns -16
[   32.409065] PM: Device stmmac-1:00 failed to suspend: error -16
[   32.409067] PM: Some devices failed to suspend, or early wake event detected

Add to set the device wakeup enable flag according to the get_wol
function result in PHY can fix the error in this scene.

v2: add a Fixes tag.

Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy")
Signed-off-by: Rongguang Wei <weirongguang@kylinos.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e4902a7bb61e..8f543c3ab5c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1170,6 +1170,7 @@ static int stmmac_init_phy(struct net_device *dev)
 
 		phylink_ethtool_get_wol(priv->phylink, &wol);
 		device_set_wakeup_capable(priv->device, !!wol.supported);
+		device_set_wakeup_enable(priv->device, !!wol.wolopts);
 	}
 
 	return ret;
-- 
cgit 


From f4b47a2e9463950df3e7c8b70e017877c1d4eb11 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Fri, 3 Mar 2023 16:37:54 +0000
Subject: net: phylib: get rid of unnecessary locking

The locking in phy_probe() and phy_remove() does very little to prevent
any races with e.g. phy_attach_direct(), but instead causes lockdep ABBA
warnings. Remove it.

======================================================
WARNING: possible circular locking dependency detected
6.2.0-dirty #1108 Tainted: G        W   E
------------------------------------------------------
ip/415 is trying to acquire lock:
ffff5c268f81ef50 (&dev->lock){+.+.}-{3:3}, at: phy_attach_direct+0x17c/0x3a0 [libphy]

but task is already holding lock:
ffffaef6496cb518 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x154/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (rtnl_mutex){+.+.}-{3:3}:
       __lock_acquire+0x35c/0x6c0
       lock_acquire.part.0+0xcc/0x220
       lock_acquire+0x68/0x84
       __mutex_lock+0x8c/0x414
       mutex_lock_nested+0x34/0x40
       rtnl_lock+0x24/0x30
       sfp_bus_add_upstream+0x34/0x150
       phy_sfp_probe+0x4c/0x94 [libphy]
       mv3310_probe+0x148/0x184 [marvell10g]
       phy_probe+0x8c/0x200 [libphy]
       call_driver_probe+0xbc/0x15c
       really_probe+0xc0/0x320
       __driver_probe_device+0x84/0x120
       driver_probe_device+0x44/0x120
       __device_attach_driver+0xc4/0x160
       bus_for_each_drv+0x80/0xe0
       __device_attach+0xb0/0x1f0
       device_initial_probe+0x1c/0x2c
       bus_probe_device+0xa4/0xb0
       device_add+0x360/0x53c
       phy_device_register+0x60/0xa4 [libphy]
       fwnode_mdiobus_phy_device_register+0xc0/0x190 [fwnode_mdio]
       fwnode_mdiobus_register_phy+0x160/0xd80 [fwnode_mdio]
       of_mdiobus_register+0x140/0x340 [of_mdio]
       orion_mdio_probe+0x298/0x3c0 [mvmdio]
       platform_probe+0x70/0xe0
       call_driver_probe+0x34/0x15c
       really_probe+0xc0/0x320
       __driver_probe_device+0x84/0x120
       driver_probe_device+0x44/0x120
       __driver_attach+0x104/0x210
       bus_for_each_dev+0x78/0xdc
       driver_attach+0x2c/0x3c
       bus_add_driver+0x184/0x240
       driver_register+0x80/0x13c
       __platform_driver_register+0x30/0x3c
       xt_compat_calc_jump+0x28/0xa4 [x_tables]
       do_one_initcall+0x50/0x1b0
       do_init_module+0x50/0x1fc
       load_module+0x684/0x744
       __do_sys_finit_module+0xc4/0x140
       __arm64_sys_finit_module+0x28/0x34
       invoke_syscall+0x50/0x120
       el0_svc_common.constprop.0+0x6c/0x1b0
       do_el0_svc+0x34/0x44
       el0_svc+0x48/0xf0
       el0t_64_sync_handler+0xb8/0xc0
       el0t_64_sync+0x1a0/0x1a4

-> #0 (&dev->lock){+.+.}-{3:3}:
       check_prev_add+0xb4/0xc80
       validate_chain+0x414/0x47c
       __lock_acquire+0x35c/0x6c0
       lock_acquire.part.0+0xcc/0x220
       lock_acquire+0x68/0x84
       __mutex_lock+0x8c/0x414
       mutex_lock_nested+0x34/0x40
       phy_attach_direct+0x17c/0x3a0 [libphy]
       phylink_fwnode_phy_connect.part.0+0x70/0xe4 [phylink]
       phylink_fwnode_phy_connect+0x48/0x60 [phylink]
       mvpp2_open+0xec/0x2e0 [mvpp2]
       __dev_open+0x104/0x214
       __dev_change_flags+0x1d4/0x254
       dev_change_flags+0x2c/0x7c
       do_setlink+0x254/0xa50
       __rtnl_newlink+0x430/0x514
       rtnl_newlink+0x58/0x8c
       rtnetlink_rcv_msg+0x17c/0x560
       netlink_rcv_skb+0x64/0x150
       rtnetlink_rcv+0x20/0x30
       netlink_unicast+0x1d4/0x2b4
       netlink_sendmsg+0x1a4/0x400
       ____sys_sendmsg+0x228/0x290
       ___sys_sendmsg+0x88/0xec
       __sys_sendmsg+0x70/0xd0
       __arm64_sys_sendmsg+0x2c/0x40
       invoke_syscall+0x50/0x120
       el0_svc_common.constprop.0+0x6c/0x1b0
       do_el0_svc+0x34/0x44
       el0_svc+0x48/0xf0
       el0t_64_sync_handler+0xb8/0xc0
       el0t_64_sync+0x1a0/0x1a4

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(rtnl_mutex);
                               lock(&dev->lock);
                               lock(rtnl_mutex);
  lock(&dev->lock);

 *** DEADLOCK ***

Fixes: 298e54fa810e ("net: phy: add core phylib sfp support")
Reported-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 9e9fd8ff00f6..1785f1cead97 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3098,8 +3098,6 @@ static int phy_probe(struct device *dev)
 	if (phydrv->flags & PHY_IS_INTERNAL)
 		phydev->is_internal = true;
 
-	mutex_lock(&phydev->lock);
-
 	/* Deassert the reset signal */
 	phy_device_reset(phydev, 0);
 
@@ -3188,12 +3186,10 @@ static int phy_probe(struct device *dev)
 	phydev->state = PHY_READY;
 
 out:
-	/* Assert the reset signal */
+	/* Re-assert the reset signal on error */
 	if (err)
 		phy_device_reset(phydev, 1);
 
-	mutex_unlock(&phydev->lock);
-
 	return err;
 }
 
@@ -3203,9 +3199,7 @@ static int phy_remove(struct device *dev)
 
 	cancel_delayed_work_sync(&phydev->state_queue);
 
-	mutex_lock(&phydev->lock);
 	phydev->state = PHY_DOWN;
-	mutex_unlock(&phydev->lock);
 
 	sfp_bus_del_upstream(phydev->sfp_bus);
 	phydev->sfp_bus = NULL;
-- 
cgit 


From accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 3 Mar 2023 18:43:57 -0800
Subject: bnxt_en: Avoid order-5 memory allocation for TPA data

The driver needs to keep track of all the possible concurrent TPA (GRO/LRO)
completions on the aggregation ring.  On P5 chips, the maximum number
of concurrent TPA is 256 and the amount of memory we allocate is order-5
on systems using 4K pages.  Memory allocation failure has been reported:

NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1
CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1
Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022
Call Trace:
 dump_stack+0x57/0x6e
 warn_alloc.cold.120+0x7b/0xdd
 ? _cond_resched+0x15/0x30
 ? __alloc_pages_direct_compact+0x15f/0x170
 __alloc_pages_slowpath.constprop.108+0xc58/0xc70
 __alloc_pages_nodemask+0x2d0/0x300
 kmalloc_order+0x24/0xe0
 kmalloc_order_trace+0x19/0x80
 bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en]
 ? bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en]
 __bnxt_open_nic+0x12e/0x780 [bnxt_en]
 bnxt_open+0x10b/0x240 [bnxt_en]
 __dev_open+0xe9/0x180
 __dev_change_flags+0x1af/0x220
 dev_change_flags+0x21/0x60
 do_setlink+0x35c/0x1100

Instead of allocating this big chunk of memory and dividing it up for the
concurrent TPA instances, allocate each small chunk separately for each
TPA instance.  This will reduce it to order-0 allocations.

Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.")
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Damodharam Ammepalli <damodharam.ammepalli@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5d4b1f2ebeac..f96d539b469c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3145,7 +3145,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
 
 static void bnxt_free_tpa_info(struct bnxt *bp)
 {
-	int i;
+	int i, j;
 
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
@@ -3153,8 +3153,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
 		kfree(rxr->rx_tpa_idx_map);
 		rxr->rx_tpa_idx_map = NULL;
 		if (rxr->rx_tpa) {
-			kfree(rxr->rx_tpa[0].agg_arr);
-			rxr->rx_tpa[0].agg_arr = NULL;
+			for (j = 0; j < bp->max_tpa; j++) {
+				kfree(rxr->rx_tpa[j].agg_arr);
+				rxr->rx_tpa[j].agg_arr = NULL;
+			}
 		}
 		kfree(rxr->rx_tpa);
 		rxr->rx_tpa = NULL;
@@ -3163,14 +3165,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
 
 static int bnxt_alloc_tpa_info(struct bnxt *bp)
 {
-	int i, j, total_aggs = 0;
+	int i, j;
 
 	bp->max_tpa = MAX_TPA;
 	if (bp->flags & BNXT_FLAG_CHIP_P5) {
 		if (!bp->max_tpa_v2)
 			return 0;
 		bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
-		total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
 	}
 
 	for (i = 0; i < bp->rx_nr_rings; i++) {
@@ -3184,12 +3185,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
 
 		if (!(bp->flags & BNXT_FLAG_CHIP_P5))
 			continue;
-		agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
-		rxr->rx_tpa[0].agg_arr = agg;
-		if (!agg)
-			return -ENOMEM;
-		for (j = 1; j < bp->max_tpa; j++)
-			rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
+		for (j = 0; j < bp->max_tpa; j++) {
+			agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
+			if (!agg)
+				return -ENOMEM;
+			rxr->rx_tpa[j].agg_arr = agg;
+		}
 		rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
 					      GFP_KERNEL);
 		if (!rxr->rx_tpa_idx_map)
-- 
cgit 


From 89b59a84cb166f1ab5b6de9830e61324937c661e Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Fri, 3 Mar 2023 18:43:58 -0800
Subject: bnxt_en: Fix the double free during device removal

Following warning reported by KASAN during driver unload

==================================================================
BUG: KASAN: double-free in bnxt_remove_one+0x103/0x200 [bnxt_en]
Free of addr ffff88814e8dd4c0 by task rmmod/17469
CPU: 47 PID: 17469 Comm: rmmod Kdump: loaded Tainted: G S                 6.2.0-rc7+ #2
Hardware name: Dell Inc. PowerEdge R740/01YM03, BIOS 2.3.10 08/15/2019
Call Trace:
 <TASK>
 dump_stack_lvl+0x33/0x46
 print_report+0x17b/0x4b3
 ? __call_rcu_common.constprop.79+0x27e/0x8c0
 ? __pfx_free_object_rcu+0x10/0x10
 ? __virt_addr_valid+0xe3/0x160
 ? bnxt_remove_one+0x103/0x200 [bnxt_en]
 kasan_report_invalid_free+0x64/0xd0
 ? bnxt_remove_one+0x103/0x200 [bnxt_en]
 ? bnxt_remove_one+0x103/0x200 [bnxt_en]
 __kasan_slab_free+0x179/0x1c0
 ? bnxt_remove_one+0x103/0x200 [bnxt_en]
 __kmem_cache_free+0x194/0x350
 bnxt_remove_one+0x103/0x200 [bnxt_en]
 pci_device_remove+0x62/0x110
 device_release_driver_internal+0xf6/0x1c0
 driver_detach+0x76/0xe0
 bus_remove_driver+0x89/0x160
 pci_unregister_driver+0x26/0x110
 ? strncpy_from_user+0x188/0x1c0
 bnxt_exit+0xc/0x24 [bnxt_en]
 __x64_sys_delete_module+0x21f/0x390
 ? __pfx___x64_sys_delete_module+0x10/0x10
 ? __pfx_mem_cgroup_handle_over_high+0x10/0x10
 ? _raw_spin_lock+0x87/0xe0
 ? __pfx__raw_spin_lock+0x10/0x10
 ? __audit_syscall_entry+0x185/0x210
 ? ktime_get_coarse_real_ts64+0x51/0x80
 ? syscall_trace_enter.isra.18+0x126/0x1a0
 do_syscall_64+0x37/0x90
 entry_SYSCALL_64_after_hwframe+0x72/0xdc
RIP: 0033:0x7effcb6fd71b
Code: 73 01 c3 48 8b 0d 6d 17 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 3d 17 2c 00 f7 d8 64 89 01 48
RSP: 002b:00007ffeada270b8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
RAX: ffffffffffffffda RBX: 00005623660e0750 RCX: 00007effcb6fd71b
RDX: 000000000000000a RSI: 0000000000000800 RDI: 00005623660e07b8
RBP: 0000000000000000 R08: 00007ffeada26031 R09: 0000000000000000
R10: 00007effcb771280 R11: 0000000000000206 R12: 00007ffeada272e0
R13: 00007ffeada28bc4 R14: 00005623660e02a0 R15: 00005623660e0750
 </TASK>

Auxiliary device structures are freed in bnxt_aux_dev_release. So avoid
calling kfree from bnxt_remove_one.

Also, set bp->edev to NULL before freeing the auxilary private structure.

Fixes: d80d88b0dfff ("bnxt_en: Add auxiliary driver support")
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 2 --
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f96d539b469c..808236dc898b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -13205,8 +13205,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 	bnxt_free_hwrm_resources(bp);
 	bnxt_ethtool_free(bp);
 	bnxt_dcb_free(bp);
-	kfree(bp->edev);
-	bp->edev = NULL;
 	kfree(bp->ptp_cfg);
 	bp->ptp_cfg = NULL;
 	kfree(bp->fw_health);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index d4cc9c371e7b..e7b5e28ee29f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -317,9 +317,11 @@ static void bnxt_aux_dev_release(struct device *dev)
 {
 	struct bnxt_aux_priv *aux_priv =
 		container_of(dev, struct bnxt_aux_priv, aux_dev.dev);
+	struct bnxt *bp = netdev_priv(aux_priv->edev->net);
 
 	ida_free(&bnxt_aux_dev_ids, aux_priv->id);
 	kfree(aux_priv->edev->ulp_tbl);
+	bp->edev = NULL;
 	kfree(aux_priv->edev);
 	kfree(aux_priv);
 }
-- 
cgit 


From 9f7dd42f0db1dc6915a52d4a8a96ca18dd8cc34e Mon Sep 17 00:00:00 2001
From: Ivan Delalande <colona@arista.com>
Date: Thu, 2 Mar 2023 17:48:31 -0800
Subject: netfilter: ctnetlink: revert to dumping mark regardless of event type

It seems that change was unintentional, we have userspace code that
needs the mark while listening for events like REPLY, DESTROY, etc.
Also include 0-marks in requested dumps, as they were before that fix.

Fixes: 1feeae071507 ("netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark")
Signed-off-by: Ivan Delalande <colona@arista.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c11dff91d52d..bfc3aaa2c872 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -328,11 +328,12 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
+static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct,
+			       bool dump)
 {
 	u32 mark = READ_ONCE(ct->mark);
 
-	if (!mark)
+	if (!mark && !dump)
 		return 0;
 
 	if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
@@ -343,7 +344,7 @@ nla_put_failure:
 	return -1;
 }
 #else
-#define ctnetlink_dump_mark(a, b) (0)
+#define ctnetlink_dump_mark(a, b, c) (0)
 #endif
 
 #ifdef CONFIG_NF_CONNTRACK_SECMARK
@@ -548,7 +549,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb,
 static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
 {
 	if (ctnetlink_dump_status(skb, ct) < 0 ||
-	    ctnetlink_dump_mark(skb, ct) < 0 ||
+	    ctnetlink_dump_mark(skb, ct, true) < 0 ||
 	    ctnetlink_dump_secctx(skb, ct) < 0 ||
 	    ctnetlink_dump_id(skb, ct) < 0 ||
 	    ctnetlink_dump_use(skb, ct) < 0 ||
@@ -831,8 +832,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if (events & (1 << IPCT_MARK) &&
-	    ctnetlink_dump_mark(skb, ct) < 0)
+	if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
 		goto nla_put_failure;
 #endif
 	nlmsg_end(skb, nlh);
@@ -2735,7 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
 		goto nla_put_failure;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if (ctnetlink_dump_mark(skb, ct) < 0)
+	if (ctnetlink_dump_mark(skb, ct, true) < 0)
 		goto nla_put_failure;
 #endif
 	if (ctnetlink_dump_labels(skb, ct) < 0)
-- 
cgit 


From 4a02426787bf024dafdb79b362285ee325de3f5e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 3 Mar 2023 10:58:56 +0100
Subject: netfilter: tproxy: fix deadlock due to missing BH disable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xtables packet traverser performs an unconditional local_bh_disable(),
but the nf_tables evaluation loop does not.

Functions that are called from either xtables or nftables must assume
that they can be called in process context.

inet_twsk_deschedule_put() assumes that no softirq interrupt can occur.
If tproxy is used from nf_tables its possible that we'll deadlock
trying to aquire a lock already held in process context.

Add a small helper that takes care of this and use it.

Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/
Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support")
Reported-and-tested-by: Major Dávid <major.david@balasys.hu>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tproxy.h   | 7 +++++++
 net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +-
 net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 82d0e41b76f2..faa108b1ba67 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
 	return false;
 }
 
+static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw)
+{
+	local_bh_disable();
+	inet_twsk_deschedule_put(tw);
+	local_bh_enable();
+}
+
 /* assign a socket to the skb -- consumes sk */
 static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
 {
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index b22b2c745c76..69e331799604 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
 					    hp->source, lport ? lport : hp->dest,
 					    skb->dev, NF_TPROXY_LOOKUP_LISTENER);
 		if (sk2) {
-			inet_twsk_deschedule_put(inet_twsk(sk));
+			nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
 		}
 	}
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index 929502e51203..52f828bb5a83 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
 					    lport ? lport : hp->dest,
 					    skb->dev, NF_TPROXY_LOOKUP_LISTENER);
 		if (sk2) {
-			inet_twsk_deschedule_put(inet_twsk(sk));
+			nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
 		}
 	}
-- 
cgit 


From 32db18d606c9a6eac7ac8bb12b4bf0e2eb1d5d14 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Tue, 28 Feb 2023 14:45:22 +0700
Subject: bpf, doc: Do not link to docs.kernel.org for kselftest link

The question on how to run BPF selftests have a reference link to kernel
selftest documentation (Documentation/dev-tools/kselftest.rst). However,
it uses external link to the documentation at kernel.org/docs (aka
docs.kernel.org) instead, which requires Internet access.

Fix this and replace the link with internal linking, by using :doc: directive
while keeping the anchor text.

Fixes: b7a27c3aafa252 ("bpf, doc: howto use/run the BPF selftests")
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230228074523.11493-2-bagasdotme@gmail.com
---
 Documentation/bpf/bpf_devel_QA.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 715f7321020f..eb087c599ffa 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -461,8 +461,8 @@ needed::
 
   $ sudo make run_tests
 
-See the kernels selftest `Documentation/dev-tools/kselftest.rst`_
-document for further documentation.
+See :doc:`kernel selftest documentation </dev-tools/kselftest>`
+for details.
 
 To maximize the number of tests passing, the .config of the kernel
 under test should match the config file fragment in
@@ -688,7 +688,5 @@ when:
 .. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
 .. _selftests:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
-.. _Documentation/dev-tools/kselftest.rst:
-   https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
 
 Happy BPF hacking!
-- 
cgit 


From b7abcd9c656b982a99e18f795bb1cf81f84b656d Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Tue, 28 Feb 2023 14:45:23 +0700
Subject: bpf, doc: Link to submitting-patches.rst for general patch submission
 info

The link for patch submission information in general refers to index
page for "Working with the kernel development community" section of
kernel docs, whereas the link should have been
Documentation/process/submitting-patches.rst instead.

Fix it by replacing the index target with the appropriate doc.

Fixes: 542228384888f5 ("bpf, doc: convert bpf_devel_QA.rst to use RST formatting")
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230228074523.11493-3-bagasdotme@gmail.com
---
 Documentation/bpf/bpf_devel_QA.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index eb087c599ffa..b421d94dc9f2 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -7,8 +7,8 @@ workflows related to reporting bugs, submitting patches, and queueing
 patches for stable kernels.
 
 For general information about submitting patches, please refer to
-`Documentation/process/`_. This document only describes additional specifics
-related to BPF.
+Documentation/process/submitting-patches.rst. This document only describes
+additional specifics related to BPF.
 
 .. contents::
     :local:
@@ -684,7 +684,6 @@ when:
 
 
 .. Links
-.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
 .. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
 .. _selftests:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
-- 
cgit 


From 294635a8165a31408a8b3a24f9c74849ca3d8701 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Fri, 24 Feb 2023 17:36:07 +0100
Subject: bpf, test_run: fix &xdp_frame misplacement for LIVE_FRAMES
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

&xdp_buff and &xdp_frame are bound in a way that

xdp_buff->data_hard_start == xdp_frame

It's always the case and e.g. xdp_convert_buff_to_frame() relies on
this.
IOW, the following:

	for (u32 i = 0; i < 0xdead; i++) {
		xdpf = xdp_convert_buff_to_frame(&xdp);
		xdp_convert_frame_to_buff(xdpf, &xdp);
	}

shouldn't ever modify @xdpf's contents or the pointer itself.
However, "live packet" code wrongly treats &xdp_frame as part of its
context placed *before* the data_hard_start. With such flow,
data_hard_start is sizeof(*xdpf) off to the right and no longer points
to the XDP frame.

Instead of replacing `sizeof(ctx)` with `offsetof(ctx, xdpf)` in several
places and praying that there are no more miscalcs left somewhere in the
code, unionize ::frm with ::data in a flex array, so that both starts
pointing to the actual data_hard_start and the XDP frame actually starts
being a part of it, i.e. a part of the headroom, not the context.
A nice side effect is that the maximum frame size for this mode gets
increased by 40 bytes, as xdp_buff::frame_sz includes everything from
data_hard_start (-> includes xdpf already) to the end of XDP/skb shared
info.
Also update %MAX_PKT_SIZE accordingly in the selftests code. Leave it
hardcoded for 64 bit && 4k pages, it can be made more flexible later on.

Minor: align `&head->data` with how `head->frm` is assigned for
consistency.
Minor #2: rename 'frm' to 'frame' in &xdp_page_head while at it for
clarity.

(was found while testing XDP traffic generator on ice, which calls
 xdp_convert_frame_to_buff() for each XDP frame)

Fixes: b530e9e1063e ("bpf: Add "live packet" mode for XDP in BPF_PROG_RUN")
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20230224163607.2994755-1-aleksander.lobakin@intel.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 net/bpf/test_run.c                                    | 19 +++++++++++++------
 .../selftests/bpf/prog_tests/xdp_do_redirect.c        |  7 ++++---
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6f3d654b3339..f81b24320a36 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -97,8 +97,11 @@ reset:
 struct xdp_page_head {
 	struct xdp_buff orig_ctx;
 	struct xdp_buff ctx;
-	struct xdp_frame frm;
-	u8 data[];
+	union {
+		/* ::data_hard_start starts here */
+		DECLARE_FLEX_ARRAY(struct xdp_frame, frame);
+		DECLARE_FLEX_ARRAY(u8, data);
+	};
 };
 
 struct xdp_test_data {
@@ -113,6 +116,10 @@ struct xdp_test_data {
 	u32 frame_cnt;
 };
 
+/* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE
+ * must be updated accordingly this gets changed, otherwise BPF selftests
+ * will fail.
+ */
 #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
 #define TEST_XDP_MAX_BATCH 256
 
@@ -132,8 +139,8 @@ static void xdp_test_run_init_page(struct page *page, void *arg)
 	headroom -= meta_len;
 
 	new_ctx = &head->ctx;
-	frm = &head->frm;
-	data = &head->data;
+	frm = head->frame;
+	data = head->data;
 	memcpy(data + headroom, orig_ctx->data_meta, frm_len);
 
 	xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq);
@@ -223,7 +230,7 @@ static void reset_ctx(struct xdp_page_head *head)
 	head->ctx.data = head->orig_ctx.data;
 	head->ctx.data_meta = head->orig_ctx.data_meta;
 	head->ctx.data_end = head->orig_ctx.data_end;
-	xdp_update_frame_from_buff(&head->ctx, &head->frm);
+	xdp_update_frame_from_buff(&head->ctx, head->frame);
 }
 
 static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
@@ -285,7 +292,7 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 		head = phys_to_virt(page_to_phys(page));
 		reset_ctx(head);
 		ctx = &head->ctx;
-		frm = &head->frm;
+		frm = head->frame;
 		xdp->frame_cnt++;
 
 		act = bpf_prog_run_xdp(prog, ctx);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 2666c84dbd01..7271a18ab3e2 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -65,12 +65,13 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
 }
 
 /* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
- * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes
+ * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM =
+ * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one.
  */
 #if defined(__s390x__)
-#define MAX_PKT_SIZE 3176
+#define MAX_PKT_SIZE 3216
 #else
-#define MAX_PKT_SIZE 3368
+#define MAX_PKT_SIZE 3408
 #endif
 static void test_max_pkt_size(int fd)
 {
-- 
cgit 


From 9b459804ff9973e173fabafba2a1319f771e85fa Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lorenz.bauer@isovalent.com>
Date: Mon, 6 Mar 2023 11:21:37 +0000
Subject: btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR

btf_datasec_resolve contains a bug that causes the following BTF
to fail loading:

    [1] DATASEC a size=2 vlen=2
        type_id=4 offset=0 size=1
        type_id=7 offset=1 size=1
    [2] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none)
    [3] PTR (anon) type_id=2
    [4] VAR a type_id=3 linkage=0
    [5] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none)
    [6] TYPEDEF td type_id=5
    [7] VAR b type_id=6 linkage=0

This error message is printed during btf_check_all_types:

    [1] DATASEC a size=2 vlen=2
        type_id=7 offset=1 size=1 Invalid type

By tracing btf_*_resolve we can pinpoint the problem:

    btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_TBD) = 0
        btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_TBD) = 0
            btf_ptr_resolve(depth: 3, type_id: 3, mode: RESOLVE_PTR) = 0
        btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_PTR) = 0
    btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_PTR) = -22

The last invocation of btf_datasec_resolve should invoke btf_var_resolve
by means of env_stack_push, instead it returns EINVAL. The reason is that
env_stack_push is never executed for the second VAR.

    if (!env_type_is_resolve_sink(env, var_type) &&
        !env_type_is_resolved(env, var_type_id)) {
        env_stack_set_next_member(env, i + 1);
        return env_stack_push(env, var_type, var_type_id);
    }

env_type_is_resolve_sink() changes its behaviour based on resolve_mode.
For RESOLVE_PTR, we can simplify the if condition to the following:

    (btf_type_is_modifier() || btf_type_is_ptr) && !env_type_is_resolved()

Since we're dealing with a VAR the clause evaluates to false. This is
not sufficient to trigger the bug however. The log output and EINVAL
are only generated if btf_type_id_size() fails.

    if (!btf_type_id_size(btf, &type_id, &type_size)) {
        btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
        return -EINVAL;
    }

Most types are sized, so for example a VAR referring to an INT is not a
problem. The bug is only triggered if a VAR points at a modifier. Since
we skipped btf_var_resolve that modifier was also never resolved, which
means that btf_resolved_type_id returns 0 aka VOID for the modifier.
This in turn causes btf_type_id_size to return NULL, triggering EINVAL.

To summarise, the following conditions are necessary:

- VAR pointing at PTR, STRUCT, UNION or ARRAY
- Followed by a VAR pointing at TYPEDEF, VOLATILE, CONST, RESTRICT or
  TYPE_TAG

The fix is to reset resolve_mode to RESOLVE_TBD before attempting to
resolve a VAR from a DATASEC.

Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec")
Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
Link: https://lore.kernel.org/r/20230306112138.155352-2-lmb@isovalent.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/btf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index fa22ec79ac0e..73780748404c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4569,6 +4569,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env,
 	struct btf *btf = env->btf;
 	u16 i;
 
+	env->resolve_mode = RESOLVE_TBD;
 	for_each_vsi_from(i, v->next_member, v->t, vsi) {
 		u32 var_type_id = vsi->type, type_id, type_size = 0;
 		const struct btf_type *var_type = btf_type_by_id(env->btf,
-- 
cgit 


From dfdd608c3b365f0fd49d7e13911ebcde06b9865b Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lorenz.bauer@isovalent.com>
Date: Mon, 6 Mar 2023 11:21:38 +0000
Subject: selftests/bpf: check that modifier resolves after pointer

Add a regression test that ensures that a VAR pointing at a
modifier which follows a PTR (or STRUCT or ARRAY) is resolved
correctly by the datasec validator.

Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
Link: https://lore.kernel.org/r/20230306112138.155352-3-lmb@isovalent.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index cbb600be943d..210d643fda6c 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -879,6 +879,34 @@ static struct btf_raw_test raw_tests[] = {
 	.btf_load_err = true,
 	.err_str = "Invalid elem",
 },
+{
+	.descr = "var after datasec, ptr followed by modifier",
+	.raw_types = {
+		/* .bss section */				/* [1] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2),
+			sizeof(void*)+4),
+		BTF_VAR_SECINFO_ENC(4, 0, sizeof(void*)),
+		BTF_VAR_SECINFO_ENC(6, sizeof(void*), 4),
+		/* int */					/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int* */					/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+		BTF_VAR_ENC(NAME_TBD, 3, 0),			/* [4] */
+		/* const int */					/* [5] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0a\0b\0c\0",
+	.str_sec_size = sizeof("\0a\0b\0c\0"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void*)+4,
+	.key_type_id = 0,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
 /* Test member exceeds the size of struct.
  *
  * struct A {
-- 
cgit 


From 58aac3a2ef414fea6d7fdf823ea177744a087d13 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 4 Mar 2023 11:52:44 +0100
Subject: net: phy: smsc: fix link up detection in forced irq mode

Currently link up can't be detected in forced mode if polling
isn't used. Only link up interrupt source we have is aneg
complete which isn't applicable in forced mode. Therefore we
have to use energy-on as link up indicator.

Fixes: 7365494550f6 ("net: phy: smsc: skip ENERGYON interrupt if disabled")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/smsc.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index ac7481ce2fc1..00d9eff91dcf 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -44,7 +44,6 @@ static struct smsc_hw_stat smsc_hw_stats[] = {
 };
 
 struct smsc_phy_priv {
-	u16 intmask;
 	bool energy_enable;
 };
 
@@ -57,7 +56,6 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev)
 
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
-	struct smsc_phy_priv *priv = phydev->priv;
 	int rc;
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
@@ -65,14 +63,9 @@ static int smsc_phy_config_intr(struct phy_device *phydev)
 		if (rc)
 			return rc;
 
-		priv->intmask = MII_LAN83C185_ISF_INT4 | MII_LAN83C185_ISF_INT6;
-		if (priv->energy_enable)
-			priv->intmask |= MII_LAN83C185_ISF_INT7;
-
-		rc = phy_write(phydev, MII_LAN83C185_IM, priv->intmask);
+		rc = phy_write(phydev, MII_LAN83C185_IM,
+			       MII_LAN83C185_ISF_INT_PHYLIB_EVENTS);
 	} else {
-		priv->intmask = 0;
-
 		rc = phy_write(phydev, MII_LAN83C185_IM, 0);
 		if (rc)
 			return rc;
@@ -85,7 +78,6 @@ static int smsc_phy_config_intr(struct phy_device *phydev)
 
 static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev)
 {
-	struct smsc_phy_priv *priv = phydev->priv;
 	int irq_status;
 
 	irq_status = phy_read(phydev, MII_LAN83C185_ISF);
@@ -96,7 +88,7 @@ static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev)
 		return IRQ_NONE;
 	}
 
-	if (!(irq_status & priv->intmask))
+	if (!(irq_status & MII_LAN83C185_ISF_INT_PHYLIB_EVENTS))
 		return IRQ_NONE;
 
 	phy_trigger_machine(phydev);
-- 
cgit 


From 193250ace270fecd586dd2d0dfbd9cbd2ade977f Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Sat, 4 Mar 2023 13:43:20 +0000
Subject: net: ethernet: mtk_eth_soc: fix RX data corruption issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix data corruption issue with SerDes connected PHYs operating at 1.25
Gbps speed where we could previously observe about 30% packet loss while
the bad packet counter was increasing.

As almost all boards with MediaTek MT7622 or MT7986 use either the MT7531
switch IC operating at 3.125Gbps SerDes rate or single-port PHYs using
rate-adaptation to 2500Base-X mode, this issue only got exposed now when
we started trying to use SFP modules operating with 1.25 Gbps with the
BananaPi R3 board.

The fix is to set bit 12 which disables the RX FIFO clear function when
setting up MAC MCR, MediaTek SDK did the same change stating:
"If without this patch, kernel might receive invalid packets that are
corrupted by GMAC."[1]

[1]: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/d8a2975939a12686c4a95c40db21efdc3f821f63

Fixes: 42c03844e93d ("net-next: mediatek: add support for MediaTek MT7622 SoC")
Tested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/138da2735f92c8b6f8578ec2e5a794ee515b665f.1677937317.git.daniel@makrotopia.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 14be6ea51b88..3cb43623d3db 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -616,7 +616,8 @@ static int mtk_mac_finish(struct phylink_config *config, unsigned int mode,
 	mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
 	mcr_new = mcr_cur;
 	mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
-		   MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
+		   MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK |
+		   MAC_MCR_RX_FIFO_CLR_DIS;
 
 	/* Only update control register when needed! */
 	if (mcr_new != mcr_cur)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index afc9d52e79bf..b65de174c3d9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -397,6 +397,7 @@
 #define MAC_MCR_FORCE_MODE	BIT(15)
 #define MAC_MCR_TX_EN		BIT(14)
 #define MAC_MCR_RX_EN		BIT(13)
+#define MAC_MCR_RX_FIFO_CLR_DIS	BIT(12)
 #define MAC_MCR_BACKOFF_EN	BIT(9)
 #define MAC_MCR_BACKPR_EN	BIT(8)
 #define MAC_MCR_FORCE_RX_FC	BIT(5)
-- 
cgit 


From e539a105f947b9db470fec39fe91d85fe737a432 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 4 Mar 2023 11:26:10 -0800
Subject: net: tls: fix device-offloaded sendpage straddling records

Adrien reports that incorrect data is transmitted when a single
page straddles multiple records. We would transmit the same
data in all iterations of the loop.

Reported-by: Adrien Moulin <amoulin@corp.free.fr>
Link: https://lore.kernel.org/all/61481278.42813558.1677845235112.JavaMail.zimbra@corp.free.fr
Fixes: c1318b39c7d3 ("tls: Add opt-in zerocopy mode of sendfile()")
Tested-by: Adrien Moulin <amoulin@corp.free.fr>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Maxim Mikityanskiy <maxtram95@gmail.com>
Link: https://lore.kernel.org/r/20230304192610.3818098-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 6c593788dc25..a7cc4f9faac2 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -508,6 +508,8 @@ handle_error:
 			zc_pfrag.offset = iter_offset.offset;
 			zc_pfrag.size = copy;
 			tls_append_frag(record, &zc_pfrag, copy);
+
+			iter_offset.offset += copy;
 		} else if (copy) {
 			copy = min_t(size_t, copy, pfrag->size - pfrag->offset);
 
-- 
cgit 


From c77737b736ceb50fdf150434347dbd81ec76dbb1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 7 Mar 2023 05:22:54 +0000
Subject: netfilter: conntrack: adopt safer max chain length

Customers using GKE 1.25 and 1.26 are facing conntrack issues
root caused to commit c9c3b6811f74 ("netfilter: conntrack: make
max chain length random").

Even if we assume Uniform Hashing, a bucket often reachs 8 chained
items while the load factor of the hash table is smaller than 0.5

With a limit of 16, we reach load factors of 3.
With a limit of 32, we reach load factors of 11.
With a limit of 40, we reach load factors of 15.
With a limit of 50, we reach load factors of 24.

This patch changes MIN_CHAINLEN to 50, to minimize risks.

Ideally, we could in the future add a cushion based on expected
load factor (2 * nf_conntrack_max / nf_conntrack_buckets),
because some setups might expect unusual values.

Fixes: c9c3b6811f74 ("netfilter: conntrack: make max chain length random")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7250082e7de5..c6a6a6099b4e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -96,8 +96,8 @@ static DEFINE_MUTEX(nf_conntrack_mutex);
 #define GC_SCAN_MAX_DURATION	msecs_to_jiffies(10)
 #define GC_SCAN_EXPIRED_MAX	(64000u / HZ)
 
-#define MIN_CHAINLEN	8u
-#define MAX_CHAINLEN	(32u - MIN_CHAINLEN)
+#define MIN_CHAINLEN	50u
+#define MAX_CHAINLEN	(80u - MIN_CHAINLEN)
 
 static struct conntrack_gc_work conntrack_gc_work;
 
-- 
cgit 


From 418383e6ed6b4624a54ec05c535f13d184fbf33b Mon Sep 17 00:00:00 2001
From: Enrico Sau <enrico.sau@gmail.com>
Date: Mon, 6 Mar 2023 12:59:33 +0100
Subject: net: usb: cdc_mbim: avoid altsetting toggling for Telit FE990

Add quirk CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE for Telit FE990
0x1081 composition in order to avoid bind error.

Signed-off-by: Enrico Sau <enrico.sau@gmail.com>
Link: https://lore.kernel.org/r/20230306115933.198259-1-enrico.sau@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/usb/cdc_mbim.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index c89639381eca..cd4083e0b3b9 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -665,6 +665,11 @@ static const struct usb_device_id mbim_devs[] = {
 	  .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
 	},
 
+	/* Telit FE990 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1081, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+	  .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
+	},
+
 	/* default entry */
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
 	  .driver_info = (unsigned long)&cdc_mbim_info_zlp,
-- 
cgit 


From 382e363d5bed0cec5807b35761d14e55955eee63 Mon Sep 17 00:00:00 2001
From: Enrico Sau <enrico.sau@gmail.com>
Date: Mon, 6 Mar 2023 13:05:28 +0100
Subject: net: usb: qmi_wwan: add Telit 0x1080 composition

Add the following Telit FE990 composition:

0x1080: tty, adb, rmnet, tty, tty, tty, tty

Signed-off-by: Enrico Sau <enrico.sau@gmail.com>
Link: https://lore.kernel.org/r/20230306120528.198842-1-enrico.sau@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index a808d718c012..571e37e67f9c 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1364,6 +1364,7 @@ static const struct usb_device_id products[] = {
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)},	/* Telit FN980 */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)},	/* Telit LN920 */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)},	/* Telit FN990 */
+	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},	/* Telit ME910 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)},	/* Telit ME910 dual modem */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
-- 
cgit 


From fef3f92e8a4214652d8f33f50330dc5a92efbf11 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Fri, 27 Jan 2023 14:24:10 +0100
Subject: ice: Fix DSCP PFC TLV creation

When creating the TLV to send to the FW for configuring DSCP mode PFC,the
PFCENABLE field was being masked with a 4 bit mask (0xF), but this is an 8
bit bitmask for enabled classes for PFC.  This means that traffic classes
4-7 could not be enabled for PFC.

Remove the mask completely, as it is not necessary, as we are assigning 8
bits to an 8 bit field.

Fixes: 2a87bd73e50d ("ice: Add DSCP support")
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Karen Ostrowska <karen.ostrowska@intel.com>
Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index c557dfc50aad..396e555023aa 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -1411,7 +1411,7 @@ ice_add_dscp_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
 	tlv->ouisubtype = htonl(ouisubtype);
 
 	buf[0] = dcbcfg->pfc.pfccap & 0xF;
-	buf[1] = dcbcfg->pfc.pfcena & 0xF;
+	buf[1] = dcbcfg->pfc.pfcena;
 }
 
 /**
-- 
cgit 


From c4a9c8e78aad0fd9b79c9b1e83bab3288a15ce3c Mon Sep 17 00:00:00 2001
From: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Date: Mon, 13 Feb 2023 12:27:33 +0100
Subject: ice: don't ignore return codes in VSI related code

There were few smatch warnings reported by Dan:
- ice_vsi_cfg_xdp_txqs can return 0 instead of ret, which is cleaner
- return values in ice_vsi_cfg_def were ignored
- in ice_vsi_rebuild return value was ignored in case rebuild failed,
  it was a never reached code, however, rewrite it for clarity.
- ice_vsi_cfg_tc can return 0 instead of ret

Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 781475480ff2..0f52ea38b6f3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2126,7 +2126,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
 	ice_for_each_rxq(vsi, i)
 		ice_tx_xsk_pool(vsi, i);
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -2693,12 +2693,14 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
 		return ret;
 
 	/* allocate memory for Tx/Rx ring stat pointers */
-	if (ice_vsi_alloc_stat_arrays(vsi))
+	ret = ice_vsi_alloc_stat_arrays(vsi);
+	if (ret)
 		goto unroll_vsi_alloc;
 
 	ice_alloc_fd_res(vsi);
 
-	if (ice_vsi_get_qs(vsi)) {
+	ret = ice_vsi_get_qs(vsi);
+	if (ret) {
 		dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
 			vsi->idx);
 		goto unroll_vsi_alloc_stat;
@@ -2811,6 +2813,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
 		break;
 	default:
 		/* clean up the resources and exit */
+		ret = -EINVAL;
 		goto unroll_vsi_init;
 	}
 
@@ -3508,10 +3511,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
 		if (vsi_flags & ICE_VSI_FLAG_INIT) {
 			ret = -EIO;
 			goto err_vsi_cfg_tc_lan;
-		} else {
-			kfree(coalesce);
-			return ice_schedule_reset(pf, ICE_RESET_PFR);
 		}
+
+		kfree(coalesce);
+		return ice_schedule_reset(pf, ICE_RESET_PFR);
 	}
 
 	ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
@@ -3759,7 +3762,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
 	dev = ice_pf_to_dev(pf);
 	if (vsi->tc_cfg.ena_tc == ena_tc &&
 	    vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
-		return ret;
+		return 0;
 
 	ice_for_each_traffic_class(i) {
 		/* build bitmap of enabled TCs */
-- 
cgit 


From 8f5c5a790e3025d6eca96bf7ee5e3873dc92373f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Feb 2023 16:25:36 +0100
Subject: ethernet: ice: avoid gcc-9 integer overflow warning

With older compilers like gcc-9, the calculation of the vlan
priority field causes a false-positive warning from the byteswap:

In file included from drivers/net/ethernet/intel/ice/ice_tc_lib.c:4:
drivers/net/ethernet/intel/ice/ice_tc_lib.c: In function 'ice_parse_cls_flower':
include/uapi/linux/swab.h:15:15: error: integer overflow in expression '(int)(short unsigned int)((int)match.key-><U67c8>.<U6698>.vlan_priority << 13) & 57344 & 255' of type 'int' results in '0' [-Werror=overflow]
   15 |  (((__u16)(x) & (__u16)0x00ffU) << 8) |   \
      |   ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~
include/uapi/linux/swab.h:106:2: note: in expansion of macro '___constant_swab16'
  106 |  ___constant_swab16(x) :   \
      |  ^~~~~~~~~~~~~~~~~~
include/uapi/linux/byteorder/little_endian.h:42:43: note: in expansion of macro '__swab16'
   42 | #define __cpu_to_be16(x) ((__force __be16)__swab16((x)))
      |                                           ^~~~~~~~
include/linux/byteorder/generic.h:96:21: note: in expansion of macro '__cpu_to_be16'
   96 | #define cpu_to_be16 __cpu_to_be16
      |                     ^~~~~~~~~~~~~
drivers/net/ethernet/intel/ice/ice_tc_lib.c:1458:5: note: in expansion of macro 'cpu_to_be16'
 1458 |     cpu_to_be16((match.key->vlan_priority <<
      |     ^~~~~~~~~~~

After a change to be16_encode_bits(), the code becomes more
readable to both people and compilers, which avoids the warning.

Fixes: 34800178b302 ("ice: Add support for VLAN priority filters in switchdev")
Suggested-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_tc_lib.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index 6b48cbc049c6..76f29a5bf8d7 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -1455,8 +1455,8 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
 		if (match.mask->vlan_priority) {
 			fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_PRIO;
 			headers->vlan_hdr.vlan_prio =
-				cpu_to_be16((match.key->vlan_priority <<
-					     VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK);
+				be16_encode_bits(match.key->vlan_priority,
+						 VLAN_PRIO_MASK);
 		}
 
 		if (match.mask->vlan_tpid)
@@ -1489,8 +1489,8 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
 		if (match.mask->vlan_priority) {
 			fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN_PRIO;
 			headers->cvlan_hdr.vlan_prio =
-				cpu_to_be16((match.key->vlan_priority <<
-					     VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK);
+				be16_encode_bits(match.key->vlan_priority,
+						 VLAN_PRIO_MASK);
 		}
 	}
 
-- 
cgit 


From 7d834b4d1ab66c48e8c0810fdeadaabb80fa2c81 Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Tue, 7 Mar 2023 00:26:50 +0300
Subject: nfc: change order inside nfc_se_io error path

cb_context should be freed on the error path in nfc_se_io as stated by
commit 25ff6f8a5a3b ("nfc: fix memory leak of se_io context in
nfc_genl_se_io").

Make the error path in nfc_se_io unwind everything in reverse order, i.e.
free the cb_context after unlocking the device.

Suggested-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230306212650.230322-1-pchelkin@ispras.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/nfc/netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 348bf561bc9f..b9264e730fd9 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1446,8 +1446,8 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx,
 	return rc;
 
 error:
-	kfree(cb_context);
 	device_unlock(&dev->dev);
+	kfree(cb_context);
 	return rc;
 }
 
-- 
cgit 


From e7b15acdc10f74872fdae1482b3f837818362085 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 6 Mar 2023 11:20:18 -0800
Subject: mailmap: add entry for Maxim Mikityanskiy

Map Maxim's old corporate addresses to his personal one.

Link: https://lore.kernel.org/r/20230306192018.3894988-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .mailmap | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.mailmap b/.mailmap
index a872c9683958..04bd44774ad0 100644
--- a/.mailmap
+++ b/.mailmap
@@ -304,6 +304,8 @@ Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@osg.samsung.com>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@redhat.com>
 Mauro Carvalho Chehab <mchehab@kernel.org> <m.chehab@samsung.com>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@s-opensource.com>
+Maxim Mikityanskiy <maxtram95@gmail.com> <maximmi@mellanox.com>
+Maxim Mikityanskiy <maxtram95@gmail.com> <maximmi@nvidia.com>
 Maxime Ripard <mripard@kernel.org> <maxime.ripard@bootlin.com>
 Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
 Mayuresh Janorkar <mayur@ti.com>
-- 
cgit 


From b1649b0fe98c2c7eed156c957f6f524d5660d859 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Mon, 6 Mar 2023 11:44:05 -0800
Subject: mailmap: update entries for Stephen Hemminger

Map all my old email addresses to current address.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Link: https://lore.kernel.org/r/20230306194405.108236-1-stephen@networkplumber.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .mailmap | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 04bd44774ad0..4f37ff13a346 100644
--- a/.mailmap
+++ b/.mailmap
@@ -411,7 +411,10 @@ Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
 Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
 Simon Kelley <simon@thekelleys.org.uk>
 Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
-Stephen Hemminger <shemminger@osdl.org>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
+Stephen Hemminger <stephen@networkplumber.org> <sthemmin@microsoft.com>
+Stephen Hemminger <stephen@networkplumber.org> <sthemmin@vyatta.com>
 Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
 Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
 Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
-- 
cgit 


From 37d9df224d1eec1b434fe9ffa40104c756478c29 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 6 Mar 2023 12:04:57 -0800
Subject: ynl: re-license uniformly under GPL-2.0 OR BSD-3-Clause

I was intending to make all the Netlink Spec code BSD-3-Clause
to ease the adoption but it appears that:
 - I fumbled the uAPI and used "GPL WITH uAPI note" there
 - it gives people pause as they expect GPL in the kernel
As suggested by Chuck re-license under dual. This gives us benefit
of full BSD freedom while fulfilling the broad "kernel is under GPL"
expectations.

Link: https://lore.kernel.org/all/20230304120108.05dd44c5@kernel.org/
Link: https://lore.kernel.org/r/20230306200457.3903854-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/genetlink-c.yaml        | 2 +-
 Documentation/netlink/genetlink-legacy.yaml   | 2 +-
 Documentation/netlink/genetlink.yaml          | 2 +-
 Documentation/netlink/specs/ethtool.yaml      | 2 ++
 Documentation/netlink/specs/fou.yaml          | 2 ++
 Documentation/netlink/specs/netdev.yaml       | 2 ++
 Documentation/userspace-api/netlink/specs.rst | 3 +++
 include/uapi/linux/fou.h                      | 2 +-
 include/uapi/linux/netdev.h                   | 2 +-
 net/core/netdev-genl-gen.c                    | 2 +-
 net/core/netdev-genl-gen.h                    | 2 +-
 net/ipv4/fou_nl.c                             | 2 +-
 net/ipv4/fou_nl.h                             | 2 +-
 tools/include/uapi/linux/netdev.h             | 2 +-
 tools/net/ynl/cli.py                          | 2 +-
 tools/net/ynl/lib/__init__.py                 | 2 +-
 tools/net/ynl/lib/nlspec.py                   | 2 +-
 tools/net/ynl/lib/ynl.py                      | 2 +-
 tools/net/ynl/ynl-gen-c.py                    | 7 ++++---
 tools/net/ynl/ynl-regen.sh                    | 2 +-
 20 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index bbcfa2472b04..f082a5ad7cf1 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 %YAML 1.2
 ---
 $id: http://kernel.org/schemas/netlink/genetlink-c.yaml#
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 5642925c4ceb..c6b8c77f7d12 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 %YAML 1.2
 ---
 $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml#
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index 62a922755ce2..b2d56ab9e615 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 %YAML 1.2
 ---
 $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml#
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 35c462bce56f..18ecb7d90cbe 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
 name: ethtool
 
 protocol: genetlink-legacy
diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml
index cca4cf98f03a..cff104288723 100644
--- a/Documentation/netlink/specs/fou.yaml
+++ b/Documentation/netlink/specs/fou.yaml
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
 name: fou
 
 protocol: genetlink-legacy
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index ba9ee13cf729..24de747b5344 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
 name: netdev
 
 doc:
diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index 32e53328d113..2122e0c4a399 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -24,6 +24,9 @@ YAML specifications can be found under ``Documentation/netlink/specs/``
 This document describes details of the schema.
 See :doc:`intro-specs` for a practical starting guide.
 
+All specs must be licensed under ``GPL-2.0-only OR BSD-3-Clause``
+to allow for easy adoption in user space code.
+
 Compatibility levels
 ====================
 
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index 19ebbef41a63..5041c3598493 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/fou.yaml */
 /* YNL-GEN uapi header */
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 588391447bfb..8c4e3e536c04 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN uapi header */
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 48812ec843f5..9e10802587fc 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN kernel source */
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index b16dc7e026bb..2c5fc7d1e8a7 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN kernel header */
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
index 6c3820f41dd5..5c14fe030eda 100644
--- a/net/ipv4/fou_nl.c
+++ b/net/ipv4/fou_nl.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/fou.yaml */
 /* YNL-GEN kernel source */
diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h
index b7a68121ce6f..58b1e1ed4b3b 100644
--- a/net/ipv4/fou_nl.h
+++ b/net/ipv4/fou_nl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/fou.yaml */
 /* YNL-GEN kernel header */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 588391447bfb..8c4e3e536c04 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN uapi header */
diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py
index db410b74d539..ffaa8038aa8c 100755
--- a/tools/net/ynl/cli.py
+++ b/tools/net/ynl/cli.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 import argparse
 import json
diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py
index 3c73f59eabab..a2cb8b16d6f1 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/lib/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 from .nlspec import SpecAttr, SpecAttrSet, SpecFamily, SpecOperation
 from .ynl import YnlFamily
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 9d394e50de23..0a2cfb5862aa 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 import collections
 import importlib
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 1c7411ee04dc..a842adc8e87e 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 import functools
 import os
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 62f8f2c3c56c..c940ca834d3f 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 import argparse
 import collections
@@ -2127,12 +2128,12 @@ def main():
 
     _, spec_kernel = find_kernel_root(args.spec)
     if args.mode == 'uapi':
-        cw.p('/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */')
+        cw.p('/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */')
     else:
         if args.header:
-            cw.p('/* SPDX-License-Identifier: BSD-3-Clause */')
+            cw.p('/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */')
         else:
-            cw.p('// SPDX-License-Identifier: BSD-3-Clause')
+            cw.p('// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause')
     cw.p("/* Do not edit directly, auto-generated from: */")
     cw.p(f"/*\t{spec_kernel} */")
     cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */")
diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh
index 43989ae48ed0..74f5de1c2399 100755
--- a/tools/net/ynl/ynl-regen.sh
+++ b/tools/net/ynl/ynl-regen.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 TOOL=$(dirname $(realpath $0))/ynl-gen-c.py
 
-- 
cgit 


From ce7ca794712f186da99719e8b4e97bd5ddbb04c3 Mon Sep 17 00:00:00 2001
From: "D. Wythe" <alibuda@linux.alibaba.com>
Date: Tue, 7 Mar 2023 11:23:46 +0800
Subject: net/smc: fix fallback failed while sendmsg with fastopen

Before determining whether the msg has unsupported options, it has been
prematurely terminated by the wrong status check.

For the application, the general usages of MSG_FASTOPEN likes

fd = socket(...)
/* rather than connect */
sendto(fd, data, len, MSG_FASTOPEN)

Hence, We need to check the flag before state check, because the sock
state here is always SMC_INIT when applications tries MSG_FASTOPEN.
Once we found unsupported options, fallback it to TCP.

Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback")
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>

v2 -> v1: Optimize code style
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a4cccdfdc00a..ff6dd86bdc9f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2657,16 +2657,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct smc_sock *smc;
-	int rc = -EPIPE;
+	int rc;
 
 	smc = smc_sk(sk);
 	lock_sock(sk);
-	if ((sk->sk_state != SMC_ACTIVE) &&
-	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
-	    (sk->sk_state != SMC_INIT))
-		goto out;
 
+	/* SMC does not support connect with fastopen */
 	if (msg->msg_flags & MSG_FASTOPEN) {
+		/* not connected yet, fallback */
 		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
 			rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
 			if (rc)
@@ -2675,6 +2673,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 			rc = -EINVAL;
 			goto out;
 		}
+	} else if ((sk->sk_state != SMC_ACTIVE) &&
+		   (sk->sk_state != SMC_APPCLOSEWAIT1) &&
+		   (sk->sk_state != SMC_INIT)) {
+		rc = -EPIPE;
+		goto out;
 	}
 
 	if (smc->use_fallback) {
-- 
cgit 


From ea9dd2e5c6d12c8b65ce7514c8359a70eeaa0e70 Mon Sep 17 00:00:00 2001
From: Suman Ghosh <sumang@marvell.com>
Date: Tue, 7 Mar 2023 16:19:08 +0530
Subject: octeontx2-af: Unlock contexts in the queue context cache in case of
 fault detection

NDC caches contexts of frequently used queue's (Rx and Tx queues)
contexts. Due to a HW errata when NDC detects fault/poision while
accessing contexts it could go into an illegal state where a cache
line could get locked forever. To makesure all cache lines in NDC
are available for optimum performance upon fault/lockerror/posion
errors scan through all cache lines in NDC and clear the lock bit.

Fixes: 4a3581cd5995 ("octeontx2-af: NPA AQ instruction enqueue support")
Signed-off-by: Suman Ghosh <sumang@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: Sai Krishna <saikrishnag@marvell.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu.h    |  5 ++
 .../ethernet/marvell/octeontx2/af/rvu_debugfs.c    |  7 ++-
 .../net/ethernet/marvell/octeontx2/af/rvu_nix.c    | 16 +++++-
 .../net/ethernet/marvell/octeontx2/af/rvu_npa.c    | 58 +++++++++++++++++++++-
 .../net/ethernet/marvell/octeontx2/af/rvu_reg.h    |  3 ++
 5 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 389663a13d1d..ef721caeac49 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -884,6 +884,9 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf,
 int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc);
 int rvu_cpt_init(struct rvu *rvu);
 
+#define NDC_AF_BANK_MASK       GENMASK_ULL(7, 0)
+#define NDC_AF_BANK_LINE_MASK  GENMASK_ULL(31, 16)
+
 /* CN10K RVU */
 int rvu_set_channels_base(struct rvu *rvu);
 void rvu_program_channels(struct rvu *rvu);
@@ -902,6 +905,8 @@ static inline void rvu_dbg_init(struct rvu *rvu) {}
 static inline void rvu_dbg_exit(struct rvu *rvu) {}
 #endif
 
+int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr);
+
 /* RVU Switch */
 void rvu_switch_enable(struct rvu *rvu);
 void rvu_switch_disable(struct rvu *rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index fa280ebd3052..26cfa501f1a1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -198,9 +198,6 @@ enum cpt_eng_type {
 	CPT_IE_TYPE = 3,
 };
 
-#define NDC_MAX_BANK(rvu, blk_addr) (rvu_read64(rvu, \
-						blk_addr, NDC_AF_CONST) & 0xFF)
-
 #define rvu_dbg_NULL NULL
 #define rvu_dbg_open_NULL NULL
 
@@ -1448,6 +1445,7 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr)
 	struct nix_hw *nix_hw;
 	struct rvu *rvu;
 	int bank, max_bank;
+	u64 ndc_af_const;
 
 	if (blk_addr == BLKADDR_NDC_NPA0) {
 		rvu = s->private;
@@ -1456,7 +1454,8 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr)
 		rvu = nix_hw->rvu;
 	}
 
-	max_bank = NDC_MAX_BANK(rvu, blk_addr);
+	ndc_af_const = rvu_read64(rvu, blk_addr, NDC_AF_CONST);
+	max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const);
 	for (bank = 0; bank < max_bank; bank++) {
 		seq_printf(s, "BANK:%d\n", bank);
 		seq_printf(s, "\tHits:\t%lld\n",
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 26e639e57dae..4ad707e758b9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -790,6 +790,7 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
 	struct nix_aq_res_s *result;
 	int timeout = 1000;
 	u64 reg, head;
+	int ret;
 
 	result = (struct nix_aq_res_s *)aq->res->base;
 
@@ -813,9 +814,22 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
 			return -EBUSY;
 	}
 
-	if (result->compcode != NIX_AQ_COMP_GOOD)
+	if (result->compcode != NIX_AQ_COMP_GOOD) {
 		/* TODO: Replace this with some error code */
+		if (result->compcode == NIX_AQ_COMP_CTX_FAULT ||
+		    result->compcode == NIX_AQ_COMP_LOCKERR ||
+		    result->compcode == NIX_AQ_COMP_CTX_POISON) {
+			ret = rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_RX);
+			ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_TX);
+			ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_RX);
+			ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_TX);
+			if (ret)
+				dev_err(rvu->dev,
+					"%s: Not able to unlock cachelines\n", __func__);
+		}
+
 		return -EBUSY;
+	}
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
index 70bd036ed76e..4f5ca5ab13a4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
@@ -4,7 +4,7 @@
  * Copyright (C) 2018 Marvell.
  *
  */
-
+#include <linux/bitfield.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 
@@ -42,9 +42,18 @@ static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
 			return -EBUSY;
 	}
 
-	if (result->compcode != NPA_AQ_COMP_GOOD)
+	if (result->compcode != NPA_AQ_COMP_GOOD) {
 		/* TODO: Replace this with some error code */
+		if (result->compcode == NPA_AQ_COMP_CTX_FAULT ||
+		    result->compcode == NPA_AQ_COMP_LOCKERR ||
+		    result->compcode == NPA_AQ_COMP_CTX_POISON) {
+			if (rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NPA0))
+				dev_err(rvu->dev,
+					"%s: Not able to unlock cachelines\n", __func__);
+		}
+
 		return -EBUSY;
+	}
 
 	return 0;
 }
@@ -545,3 +554,48 @@ void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf)
 
 	npa_ctx_free(rvu, pfvf);
 }
+
+/* Due to an Hardware errata, in some corner cases, AQ context lock
+ * operations can result in a NDC way getting into an illegal state
+ * of not valid but locked.
+ *
+ * This API solves the problem by clearing the lock bit of the NDC block.
+ * The operation needs to be done for each line of all the NDC banks.
+ */
+int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr)
+{
+	int bank, max_bank, line, max_line, err;
+	u64 reg, ndc_af_const;
+
+	/* Set the ENABLE bit(63) to '0' */
+	reg = rvu_read64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL);
+	rvu_write64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, reg & GENMASK_ULL(62, 0));
+
+	/* Poll until the BUSY bits(47:32) are set to '0' */
+	err = rvu_poll_reg(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, GENMASK_ULL(47, 32), true);
+	if (err) {
+		dev_err(rvu->dev, "Timed out while polling for NDC CAM busy bits.\n");
+		return err;
+	}
+
+	ndc_af_const = rvu_read64(rvu, blkaddr, NDC_AF_CONST);
+	max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const);
+	max_line = FIELD_GET(NDC_AF_BANK_LINE_MASK, ndc_af_const);
+	for (bank = 0; bank < max_bank; bank++) {
+		for (line = 0; line < max_line; line++) {
+			/* Check if 'cache line valid bit(63)' is not set
+			 * but 'cache line lock bit(60)' is set and on
+			 * success, reset the lock bit(60).
+			 */
+			reg = rvu_read64(rvu, blkaddr,
+					 NDC_AF_BANKX_LINEX_METADATA(bank, line));
+			if (!(reg & BIT_ULL(63)) && (reg & BIT_ULL(60))) {
+				rvu_write64(rvu, blkaddr,
+					    NDC_AF_BANKX_LINEX_METADATA(bank, line),
+					    reg & ~BIT_ULL(60));
+			}
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index 1729b22580ce..7007f0b8e659 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -694,6 +694,7 @@
 #define NDC_AF_INTR_ENA_W1S		(0x00068)
 #define NDC_AF_INTR_ENA_W1C		(0x00070)
 #define NDC_AF_ACTIVE_PC		(0x00078)
+#define NDC_AF_CAMS_RD_INTERVAL		(0x00080)
 #define NDC_AF_BP_TEST_ENABLE		(0x001F8)
 #define NDC_AF_BP_TEST(a)		(0x00200 | (a) << 3)
 #define NDC_AF_BLK_RST			(0x002F0)
@@ -709,6 +710,8 @@
 		(0x00F00 | (a) << 5 | (b) << 4)
 #define NDC_AF_BANKX_HIT_PC(a)		(0x01000 | (a) << 3)
 #define NDC_AF_BANKX_MISS_PC(a)		(0x01100 | (a) << 3)
+#define NDC_AF_BANKX_LINEX_METADATA(a, b) \
+		(0x10000 | (a) << 12 | (b) << 3)
 
 /* LBK */
 #define LBK_CONST			(0x10ull)
-- 
cgit 


From cdd28833100c18a469c85a1cc3de9f6bbbe6caa0 Mon Sep 17 00:00:00 2001
From: Daniel Machon <daniel.machon@microchip.com>
Date: Tue, 7 Mar 2023 12:21:03 +0100
Subject: net: microchip: sparx5: fix deletion of existing DSCP mappings

Fix deletion of existing DSCP mappings in the APP table.

Adding and deleting DSCP entries are replicated per-port, since the
mapping table is global for all ports in the chip. Whenever a mapping
for a DSCP value already exists, the old mapping is deleted first.
However, it is only deleted for the specified port. Fix this by calling
sparx5_dcb_ieee_delapp() instead of dcb_ieee_delapp() as it ought to be.

Reproduce:

// Map and remap DSCP value 63
$ dcb app add dev eth0 dscp-prio 63:1
$ dcb app add dev eth0 dscp-prio 63:2

$ dcb app show dev eth0 dscp-prio
dscp-prio 63:2

$ dcb app show dev eth1 dscp-prio
dscp-prio 63:1 63:2 <-- 63:1 should not be there

Fixes: 8dcf69a64118 ("net: microchip: sparx5: add support for offloading dscp table")
Signed-off-by: Daniel Machon <daniel.machon@microchip.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c | 32 +++++++++++-----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c b/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c
index 871a3e62f852..2d763664dcda 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c
@@ -249,6 +249,21 @@ static int sparx5_dcb_ieee_dscp_setdel(struct net_device *dev,
 	return 0;
 }
 
+static int sparx5_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+	int err;
+
+	if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
+		err = sparx5_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp);
+	else
+		err = dcb_ieee_delapp(dev, app);
+
+	if (err < 0)
+		return err;
+
+	return sparx5_dcb_app_update(dev);
+}
+
 static int sparx5_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app)
 {
 	struct dcb_app app_itr;
@@ -264,7 +279,7 @@ static int sparx5_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app)
 	if (prio) {
 		app_itr = *app;
 		app_itr.priority = prio;
-		dcb_ieee_delapp(dev, &app_itr);
+		sparx5_dcb_ieee_delapp(dev, &app_itr);
 	}
 
 	if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
@@ -281,21 +296,6 @@ out:
 	return err;
 }
 
-static int sparx5_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app)
-{
-	int err;
-
-	if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
-		err = sparx5_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp);
-	else
-		err = dcb_ieee_delapp(dev, app);
-
-	if (err < 0)
-		return err;
-
-	return sparx5_dcb_app_update(dev);
-}
-
 static int sparx5_dcb_setapptrust(struct net_device *dev, u8 *selectors,
 				  int nselectors)
 {
-- 
cgit 


From c8b8a3c601f2cfad25ab5ce5b04df700048aef6e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 7 Mar 2023 17:54:11 +0200
Subject: net: dsa: mt7530: permit port 5 to work without port 6 on MT7621 SoC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MT7530 switch from the MT7621 SoC has 2 ports which can be set up as
internal: port 5 and 6. Arınç reports that the GMAC1 attached to port 5
receives corrupted frames, unless port 6 (attached to GMAC0) has been
brought up by the driver. This is true regardless of whether port 5 is
used as a user port or as a CPU port (carrying DSA tags).

Offline debugging (blind for me) which began in the linked thread showed
experimentally that the configuration done by the driver for port 6
contains a step which is needed by port 5 as well - the write to
CORE_GSWPLL_GRP2 (note that I've no idea as to what it does, apart from
the comment "Set core clock into 500Mhz"). Prints put by Arınç show that
the reset value of CORE_GSWPLL_GRP2 is RG_GSWPLL_POSDIV_500M(1) |
RG_GSWPLL_FBKDIV_500M(40) (0x128), both on the MCM MT7530 from the
MT7621 SoC, as well as on the standalone MT7530 from MT7623NI Bananapi
BPI-R2. Apparently, port 5 on the standalone MT7530 can work under both
values of the register, while on the MT7621 SoC it cannot.

The call path that triggers the register write is:

mt753x_phylink_mac_config() for port 6
-> mt753x_pad_setup()
   -> mt7530_pad_clk_setup()

so this fully explains the behavior noticed by Arınç, that bringing port
6 up is necessary.

The simplest fix for the problem is to extract the register writes which
are needed for both port 5 and 6 into a common mt7530_pll_setup()
function, which is called at mt7530_setup() time, immediately after
switch reset. We can argue that this mirrors the code layout introduced
in mt7531_setup() by commit 42bc4fafe359 ("net: mt7531: only do PLL once
after the reset"), in that the PLL setup has the exact same positioning,
and further work to consolidate the separate setup() functions is not
hindered.

Testing confirms that:

- the slight reordering of writes to MT7530_P6ECR and to
  CORE_GSWPLL_GRP1 / CORE_GSWPLL_GRP2 introduced by this change does not
  appear to cause problems for the operation of port 6 on MT7621 and on
  MT7623 (where port 5 also always worked)

- packets sent through port 5 are not corrupted anymore, regardless of
  whether port 6 is enabled by phylink or not (or even present in the
  device tree)

My algorithm for determining the Fixes: tag is as follows. Testing shows
that some logic from mt7530_pad_clk_setup() is needed even for port 5.
Prior to commit ca366d6c889b ("net: dsa: mt7530: Convert to PHYLINK
API"), a call did exist for all phy_is_pseudo_fixed_link() ports - so
port 5 included. That commit replaced it with a temporary "Port 5 is not
supported!" comment, and the following commit 38f790a80560 ("net: dsa:
mt7530: Add support for port 5") replaced that comment with a
configuration procedure in mt7530_setup_port5() which was insufficient
for port 5 to work. I'm laying the blame on the patch that claimed
support for port 5, although one would have also needed the change from
commit c3b8e07909db ("net: dsa: mt7530: setup core clock even in TRGMII
mode") for the write to be performed completely independently from port
6's configuration.

Thanks go to Arınç for describing the problem, for debugging and for
testing.

Reported-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Link: https://lore.kernel.org/netdev/f297c2c4-6e7c-57ac-2394-f6025d309b9d@arinc9.com/
Fixes: 38f790a80560 ("net: dsa: mt7530: Add support for port 5")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230307155411.868573-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mt7530.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 3a15015bc409..a508402c4ecb 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -393,6 +393,24 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
 		mt7530_write(priv, MT7530_ATA1 + (i * 4), reg[i]);
 }
 
+/* Set up switch core clock for MT7530 */
+static void mt7530_pll_setup(struct mt7530_priv *priv)
+{
+	/* Disable PLL */
+	core_write(priv, CORE_GSWPLL_GRP1, 0);
+
+	/* Set core clock into 500Mhz */
+	core_write(priv, CORE_GSWPLL_GRP2,
+		   RG_GSWPLL_POSDIV_500M(1) |
+		   RG_GSWPLL_FBKDIV_500M(25));
+
+	/* Enable PLL */
+	core_write(priv, CORE_GSWPLL_GRP1,
+		   RG_GSWPLL_EN_PRE |
+		   RG_GSWPLL_POSDIV_200M(2) |
+		   RG_GSWPLL_FBKDIV_200M(32));
+}
+
 /* Setup TX circuit including relevant PAD and driving */
 static int
 mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
@@ -453,21 +471,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
 	core_clear(priv, CORE_TRGMII_GSW_CLK_CG,
 		   REG_GSWCK_EN | REG_TRGMIICK_EN);
 
-	/* Setup core clock for MT7530 */
-	/* Disable PLL */
-	core_write(priv, CORE_GSWPLL_GRP1, 0);
-
-	/* Set core clock into 500Mhz */
-	core_write(priv, CORE_GSWPLL_GRP2,
-		   RG_GSWPLL_POSDIV_500M(1) |
-		   RG_GSWPLL_FBKDIV_500M(25));
-
-	/* Enable PLL */
-	core_write(priv, CORE_GSWPLL_GRP1,
-		   RG_GSWPLL_EN_PRE |
-		   RG_GSWPLL_POSDIV_200M(2) |
-		   RG_GSWPLL_FBKDIV_200M(32));
-
 	/* Setup the MT7530 TRGMII Tx Clock */
 	core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
 	core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
@@ -2196,6 +2199,8 @@ mt7530_setup(struct dsa_switch *ds)
 		     SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
 		     SYS_CTRL_REG_RST);
 
+	mt7530_pll_setup(priv);
+
 	/* Enable Port 6 only; P5 as GMAC5 which currently is not supported */
 	val = mt7530_read(priv, MT7530_MHWTRAP);
 	val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS;
-- 
cgit 


From 8f14820801042c221bb9fe51643a2585cac5dec2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 7 Mar 2023 09:19:30 -0800
Subject: eth: fealnx: bring back this old driver

This reverts commit d5e2d038dbece821f1af57acbeded3aa9a1832c1.

We have a report of this chip being used on a

  SURECOM EP-320X-S 100/10M Ethernet PCI Adapter

which could still have been purchased in some parts
of the world 3 years ago.

Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217151
Fixes: d5e2d038dbec ("eth: fealnx: delete the driver for Myson MTD-800")
Link: https://lore.kernel.org/r/20230307171930.4008454-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 arch/mips/configs/mtx1_defconfig      |    1 +
 arch/powerpc/configs/ppc6xx_defconfig |    1 +
 drivers/net/ethernet/Kconfig          |   10 +
 drivers/net/ethernet/Makefile         |    1 +
 drivers/net/ethernet/fealnx.c         | 1953 +++++++++++++++++++++++++++++++++
 5 files changed, 1966 insertions(+)
 create mode 100644 drivers/net/ethernet/fealnx.c

diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 89a1511d2ee4..edf9634aa8ee 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -284,6 +284,7 @@ CONFIG_IXGB=m
 CONFIG_SKGE=m
 CONFIG_SKY2=m
 CONFIG_MYRI10GE=m
+CONFIG_FEALNX=m
 CONFIG_NATSEMI=m
 CONFIG_NS83820=m
 CONFIG_S2IO=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 110258277959..f73c98be56c8 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -461,6 +461,7 @@ CONFIG_MV643XX_ETH=m
 CONFIG_SKGE=m
 CONFIG_SKY2=m
 CONFIG_MYRI10GE=m
+CONFIG_FEALNX=m
 CONFIG_NATSEMI=m
 CONFIG_NS83820=m
 CONFIG_PCMCIA_AXNET=m
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 323ec56e8a74..1917da784191 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -132,6 +132,16 @@ source "drivers/net/ethernet/mscc/Kconfig"
 source "drivers/net/ethernet/microsoft/Kconfig"
 source "drivers/net/ethernet/moxa/Kconfig"
 source "drivers/net/ethernet/myricom/Kconfig"
+
+config FEALNX
+	tristate "Myson MTD-8xx PCI Ethernet support"
+	depends on PCI
+	select CRC32
+	select MII
+	help
+	  Say Y here to support the Myson MTD-800 family of PCI-based Ethernet
+	  cards. <http://www.myson.com.tw/>
+
 source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/natsemi/Kconfig"
 source "drivers/net/ethernet/neterion/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 2fedbaa545eb..0d872d4efcd1 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/
 obj-$(CONFIG_NET_VENDOR_MICROSEMI) += mscc/
 obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/
 obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/
+obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
 obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/
 obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
new file mode 100644
index 000000000000..ed18450fd2cc
--- /dev/null
+++ b/drivers/net/ethernet/fealnx.c
@@ -0,0 +1,1953 @@
+/*
+	Written 1998-2000 by Donald Becker.
+
+	This software may be used and distributed according to the terms of
+	the GNU General Public License (GPL), incorporated herein by reference.
+	Drivers based on or derived from this code fall under the GPL and must
+	retain the authorship, copyright and license notice.  This file is not
+	a complete program and may only be used when the entire operating
+	system is licensed under the GPL.
+
+	The author may be reached as becker@scyld.com, or C/O
+	Scyld Computing Corporation
+	410 Severn Ave., Suite 210
+	Annapolis MD 21403
+
+	Support information and updates available at
+	http://www.scyld.com/network/pci-skeleton.html
+
+	Linux kernel updates:
+
+	Version 2.51, Nov 17, 2001 (jgarzik):
+	- Add ethtool support
+	- Replace some MII-related magic numbers with constants
+
+*/
+
+#define DRV_NAME	"fealnx"
+
+static int debug;		/* 1-> print debug message */
+static int max_interrupt_work = 20;
+
+/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). */
+static int multicast_filter_limit = 32;
+
+/* Set the copy breakpoint for the copy-only-tiny-frames scheme. */
+/* Setting to > 1518 effectively disables this feature.          */
+static int rx_copybreak;
+
+/* Used to pass the media type, etc.                            */
+/* Both 'options[]' and 'full_duplex[]' should exist for driver */
+/* interoperability.                                            */
+/* The media type is usually passed in 'options[]'.             */
+#define MAX_UNITS 8		/* More are supported, limit only on options */
+static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 };
+static int full_duplex[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 };
+
+/* Operational parameters that are set at compile time.                 */
+/* Keep the ring sizes a power of two for compile efficiency.           */
+/* The compiler will convert <unsigned>'%'<2^N> into a bit mask.        */
+/* Making the Tx ring too large decreases the effectiveness of channel  */
+/* bonding and packet priority.                                         */
+/* There are no ill effects from too-large receive rings.               */
+// 88-12-9 modify,
+// #define TX_RING_SIZE    16
+// #define RX_RING_SIZE    32
+#define TX_RING_SIZE    6
+#define RX_RING_SIZE    12
+#define TX_TOTAL_SIZE	TX_RING_SIZE*sizeof(struct fealnx_desc)
+#define RX_TOTAL_SIZE	RX_RING_SIZE*sizeof(struct fealnx_desc)
+
+/* Operational parameters that usually are not changed. */
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT      (2*HZ)
+
+#define PKT_BUF_SZ      1536	/* Size of each temporary Rx buffer. */
+
+
+/* Include files, designed to support most kernel versions 2.0.0 and later. */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+
+#include <asm/processor.h>	/* Processor type for cache alignment. */
+#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <asm/byteorder.h>
+
+/* This driver was written to use PCI memory space, however some x86 systems
+   work only with I/O space accesses. */
+#ifndef __alpha__
+#define USE_IO_OPS
+#endif
+
+/* Kernel compatibility defines, some common to David Hinds' PCMCIA package. */
+/* This is only in the support-all-kernels source code. */
+
+#define RUN_AT(x) (jiffies + (x))
+
+MODULE_AUTHOR("Myson or whoever");
+MODULE_DESCRIPTION("Myson MTD-8xx 100/10M Ethernet PCI Adapter Driver");
+MODULE_LICENSE("GPL");
+module_param(max_interrupt_work, int, 0);
+module_param(debug, int, 0);
+module_param(rx_copybreak, int, 0);
+module_param(multicast_filter_limit, int, 0);
+module_param_array(options, int, NULL, 0);
+module_param_array(full_duplex, int, NULL, 0);
+MODULE_PARM_DESC(max_interrupt_work, "fealnx maximum events handled per interrupt");
+MODULE_PARM_DESC(debug, "fealnx enable debugging (0-1)");
+MODULE_PARM_DESC(rx_copybreak, "fealnx copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(multicast_filter_limit, "fealnx maximum number of filtered multicast addresses");
+MODULE_PARM_DESC(options, "fealnx: Bits 0-3: media type, bit 17: full duplex");
+MODULE_PARM_DESC(full_duplex, "fealnx full duplex setting(s) (1)");
+
+enum {
+	MIN_REGION_SIZE		= 136,
+};
+
+/* A chip capabilities table, matching the entries in pci_tbl[] above. */
+enum chip_capability_flags {
+	HAS_MII_XCVR,
+	HAS_CHIP_XCVR,
+};
+
+/* 89/6/13 add, */
+/* for different PHY */
+enum phy_type_flags {
+	MysonPHY = 1,
+	AhdocPHY = 2,
+	SeeqPHY = 3,
+	MarvellPHY = 4,
+	Myson981 = 5,
+	LevelOnePHY = 6,
+	OtherPHY = 10,
+};
+
+struct chip_info {
+	char *chip_name;
+	int flags;
+};
+
+static const struct chip_info skel_netdrv_tbl[] = {
+	{ "100/10M Ethernet PCI Adapter",	HAS_MII_XCVR },
+	{ "100/10M Ethernet PCI Adapter",	HAS_CHIP_XCVR },
+	{ "1000/100/10M Ethernet PCI Adapter",	HAS_MII_XCVR },
+};
+
+/* Offsets to the Command and Status Registers. */
+enum fealnx_offsets {
+	PAR0 = 0x0,		/* physical address 0-3 */
+	PAR1 = 0x04,		/* physical address 4-5 */
+	MAR0 = 0x08,		/* multicast address 0-3 */
+	MAR1 = 0x0C,		/* multicast address 4-7 */
+	FAR0 = 0x10,		/* flow-control address 0-3 */
+	FAR1 = 0x14,		/* flow-control address 4-5 */
+	TCRRCR = 0x18,		/* receive & transmit configuration */
+	BCR = 0x1C,		/* bus command */
+	TXPDR = 0x20,		/* transmit polling demand */
+	RXPDR = 0x24,		/* receive polling demand */
+	RXCWP = 0x28,		/* receive current word pointer */
+	TXLBA = 0x2C,		/* transmit list base address */
+	RXLBA = 0x30,		/* receive list base address */
+	ISR = 0x34,		/* interrupt status */
+	IMR = 0x38,		/* interrupt mask */
+	FTH = 0x3C,		/* flow control high/low threshold */
+	MANAGEMENT = 0x40,	/* bootrom/eeprom and mii management */
+	TALLY = 0x44,		/* tally counters for crc and mpa */
+	TSR = 0x48,		/* tally counter for transmit status */
+	BMCRSR = 0x4c,		/* basic mode control and status */
+	PHYIDENTIFIER = 0x50,	/* phy identifier */
+	ANARANLPAR = 0x54,	/* auto-negotiation advertisement and link
+				   partner ability */
+	ANEROCR = 0x58,		/* auto-negotiation expansion and pci conf. */
+	BPREMRPSR = 0x5c,	/* bypass & receive error mask and phy status */
+};
+
+/* Bits in the interrupt status/enable registers. */
+/* The bits in the Intr Status/Enable registers, mostly interrupt sources. */
+enum intr_status_bits {
+	RFCON = 0x00020000,	/* receive flow control xon packet */
+	RFCOFF = 0x00010000,	/* receive flow control xoff packet */
+	LSCStatus = 0x00008000,	/* link status change */
+	ANCStatus = 0x00004000,	/* autonegotiation completed */
+	FBE = 0x00002000,	/* fatal bus error */
+	FBEMask = 0x00001800,	/* mask bit12-11 */
+	ParityErr = 0x00000000,	/* parity error */
+	TargetErr = 0x00001000,	/* target abort */
+	MasterErr = 0x00000800,	/* master error */
+	TUNF = 0x00000400,	/* transmit underflow */
+	ROVF = 0x00000200,	/* receive overflow */
+	ETI = 0x00000100,	/* transmit early int */
+	ERI = 0x00000080,	/* receive early int */
+	CNTOVF = 0x00000040,	/* counter overflow */
+	RBU = 0x00000020,	/* receive buffer unavailable */
+	TBU = 0x00000010,	/* transmit buffer unavilable */
+	TI = 0x00000008,	/* transmit interrupt */
+	RI = 0x00000004,	/* receive interrupt */
+	RxErr = 0x00000002,	/* receive error */
+};
+
+/* Bits in the NetworkConfig register, W for writing, R for reading */
+/* FIXME: some names are invented by me. Marked with (name?) */
+/* If you have docs and know bit names, please fix 'em */
+enum rx_mode_bits {
+	CR_W_ENH	= 0x02000000,	/* enhanced mode (name?) */
+	CR_W_FD		= 0x00100000,	/* full duplex */
+	CR_W_PS10	= 0x00080000,	/* 10 mbit */
+	CR_W_TXEN	= 0x00040000,	/* tx enable (name?) */
+	CR_W_PS1000	= 0x00010000,	/* 1000 mbit */
+     /* CR_W_RXBURSTMASK= 0x00000e00, Im unsure about this */
+	CR_W_RXMODEMASK	= 0x000000e0,
+	CR_W_PROM	= 0x00000080,	/* promiscuous mode */
+	CR_W_AB		= 0x00000040,	/* accept broadcast */
+	CR_W_AM		= 0x00000020,	/* accept mutlicast */
+	CR_W_ARP	= 0x00000008,	/* receive runt pkt */
+	CR_W_ALP	= 0x00000004,	/* receive long pkt */
+	CR_W_SEP	= 0x00000002,	/* receive error pkt */
+	CR_W_RXEN	= 0x00000001,	/* rx enable (unicast?) (name?) */
+
+	CR_R_TXSTOP	= 0x04000000,	/* tx stopped (name?) */
+	CR_R_FD		= 0x00100000,	/* full duplex detected */
+	CR_R_PS10	= 0x00080000,	/* 10 mbit detected */
+	CR_R_RXSTOP	= 0x00008000,	/* rx stopped (name?) */
+};
+
+/* The Tulip Rx and Tx buffer descriptors. */
+struct fealnx_desc {
+	s32 status;
+	s32 control;
+	u32 buffer;
+	u32 next_desc;
+	struct fealnx_desc *next_desc_logical;
+	struct sk_buff *skbuff;
+	u32 reserved1;
+	u32 reserved2;
+};
+
+/* Bits in network_desc.status */
+enum rx_desc_status_bits {
+	RXOWN = 0x80000000,	/* own bit */
+	FLNGMASK = 0x0fff0000,	/* frame length */
+	FLNGShift = 16,
+	MARSTATUS = 0x00004000,	/* multicast address received */
+	BARSTATUS = 0x00002000,	/* broadcast address received */
+	PHYSTATUS = 0x00001000,	/* physical address received */
+	RXFSD = 0x00000800,	/* first descriptor */
+	RXLSD = 0x00000400,	/* last descriptor */
+	ErrorSummary = 0x80,	/* error summary */
+	RUNTPKT = 0x40,		/* runt packet received */
+	LONGPKT = 0x20,		/* long packet received */
+	FAE = 0x10,		/* frame align error */
+	CRC = 0x08,		/* crc error */
+	RXER = 0x04,		/* receive error */
+};
+
+enum rx_desc_control_bits {
+	RXIC = 0x00800000,	/* interrupt control */
+	RBSShift = 0,
+};
+
+enum tx_desc_status_bits {
+	TXOWN = 0x80000000,	/* own bit */
+	JABTO = 0x00004000,	/* jabber timeout */
+	CSL = 0x00002000,	/* carrier sense lost */
+	LC = 0x00001000,	/* late collision */
+	EC = 0x00000800,	/* excessive collision */
+	UDF = 0x00000400,	/* fifo underflow */
+	DFR = 0x00000200,	/* deferred */
+	HF = 0x00000100,	/* heartbeat fail */
+	NCRMask = 0x000000ff,	/* collision retry count */
+	NCRShift = 0,
+};
+
+enum tx_desc_control_bits {
+	TXIC = 0x80000000,	/* interrupt control */
+	ETIControl = 0x40000000,	/* early transmit interrupt */
+	TXLD = 0x20000000,	/* last descriptor */
+	TXFD = 0x10000000,	/* first descriptor */
+	CRCEnable = 0x08000000,	/* crc control */
+	PADEnable = 0x04000000,	/* padding control */
+	RetryTxLC = 0x02000000,	/* retry late collision */
+	PKTSMask = 0x3ff800,	/* packet size bit21-11 */
+	PKTSShift = 11,
+	TBSMask = 0x000007ff,	/* transmit buffer bit 10-0 */
+	TBSShift = 0,
+};
+
+/* BootROM/EEPROM/MII Management Register */
+#define MASK_MIIR_MII_READ       0x00000000
+#define MASK_MIIR_MII_WRITE      0x00000008
+#define MASK_MIIR_MII_MDO        0x00000004
+#define MASK_MIIR_MII_MDI        0x00000002
+#define MASK_MIIR_MII_MDC        0x00000001
+
+/* ST+OP+PHYAD+REGAD+TA */
+#define OP_READ             0x6000	/* ST:01+OP:10+PHYAD+REGAD+TA:Z0 */
+#define OP_WRITE            0x5002	/* ST:01+OP:01+PHYAD+REGAD+TA:10 */
+
+/* ------------------------------------------------------------------------- */
+/*      Constants for Myson PHY                                              */
+/* ------------------------------------------------------------------------- */
+#define MysonPHYID      0xd0000302
+/* 89-7-27 add, (begin) */
+#define MysonPHYID0     0x0302
+#define StatusRegister  18
+#define SPEED100        0x0400	// bit10
+#define FULLMODE        0x0800	// bit11
+/* 89-7-27 add, (end) */
+
+/* ------------------------------------------------------------------------- */
+/*      Constants for Seeq 80225 PHY                                         */
+/* ------------------------------------------------------------------------- */
+#define SeeqPHYID0      0x0016
+
+#define MIIRegister18   18
+#define SPD_DET_100     0x80
+#define DPLX_DET_FULL   0x40
+
+/* ------------------------------------------------------------------------- */
+/*      Constants for Ahdoc 101 PHY                                          */
+/* ------------------------------------------------------------------------- */
+#define AhdocPHYID0     0x0022
+
+#define DiagnosticReg   18
+#define DPLX_FULL       0x0800
+#define Speed_100       0x0400
+
+/* 89/6/13 add, */
+/* -------------------------------------------------------------------------- */
+/*      Constants                                                             */
+/* -------------------------------------------------------------------------- */
+#define MarvellPHYID0           0x0141
+#define LevelOnePHYID0		0x0013
+
+#define MII1000BaseTControlReg  9
+#define MII1000BaseTStatusReg   10
+#define SpecificReg		17
+
+/* for 1000BaseT Control Register */
+#define PHYAbletoPerform1000FullDuplex  0x0200
+#define PHYAbletoPerform1000HalfDuplex  0x0100
+#define PHY1000AbilityMask              0x300
+
+// for phy specific status register, marvell phy.
+#define SpeedMask       0x0c000
+#define Speed_1000M     0x08000
+#define Speed_100M      0x4000
+#define Speed_10M       0
+#define Full_Duplex     0x2000
+
+// 89/12/29 add, for phy specific status register, levelone phy, (begin)
+#define LXT1000_100M    0x08000
+#define LXT1000_1000M   0x0c000
+#define LXT1000_Full    0x200
+// 89/12/29 add, for phy specific status register, levelone phy, (end)
+
+/* for 3-in-1 case, BMCRSR register */
+#define LinkIsUp2	0x00040000
+
+/* for PHY */
+#define LinkIsUp        0x0004
+
+
+struct netdev_private {
+	/* Descriptor rings first for alignment. */
+	struct fealnx_desc *rx_ring;
+	struct fealnx_desc *tx_ring;
+
+	dma_addr_t rx_ring_dma;
+	dma_addr_t tx_ring_dma;
+
+	spinlock_t lock;
+
+	/* Media monitoring timer. */
+	struct timer_list timer;
+
+	/* Reset timer */
+	struct timer_list reset_timer;
+	int reset_timer_armed;
+	unsigned long crvalue_sv;
+	unsigned long imrvalue_sv;
+
+	/* Frequently used values: keep some adjacent for cache effect. */
+	int flags;
+	struct pci_dev *pci_dev;
+	unsigned long crvalue;
+	unsigned long bcrvalue;
+	unsigned long imrvalue;
+	struct fealnx_desc *cur_rx;
+	struct fealnx_desc *lack_rxbuf;
+	int really_rx_count;
+	struct fealnx_desc *cur_tx;
+	struct fealnx_desc *cur_tx_copy;
+	int really_tx_count;
+	int free_tx_count;
+	unsigned int rx_buf_sz;	/* Based on MTU+slack. */
+
+	/* These values are keep track of the transceiver/media in use. */
+	unsigned int linkok;
+	unsigned int line_speed;
+	unsigned int duplexmode;
+	unsigned int default_port:4;	/* Last dev->if_port value. */
+	unsigned int PHYType;
+
+	/* MII transceiver section. */
+	int mii_cnt;		/* MII device addresses. */
+	unsigned char phys[2];	/* MII device addresses. */
+	struct mii_if_info mii;
+	void __iomem *mem;
+};
+
+
+static int mdio_read(struct net_device *dev, int phy_id, int location);
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
+static int netdev_open(struct net_device *dev);
+static void getlinktype(struct net_device *dev);
+static void getlinkstatus(struct net_device *dev);
+static void netdev_timer(struct timer_list *t);
+static void reset_timer(struct timer_list *t);
+static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue);
+static void init_ring(struct net_device *dev);
+static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
+static irqreturn_t intr_handler(int irq, void *dev_instance);
+static int netdev_rx(struct net_device *dev);
+static void set_rx_mode(struct net_device *dev);
+static void __set_rx_mode(struct net_device *dev);
+static struct net_device_stats *get_stats(struct net_device *dev);
+static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static const struct ethtool_ops netdev_ethtool_ops;
+static int netdev_close(struct net_device *dev);
+static void reset_rx_descriptors(struct net_device *dev);
+static void reset_tx_descriptors(struct net_device *dev);
+
+static void stop_nic_rx(void __iomem *ioaddr, long crvalue)
+{
+	int delay = 0x1000;
+	iowrite32(crvalue & ~(CR_W_RXEN), ioaddr + TCRRCR);
+	while (--delay) {
+		if ( (ioread32(ioaddr + TCRRCR) & CR_R_RXSTOP) == CR_R_RXSTOP)
+			break;
+	}
+}
+
+
+static void stop_nic_rxtx(void __iomem *ioaddr, long crvalue)
+{
+	int delay = 0x1000;
+	iowrite32(crvalue & ~(CR_W_RXEN+CR_W_TXEN), ioaddr + TCRRCR);
+	while (--delay) {
+		if ( (ioread32(ioaddr + TCRRCR) & (CR_R_RXSTOP+CR_R_TXSTOP))
+					    == (CR_R_RXSTOP+CR_R_TXSTOP) )
+			break;
+	}
+}
+
+static const struct net_device_ops netdev_ops = {
+	.ndo_open		= netdev_open,
+	.ndo_stop		= netdev_close,
+	.ndo_start_xmit		= start_tx,
+	.ndo_get_stats 		= get_stats,
+	.ndo_set_rx_mode	= set_rx_mode,
+	.ndo_eth_ioctl		= mii_ioctl,
+	.ndo_tx_timeout		= fealnx_tx_timeout,
+	.ndo_set_mac_address 	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+static int fealnx_init_one(struct pci_dev *pdev,
+			   const struct pci_device_id *ent)
+{
+	struct netdev_private *np;
+	int i, option, err, irq;
+	static int card_idx = -1;
+	char boardname[12];
+	void __iomem *ioaddr;
+	unsigned long len;
+	unsigned int chip_id = ent->driver_data;
+	struct net_device *dev;
+	void *ring_space;
+	dma_addr_t ring_dma;
+	u8 addr[ETH_ALEN];
+#ifdef USE_IO_OPS
+	int bar = 0;
+#else
+	int bar = 1;
+#endif
+
+	card_idx++;
+	sprintf(boardname, "fealnx%d", card_idx);
+
+	option = card_idx < MAX_UNITS ? options[card_idx] : 0;
+
+	i = pci_enable_device(pdev);
+	if (i) return i;
+	pci_set_master(pdev);
+
+	len = pci_resource_len(pdev, bar);
+	if (len < MIN_REGION_SIZE) {
+		dev_err(&pdev->dev,
+			   "region size %ld too small, aborting\n", len);
+		return -ENODEV;
+	}
+
+	i = pci_request_regions(pdev, boardname);
+	if (i)
+		return i;
+
+	irq = pdev->irq;
+
+	ioaddr = pci_iomap(pdev, bar, len);
+	if (!ioaddr) {
+		err = -ENOMEM;
+		goto err_out_res;
+	}
+
+	dev = alloc_etherdev(sizeof(struct netdev_private));
+	if (!dev) {
+		err = -ENOMEM;
+		goto err_out_unmap;
+	}
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	/* read ethernet id */
+	for (i = 0; i < 6; ++i)
+		addr[i] = ioread8(ioaddr + PAR0 + i);
+	eth_hw_addr_set(dev, addr);
+
+	/* Reset the chip to erase previous misconfiguration. */
+	iowrite32(0x00000001, ioaddr + BCR);
+
+	/* Make certain the descriptor lists are aligned. */
+	np = netdev_priv(dev);
+	np->mem = ioaddr;
+	spin_lock_init(&np->lock);
+	np->pci_dev = pdev;
+	np->flags = skel_netdrv_tbl[chip_id].flags;
+	pci_set_drvdata(pdev, dev);
+	np->mii.dev = dev;
+	np->mii.mdio_read = mdio_read;
+	np->mii.mdio_write = mdio_write;
+	np->mii.phy_id_mask = 0x1f;
+	np->mii.reg_num_mask = 0x1f;
+
+	ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, &ring_dma,
+					GFP_KERNEL);
+	if (!ring_space) {
+		err = -ENOMEM;
+		goto err_out_free_dev;
+	}
+	np->rx_ring = ring_space;
+	np->rx_ring_dma = ring_dma;
+
+	ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, &ring_dma,
+					GFP_KERNEL);
+	if (!ring_space) {
+		err = -ENOMEM;
+		goto err_out_free_rx;
+	}
+	np->tx_ring = ring_space;
+	np->tx_ring_dma = ring_dma;
+
+	/* find the connected MII xcvrs */
+	if (np->flags == HAS_MII_XCVR) {
+		int phy, phy_idx = 0;
+
+		for (phy = 1; phy < 32 && phy_idx < ARRAY_SIZE(np->phys);
+			       phy++) {
+			int mii_status = mdio_read(dev, phy, 1);
+
+			if (mii_status != 0xffff && mii_status != 0x0000) {
+				np->phys[phy_idx++] = phy;
+				dev_info(&pdev->dev,
+				       "MII PHY found at address %d, status "
+				       "0x%4.4x.\n", phy, mii_status);
+				/* get phy type */
+				{
+					unsigned int data;
+
+					data = mdio_read(dev, np->phys[0], 2);
+					if (data == SeeqPHYID0)
+						np->PHYType = SeeqPHY;
+					else if (data == AhdocPHYID0)
+						np->PHYType = AhdocPHY;
+					else if (data == MarvellPHYID0)
+						np->PHYType = MarvellPHY;
+					else if (data == MysonPHYID0)
+						np->PHYType = Myson981;
+					else if (data == LevelOnePHYID0)
+						np->PHYType = LevelOnePHY;
+					else
+						np->PHYType = OtherPHY;
+				}
+			}
+		}
+
+		np->mii_cnt = phy_idx;
+		if (phy_idx == 0)
+			dev_warn(&pdev->dev,
+				"MII PHY not found -- this device may "
+			       "not operate correctly.\n");
+	} else {
+		np->phys[0] = 32;
+/* 89/6/23 add, (begin) */
+		/* get phy type */
+		if (ioread32(ioaddr + PHYIDENTIFIER) == MysonPHYID)
+			np->PHYType = MysonPHY;
+		else
+			np->PHYType = OtherPHY;
+	}
+	np->mii.phy_id = np->phys[0];
+
+	if (dev->mem_start)
+		option = dev->mem_start;
+
+	/* The lower four bits are the media type. */
+	if (option > 0) {
+		if (option & 0x200)
+			np->mii.full_duplex = 1;
+		np->default_port = option & 15;
+	}
+
+	if (card_idx < MAX_UNITS && full_duplex[card_idx] > 0)
+		np->mii.full_duplex = full_duplex[card_idx];
+
+	if (np->mii.full_duplex) {
+		dev_info(&pdev->dev, "Media type forced to Full Duplex.\n");
+/* 89/6/13 add, (begin) */
+//      if (np->PHYType==MarvellPHY)
+		if ((np->PHYType == MarvellPHY) || (np->PHYType == LevelOnePHY)) {
+			unsigned int data;
+
+			data = mdio_read(dev, np->phys[0], 9);
+			data = (data & 0xfcff) | 0x0200;
+			mdio_write(dev, np->phys[0], 9, data);
+		}
+/* 89/6/13 add, (end) */
+		if (np->flags == HAS_MII_XCVR)
+			mdio_write(dev, np->phys[0], MII_ADVERTISE, ADVERTISE_FULL);
+		else
+			iowrite32(ADVERTISE_FULL, ioaddr + ANARANLPAR);
+		np->mii.force_media = 1;
+	}
+
+	dev->netdev_ops = &netdev_ops;
+	dev->ethtool_ops = &netdev_ethtool_ops;
+	dev->watchdog_timeo = TX_TIMEOUT;
+
+	err = register_netdev(dev);
+	if (err)
+		goto err_out_free_tx;
+
+	printk(KERN_INFO "%s: %s at %p, %pM, IRQ %d.\n",
+	       dev->name, skel_netdrv_tbl[chip_id].chip_name, ioaddr,
+	       dev->dev_addr, irq);
+
+	return 0;
+
+err_out_free_tx:
+	dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring,
+			  np->tx_ring_dma);
+err_out_free_rx:
+	dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring,
+			  np->rx_ring_dma);
+err_out_free_dev:
+	free_netdev(dev);
+err_out_unmap:
+	pci_iounmap(pdev, ioaddr);
+err_out_res:
+	pci_release_regions(pdev);
+	return err;
+}
+
+
+static void fealnx_remove_one(struct pci_dev *pdev)
+{
+	struct net_device *dev = pci_get_drvdata(pdev);
+
+	if (dev) {
+		struct netdev_private *np = netdev_priv(dev);
+
+		dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring,
+				  np->tx_ring_dma);
+		dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring,
+				  np->rx_ring_dma);
+		unregister_netdev(dev);
+		pci_iounmap(pdev, np->mem);
+		free_netdev(dev);
+		pci_release_regions(pdev);
+	} else
+		printk(KERN_ERR "fealnx: remove for unknown device\n");
+}
+
+
+static ulong m80x_send_cmd_to_phy(void __iomem *miiport, int opcode, int phyad, int regad)
+{
+	ulong miir;
+	int i;
+	unsigned int mask, data;
+
+	/* enable MII output */
+	miir = (ulong) ioread32(miiport);
+	miir &= 0xfffffff0;
+
+	miir |= MASK_MIIR_MII_WRITE + MASK_MIIR_MII_MDO;
+
+	/* send 32 1's preamble */
+	for (i = 0; i < 32; i++) {
+		/* low MDC; MDO is already high (miir) */
+		miir &= ~MASK_MIIR_MII_MDC;
+		iowrite32(miir, miiport);
+
+		/* high MDC */
+		miir |= MASK_MIIR_MII_MDC;
+		iowrite32(miir, miiport);
+	}
+
+	/* calculate ST+OP+PHYAD+REGAD+TA */
+	data = opcode | (phyad << 7) | (regad << 2);
+
+	/* sent out */
+	mask = 0x8000;
+	while (mask) {
+		/* low MDC, prepare MDO */
+		miir &= ~(MASK_MIIR_MII_MDC + MASK_MIIR_MII_MDO);
+		if (mask & data)
+			miir |= MASK_MIIR_MII_MDO;
+
+		iowrite32(miir, miiport);
+		/* high MDC */
+		miir |= MASK_MIIR_MII_MDC;
+		iowrite32(miir, miiport);
+		udelay(30);
+
+		/* next */
+		mask >>= 1;
+		if (mask == 0x2 && opcode == OP_READ)
+			miir &= ~MASK_MIIR_MII_WRITE;
+	}
+	return miir;
+}
+
+
+static int mdio_read(struct net_device *dev, int phyad, int regad)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *miiport = np->mem + MANAGEMENT;
+	ulong miir;
+	unsigned int mask, data;
+
+	miir = m80x_send_cmd_to_phy(miiport, OP_READ, phyad, regad);
+
+	/* read data */
+	mask = 0x8000;
+	data = 0;
+	while (mask) {
+		/* low MDC */
+		miir &= ~MASK_MIIR_MII_MDC;
+		iowrite32(miir, miiport);
+
+		/* read MDI */
+		miir = ioread32(miiport);
+		if (miir & MASK_MIIR_MII_MDI)
+			data |= mask;
+
+		/* high MDC, and wait */
+		miir |= MASK_MIIR_MII_MDC;
+		iowrite32(miir, miiport);
+		udelay(30);
+
+		/* next */
+		mask >>= 1;
+	}
+
+	/* low MDC */
+	miir &= ~MASK_MIIR_MII_MDC;
+	iowrite32(miir, miiport);
+
+	return data & 0xffff;
+}
+
+
+static void mdio_write(struct net_device *dev, int phyad, int regad, int data)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *miiport = np->mem + MANAGEMENT;
+	ulong miir;
+	unsigned int mask;
+
+	miir = m80x_send_cmd_to_phy(miiport, OP_WRITE, phyad, regad);
+
+	/* write data */
+	mask = 0x8000;
+	while (mask) {
+		/* low MDC, prepare MDO */
+		miir &= ~(MASK_MIIR_MII_MDC + MASK_MIIR_MII_MDO);
+		if (mask & data)
+			miir |= MASK_MIIR_MII_MDO;
+		iowrite32(miir, miiport);
+
+		/* high MDC */
+		miir |= MASK_MIIR_MII_MDC;
+		iowrite32(miir, miiport);
+
+		/* next */
+		mask >>= 1;
+	}
+
+	/* low MDC */
+	miir &= ~MASK_MIIR_MII_MDC;
+	iowrite32(miir, miiport);
+}
+
+
+static int netdev_open(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+	const int irq = np->pci_dev->irq;
+	int rc, i;
+
+	iowrite32(0x00000001, ioaddr + BCR);	/* Reset */
+
+	rc = request_irq(irq, intr_handler, IRQF_SHARED, dev->name, dev);
+	if (rc)
+		return -EAGAIN;
+
+	for (i = 0; i < 3; i++)
+		iowrite16(((const unsigned short *)dev->dev_addr)[i],
+				ioaddr + PAR0 + i*2);
+
+	init_ring(dev);
+
+	iowrite32(np->rx_ring_dma, ioaddr + RXLBA);
+	iowrite32(np->tx_ring_dma, ioaddr + TXLBA);
+
+	/* Initialize other registers. */
+	/* Configure the PCI bus bursts and FIFO thresholds.
+	   486: Set 8 longword burst.
+	   586: no burst limit.
+	   Burst length 5:3
+	   0 0 0   1
+	   0 0 1   4
+	   0 1 0   8
+	   0 1 1   16
+	   1 0 0   32
+	   1 0 1   64
+	   1 1 0   128
+	   1 1 1   256
+	   Wait the specified 50 PCI cycles after a reset by initializing
+	   Tx and Rx queues and the address filter list.
+	   FIXME (Ueimor): optimistic for alpha + posted writes ? */
+
+	np->bcrvalue = 0x10;	/* little-endian, 8 burst length */
+#ifdef __BIG_ENDIAN
+	np->bcrvalue |= 0x04;	/* big-endian */
+#endif
+
+#if defined(__i386__) && !defined(MODULE) && !defined(CONFIG_UML)
+	if (boot_cpu_data.x86 <= 4)
+		np->crvalue = 0xa00;
+	else
+#endif
+		np->crvalue = 0xe00;	/* rx 128 burst length */
+
+
+// 89/12/29 add,
+// 90/1/16 modify,
+//   np->imrvalue=FBE|TUNF|CNTOVF|RBU|TI|RI;
+	np->imrvalue = TUNF | CNTOVF | RBU | TI | RI;
+	if (np->pci_dev->device == 0x891) {
+		np->bcrvalue |= 0x200;	/* set PROG bit */
+		np->crvalue |= CR_W_ENH;	/* set enhanced bit */
+		np->imrvalue |= ETI;
+	}
+	iowrite32(np->bcrvalue, ioaddr + BCR);
+
+	if (dev->if_port == 0)
+		dev->if_port = np->default_port;
+
+	iowrite32(0, ioaddr + RXPDR);
+// 89/9/1 modify,
+//   np->crvalue = 0x00e40001;    /* tx store and forward, tx/rx enable */
+	np->crvalue |= 0x00e40001;	/* tx store and forward, tx/rx enable */
+	np->mii.full_duplex = np->mii.force_media;
+	getlinkstatus(dev);
+	if (np->linkok)
+		getlinktype(dev);
+	__set_rx_mode(dev);
+
+	netif_start_queue(dev);
+
+	/* Clear and Enable interrupts by setting the interrupt mask. */
+	iowrite32(FBE | TUNF | CNTOVF | RBU | TI | RI, ioaddr + ISR);
+	iowrite32(np->imrvalue, ioaddr + IMR);
+
+	if (debug)
+		printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name);
+
+	/* Set the timer to check for link beat. */
+	timer_setup(&np->timer, netdev_timer, 0);
+	np->timer.expires = RUN_AT(3 * HZ);
+
+	/* timer handler */
+	add_timer(&np->timer);
+
+	timer_setup(&np->reset_timer, reset_timer, 0);
+	np->reset_timer_armed = 0;
+	return rc;
+}
+
+
+static void getlinkstatus(struct net_device *dev)
+/* function: Routine will read MII Status Register to get link status.       */
+/* input   : dev... pointer to the adapter block.                            */
+/* output  : none.                                                           */
+{
+	struct netdev_private *np = netdev_priv(dev);
+	unsigned int i, DelayTime = 0x1000;
+
+	np->linkok = 0;
+
+	if (np->PHYType == MysonPHY) {
+		for (i = 0; i < DelayTime; ++i) {
+			if (ioread32(np->mem + BMCRSR) & LinkIsUp2) {
+				np->linkok = 1;
+				return;
+			}
+			udelay(100);
+		}
+	} else {
+		for (i = 0; i < DelayTime; ++i) {
+			if (mdio_read(dev, np->phys[0], MII_BMSR) & BMSR_LSTATUS) {
+				np->linkok = 1;
+				return;
+			}
+			udelay(100);
+		}
+	}
+}
+
+
+static void getlinktype(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+
+	if (np->PHYType == MysonPHY) {	/* 3-in-1 case */
+		if (ioread32(np->mem + TCRRCR) & CR_R_FD)
+			np->duplexmode = 2;	/* full duplex */
+		else
+			np->duplexmode = 1;	/* half duplex */
+		if (ioread32(np->mem + TCRRCR) & CR_R_PS10)
+			np->line_speed = 1;	/* 10M */
+		else
+			np->line_speed = 2;	/* 100M */
+	} else {
+		if (np->PHYType == SeeqPHY) {	/* this PHY is SEEQ 80225 */
+			unsigned int data;
+
+			data = mdio_read(dev, np->phys[0], MIIRegister18);
+			if (data & SPD_DET_100)
+				np->line_speed = 2;	/* 100M */
+			else
+				np->line_speed = 1;	/* 10M */
+			if (data & DPLX_DET_FULL)
+				np->duplexmode = 2;	/* full duplex mode */
+			else
+				np->duplexmode = 1;	/* half duplex mode */
+		} else if (np->PHYType == AhdocPHY) {
+			unsigned int data;
+
+			data = mdio_read(dev, np->phys[0], DiagnosticReg);
+			if (data & Speed_100)
+				np->line_speed = 2;	/* 100M */
+			else
+				np->line_speed = 1;	/* 10M */
+			if (data & DPLX_FULL)
+				np->duplexmode = 2;	/* full duplex mode */
+			else
+				np->duplexmode = 1;	/* half duplex mode */
+		}
+/* 89/6/13 add, (begin) */
+		else if (np->PHYType == MarvellPHY) {
+			unsigned int data;
+
+			data = mdio_read(dev, np->phys[0], SpecificReg);
+			if (data & Full_Duplex)
+				np->duplexmode = 2;	/* full duplex mode */
+			else
+				np->duplexmode = 1;	/* half duplex mode */
+			data &= SpeedMask;
+			if (data == Speed_1000M)
+				np->line_speed = 3;	/* 1000M */
+			else if (data == Speed_100M)
+				np->line_speed = 2;	/* 100M */
+			else
+				np->line_speed = 1;	/* 10M */
+		}
+/* 89/6/13 add, (end) */
+/* 89/7/27 add, (begin) */
+		else if (np->PHYType == Myson981) {
+			unsigned int data;
+
+			data = mdio_read(dev, np->phys[0], StatusRegister);
+
+			if (data & SPEED100)
+				np->line_speed = 2;
+			else
+				np->line_speed = 1;
+
+			if (data & FULLMODE)
+				np->duplexmode = 2;
+			else
+				np->duplexmode = 1;
+		}
+/* 89/7/27 add, (end) */
+/* 89/12/29 add */
+		else if (np->PHYType == LevelOnePHY) {
+			unsigned int data;
+
+			data = mdio_read(dev, np->phys[0], SpecificReg);
+			if (data & LXT1000_Full)
+				np->duplexmode = 2;	/* full duplex mode */
+			else
+				np->duplexmode = 1;	/* half duplex mode */
+			data &= SpeedMask;
+			if (data == LXT1000_1000M)
+				np->line_speed = 3;	/* 1000M */
+			else if (data == LXT1000_100M)
+				np->line_speed = 2;	/* 100M */
+			else
+				np->line_speed = 1;	/* 10M */
+		}
+		np->crvalue &= (~CR_W_PS10) & (~CR_W_FD) & (~CR_W_PS1000);
+		if (np->line_speed == 1)
+			np->crvalue |= CR_W_PS10;
+		else if (np->line_speed == 3)
+			np->crvalue |= CR_W_PS1000;
+		if (np->duplexmode == 2)
+			np->crvalue |= CR_W_FD;
+	}
+}
+
+
+/* Take lock before calling this */
+static void allocate_rx_buffers(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+
+	/*  allocate skb for rx buffers */
+	while (np->really_rx_count != RX_RING_SIZE) {
+		struct sk_buff *skb;
+
+		skb = netdev_alloc_skb(dev, np->rx_buf_sz);
+		if (skb == NULL)
+			break;	/* Better luck next round. */
+
+		while (np->lack_rxbuf->skbuff)
+			np->lack_rxbuf = np->lack_rxbuf->next_desc_logical;
+
+		np->lack_rxbuf->skbuff = skb;
+		np->lack_rxbuf->buffer = dma_map_single(&np->pci_dev->dev,
+							skb->data,
+							np->rx_buf_sz,
+							DMA_FROM_DEVICE);
+		np->lack_rxbuf->status = RXOWN;
+		++np->really_rx_count;
+	}
+}
+
+
+static void netdev_timer(struct timer_list *t)
+{
+	struct netdev_private *np = from_timer(np, t, timer);
+	struct net_device *dev = np->mii.dev;
+	void __iomem *ioaddr = np->mem;
+	int old_crvalue = np->crvalue;
+	unsigned int old_linkok = np->linkok;
+	unsigned long flags;
+
+	if (debug)
+		printk(KERN_DEBUG "%s: Media selection timer tick, status %8.8x "
+		       "config %8.8x.\n", dev->name, ioread32(ioaddr + ISR),
+		       ioread32(ioaddr + TCRRCR));
+
+	spin_lock_irqsave(&np->lock, flags);
+
+	if (np->flags == HAS_MII_XCVR) {
+		getlinkstatus(dev);
+		if ((old_linkok == 0) && (np->linkok == 1)) {	/* we need to detect the media type again */
+			getlinktype(dev);
+			if (np->crvalue != old_crvalue) {
+				stop_nic_rxtx(ioaddr, np->crvalue);
+				iowrite32(np->crvalue, ioaddr + TCRRCR);
+			}
+		}
+	}
+
+	allocate_rx_buffers(dev);
+
+	spin_unlock_irqrestore(&np->lock, flags);
+
+	np->timer.expires = RUN_AT(10 * HZ);
+	add_timer(&np->timer);
+}
+
+
+/* Take lock before calling */
+/* Reset chip and disable rx, tx and interrupts */
+static void reset_and_disable_rxtx(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+	int delay=51;
+
+	/* Reset the chip's Tx and Rx processes. */
+	stop_nic_rxtx(ioaddr, 0);
+
+	/* Disable interrupts by clearing the interrupt mask. */
+	iowrite32(0, ioaddr + IMR);
+
+	/* Reset the chip to erase previous misconfiguration. */
+	iowrite32(0x00000001, ioaddr + BCR);
+
+	/* Ueimor: wait for 50 PCI cycles (and flush posted writes btw).
+	   We surely wait too long (address+data phase). Who cares? */
+	while (--delay) {
+		ioread32(ioaddr + BCR);
+		rmb();
+	}
+}
+
+
+/* Take lock before calling */
+/* Restore chip after reset */
+static void enable_rxtx(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+
+	reset_rx_descriptors(dev);
+
+	iowrite32(np->tx_ring_dma + ((char*)np->cur_tx - (char*)np->tx_ring),
+		ioaddr + TXLBA);
+	iowrite32(np->rx_ring_dma + ((char*)np->cur_rx - (char*)np->rx_ring),
+		ioaddr + RXLBA);
+
+	iowrite32(np->bcrvalue, ioaddr + BCR);
+
+	iowrite32(0, ioaddr + RXPDR);
+	__set_rx_mode(dev); /* changes np->crvalue, writes it into TCRRCR */
+
+	/* Clear and Enable interrupts by setting the interrupt mask. */
+	iowrite32(FBE | TUNF | CNTOVF | RBU | TI | RI, ioaddr + ISR);
+	iowrite32(np->imrvalue, ioaddr + IMR);
+
+	iowrite32(0, ioaddr + TXPDR);
+}
+
+
+static void reset_timer(struct timer_list *t)
+{
+	struct netdev_private *np = from_timer(np, t, reset_timer);
+	struct net_device *dev = np->mii.dev;
+	unsigned long flags;
+
+	printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name);
+
+	spin_lock_irqsave(&np->lock, flags);
+	np->crvalue = np->crvalue_sv;
+	np->imrvalue = np->imrvalue_sv;
+
+	reset_and_disable_rxtx(dev);
+	/* works for me without this:
+	reset_tx_descriptors(dev); */
+	enable_rxtx(dev);
+	netif_start_queue(dev); /* FIXME: or netif_wake_queue(dev); ? */
+
+	np->reset_timer_armed = 0;
+
+	spin_unlock_irqrestore(&np->lock, flags);
+}
+
+
+static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+	unsigned long flags;
+	int i;
+
+	printk(KERN_WARNING
+	       "%s: Transmit timed out, status %8.8x, resetting...\n",
+	       dev->name, ioread32(ioaddr + ISR));
+
+	{
+		printk(KERN_DEBUG "  Rx ring %p: ", np->rx_ring);
+		for (i = 0; i < RX_RING_SIZE; i++)
+			printk(KERN_CONT " %8.8x",
+			       (unsigned int) np->rx_ring[i].status);
+		printk(KERN_CONT "\n");
+		printk(KERN_DEBUG "  Tx ring %p: ", np->tx_ring);
+		for (i = 0; i < TX_RING_SIZE; i++)
+			printk(KERN_CONT " %4.4x", np->tx_ring[i].status);
+		printk(KERN_CONT "\n");
+	}
+
+	spin_lock_irqsave(&np->lock, flags);
+
+	reset_and_disable_rxtx(dev);
+	reset_tx_descriptors(dev);
+	enable_rxtx(dev);
+
+	spin_unlock_irqrestore(&np->lock, flags);
+
+	netif_trans_update(dev); /* prevent tx timeout */
+	dev->stats.tx_errors++;
+	netif_wake_queue(dev); /* or .._start_.. ?? */
+}
+
+
+/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
+static void init_ring(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	int i;
+
+	/* initialize rx variables */
+	np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
+	np->cur_rx = &np->rx_ring[0];
+	np->lack_rxbuf = np->rx_ring;
+	np->really_rx_count = 0;
+
+	/* initial rx descriptors. */
+	for (i = 0; i < RX_RING_SIZE; i++) {
+		np->rx_ring[i].status = 0;
+		np->rx_ring[i].control = np->rx_buf_sz << RBSShift;
+		np->rx_ring[i].next_desc = np->rx_ring_dma +
+			(i + 1)*sizeof(struct fealnx_desc);
+		np->rx_ring[i].next_desc_logical = &np->rx_ring[i + 1];
+		np->rx_ring[i].skbuff = NULL;
+	}
+
+	/* for the last rx descriptor */
+	np->rx_ring[i - 1].next_desc = np->rx_ring_dma;
+	np->rx_ring[i - 1].next_desc_logical = np->rx_ring;
+
+	/* allocate skb for rx buffers */
+	for (i = 0; i < RX_RING_SIZE; i++) {
+		struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz);
+
+		if (skb == NULL) {
+			np->lack_rxbuf = &np->rx_ring[i];
+			break;
+		}
+
+		++np->really_rx_count;
+		np->rx_ring[i].skbuff = skb;
+		np->rx_ring[i].buffer = dma_map_single(&np->pci_dev->dev,
+						       skb->data,
+						       np->rx_buf_sz,
+						       DMA_FROM_DEVICE);
+		np->rx_ring[i].status = RXOWN;
+		np->rx_ring[i].control |= RXIC;
+	}
+
+	/* initialize tx variables */
+	np->cur_tx = &np->tx_ring[0];
+	np->cur_tx_copy = &np->tx_ring[0];
+	np->really_tx_count = 0;
+	np->free_tx_count = TX_RING_SIZE;
+
+	for (i = 0; i < TX_RING_SIZE; i++) {
+		np->tx_ring[i].status = 0;
+		/* do we need np->tx_ring[i].control = XXX; ?? */
+		np->tx_ring[i].next_desc = np->tx_ring_dma +
+			(i + 1)*sizeof(struct fealnx_desc);
+		np->tx_ring[i].next_desc_logical = &np->tx_ring[i + 1];
+		np->tx_ring[i].skbuff = NULL;
+	}
+
+	/* for the last tx descriptor */
+	np->tx_ring[i - 1].next_desc = np->tx_ring_dma;
+	np->tx_ring[i - 1].next_desc_logical = &np->tx_ring[0];
+}
+
+
+static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&np->lock, flags);
+
+	np->cur_tx_copy->skbuff = skb;
+
+#define one_buffer
+#define BPT 1022
+#if defined(one_buffer)
+	np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev, skb->data,
+						 skb->len, DMA_TO_DEVICE);
+	np->cur_tx_copy->control = TXIC | TXLD | TXFD | CRCEnable | PADEnable;
+	np->cur_tx_copy->control |= (skb->len << PKTSShift);	/* pkt size */
+	np->cur_tx_copy->control |= (skb->len << TBSShift);	/* buffer size */
+// 89/12/29 add,
+	if (np->pci_dev->device == 0x891)
+		np->cur_tx_copy->control |= ETIControl | RetryTxLC;
+	np->cur_tx_copy->status = TXOWN;
+	np->cur_tx_copy = np->cur_tx_copy->next_desc_logical;
+	--np->free_tx_count;
+#elif defined(two_buffer)
+	if (skb->len > BPT) {
+		struct fealnx_desc *next;
+
+		/* for the first descriptor */
+		np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev,
+							 skb->data, BPT,
+							 DMA_TO_DEVICE);
+		np->cur_tx_copy->control = TXIC | TXFD | CRCEnable | PADEnable;
+		np->cur_tx_copy->control |= (skb->len << PKTSShift);	/* pkt size */
+		np->cur_tx_copy->control |= (BPT << TBSShift);	/* buffer size */
+
+		/* for the last descriptor */
+		next = np->cur_tx_copy->next_desc_logical;
+		next->skbuff = skb;
+		next->control = TXIC | TXLD | CRCEnable | PADEnable;
+		next->control |= (skb->len << PKTSShift);	/* pkt size */
+		next->control |= ((skb->len - BPT) << TBSShift);	/* buf size */
+// 89/12/29 add,
+		if (np->pci_dev->device == 0x891)
+			np->cur_tx_copy->control |= ETIControl | RetryTxLC;
+		next->buffer = dma_map_single(&ep->pci_dev->dev,
+					      skb->data + BPT, skb->len - BPT,
+					      DMA_TO_DEVICE);
+
+		next->status = TXOWN;
+		np->cur_tx_copy->status = TXOWN;
+
+		np->cur_tx_copy = next->next_desc_logical;
+		np->free_tx_count -= 2;
+	} else {
+		np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev,
+							 skb->data, skb->len,
+							 DMA_TO_DEVICE);
+		np->cur_tx_copy->control = TXIC | TXLD | TXFD | CRCEnable | PADEnable;
+		np->cur_tx_copy->control |= (skb->len << PKTSShift);	/* pkt size */
+		np->cur_tx_copy->control |= (skb->len << TBSShift);	/* buffer size */
+// 89/12/29 add,
+		if (np->pci_dev->device == 0x891)
+			np->cur_tx_copy->control |= ETIControl | RetryTxLC;
+		np->cur_tx_copy->status = TXOWN;
+		np->cur_tx_copy = np->cur_tx_copy->next_desc_logical;
+		--np->free_tx_count;
+	}
+#endif
+
+	if (np->free_tx_count < 2)
+		netif_stop_queue(dev);
+	++np->really_tx_count;
+	iowrite32(0, np->mem + TXPDR);
+
+	spin_unlock_irqrestore(&np->lock, flags);
+	return NETDEV_TX_OK;
+}
+
+
+/* Take lock before calling */
+/* Chip probably hosed tx ring. Clean up. */
+static void reset_tx_descriptors(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	struct fealnx_desc *cur;
+	int i;
+
+	/* initialize tx variables */
+	np->cur_tx = &np->tx_ring[0];
+	np->cur_tx_copy = &np->tx_ring[0];
+	np->really_tx_count = 0;
+	np->free_tx_count = TX_RING_SIZE;
+
+	for (i = 0; i < TX_RING_SIZE; i++) {
+		cur = &np->tx_ring[i];
+		if (cur->skbuff) {
+			dma_unmap_single(&np->pci_dev->dev, cur->buffer,
+					 cur->skbuff->len, DMA_TO_DEVICE);
+			dev_kfree_skb_any(cur->skbuff);
+			cur->skbuff = NULL;
+		}
+		cur->status = 0;
+		cur->control = 0;	/* needed? */
+		/* probably not needed. We do it for purely paranoid reasons */
+		cur->next_desc = np->tx_ring_dma +
+			(i + 1)*sizeof(struct fealnx_desc);
+		cur->next_desc_logical = &np->tx_ring[i + 1];
+	}
+	/* for the last tx descriptor */
+	np->tx_ring[TX_RING_SIZE - 1].next_desc = np->tx_ring_dma;
+	np->tx_ring[TX_RING_SIZE - 1].next_desc_logical = &np->tx_ring[0];
+}
+
+
+/* Take lock and stop rx before calling this */
+static void reset_rx_descriptors(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	struct fealnx_desc *cur = np->cur_rx;
+	int i;
+
+	allocate_rx_buffers(dev);
+
+	for (i = 0; i < RX_RING_SIZE; i++) {
+		if (cur->skbuff)
+			cur->status = RXOWN;
+		cur = cur->next_desc_logical;
+	}
+
+	iowrite32(np->rx_ring_dma + ((char*)np->cur_rx - (char*)np->rx_ring),
+		np->mem + RXLBA);
+}
+
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+   after the Tx thread. */
+static irqreturn_t intr_handler(int irq, void *dev_instance)
+{
+	struct net_device *dev = (struct net_device *) dev_instance;
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+	long boguscnt = max_interrupt_work;
+	unsigned int num_tx = 0;
+	int handled = 0;
+
+	spin_lock(&np->lock);
+
+	iowrite32(0, ioaddr + IMR);
+
+	do {
+		u32 intr_status = ioread32(ioaddr + ISR);
+
+		/* Acknowledge all of the current interrupt sources ASAP. */
+		iowrite32(intr_status, ioaddr + ISR);
+
+		if (debug)
+			printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", dev->name,
+			       intr_status);
+
+		if (!(intr_status & np->imrvalue))
+			break;
+
+		handled = 1;
+
+// 90/1/16 delete,
+//
+//      if (intr_status & FBE)
+//      {   /* fatal error */
+//          stop_nic_tx(ioaddr, 0);
+//          stop_nic_rx(ioaddr, 0);
+//          break;
+//      };
+
+		if (intr_status & TUNF)
+			iowrite32(0, ioaddr + TXPDR);
+
+		if (intr_status & CNTOVF) {
+			/* missed pkts */
+			dev->stats.rx_missed_errors +=
+				ioread32(ioaddr + TALLY) & 0x7fff;
+
+			/* crc error */
+			dev->stats.rx_crc_errors +=
+			    (ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16;
+		}
+
+		if (intr_status & (RI | RBU)) {
+			if (intr_status & RI)
+				netdev_rx(dev);
+			else {
+				stop_nic_rx(ioaddr, np->crvalue);
+				reset_rx_descriptors(dev);
+				iowrite32(np->crvalue, ioaddr + TCRRCR);
+			}
+		}
+
+		while (np->really_tx_count) {
+			long tx_status = np->cur_tx->status;
+			long tx_control = np->cur_tx->control;
+
+			if (!(tx_control & TXLD)) {	/* this pkt is combined by two tx descriptors */
+				struct fealnx_desc *next;
+
+				next = np->cur_tx->next_desc_logical;
+				tx_status = next->status;
+				tx_control = next->control;
+			}
+
+			if (tx_status & TXOWN)
+				break;
+
+			if (!(np->crvalue & CR_W_ENH)) {
+				if (tx_status & (CSL | LC | EC | UDF | HF)) {
+					dev->stats.tx_errors++;
+					if (tx_status & EC)
+						dev->stats.tx_aborted_errors++;
+					if (tx_status & CSL)
+						dev->stats.tx_carrier_errors++;
+					if (tx_status & LC)
+						dev->stats.tx_window_errors++;
+					if (tx_status & UDF)
+						dev->stats.tx_fifo_errors++;
+					if ((tx_status & HF) && np->mii.full_duplex == 0)
+						dev->stats.tx_heartbeat_errors++;
+
+				} else {
+					dev->stats.tx_bytes +=
+					    ((tx_control & PKTSMask) >> PKTSShift);
+
+					dev->stats.collisions +=
+					    ((tx_status & NCRMask) >> NCRShift);
+					dev->stats.tx_packets++;
+				}
+			} else {
+				dev->stats.tx_bytes +=
+				    ((tx_control & PKTSMask) >> PKTSShift);
+				dev->stats.tx_packets++;
+			}
+
+			/* Free the original skb. */
+			dma_unmap_single(&np->pci_dev->dev,
+					 np->cur_tx->buffer,
+					 np->cur_tx->skbuff->len,
+					 DMA_TO_DEVICE);
+			dev_consume_skb_irq(np->cur_tx->skbuff);
+			np->cur_tx->skbuff = NULL;
+			--np->really_tx_count;
+			if (np->cur_tx->control & TXLD) {
+				np->cur_tx = np->cur_tx->next_desc_logical;
+				++np->free_tx_count;
+			} else {
+				np->cur_tx = np->cur_tx->next_desc_logical;
+				np->cur_tx = np->cur_tx->next_desc_logical;
+				np->free_tx_count += 2;
+			}
+			num_tx++;
+		}		/* end of for loop */
+
+		if (num_tx && np->free_tx_count >= 2)
+			netif_wake_queue(dev);
+
+		/* read transmit status for enhanced mode only */
+		if (np->crvalue & CR_W_ENH) {
+			long data;
+
+			data = ioread32(ioaddr + TSR);
+			dev->stats.tx_errors += (data & 0xff000000) >> 24;
+			dev->stats.tx_aborted_errors +=
+				(data & 0xff000000) >> 24;
+			dev->stats.tx_window_errors +=
+				(data & 0x00ff0000) >> 16;
+			dev->stats.collisions += (data & 0x0000ffff);
+		}
+
+		if (--boguscnt < 0) {
+			printk(KERN_WARNING "%s: Too much work at interrupt, "
+			       "status=0x%4.4x.\n", dev->name, intr_status);
+			if (!np->reset_timer_armed) {
+				np->reset_timer_armed = 1;
+				np->reset_timer.expires = RUN_AT(HZ/2);
+				add_timer(&np->reset_timer);
+				stop_nic_rxtx(ioaddr, 0);
+				netif_stop_queue(dev);
+				/* or netif_tx_disable(dev); ?? */
+				/* Prevent other paths from enabling tx,rx,intrs */
+				np->crvalue_sv = np->crvalue;
+				np->imrvalue_sv = np->imrvalue;
+				np->crvalue &= ~(CR_W_TXEN | CR_W_RXEN); /* or simply = 0? */
+				np->imrvalue = 0;
+			}
+
+			break;
+		}
+	} while (1);
+
+	/* read the tally counters */
+	/* missed pkts */
+	dev->stats.rx_missed_errors += ioread32(ioaddr + TALLY) & 0x7fff;
+
+	/* crc error */
+	dev->stats.rx_crc_errors +=
+		(ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16;
+
+	if (debug)
+		printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n",
+		       dev->name, ioread32(ioaddr + ISR));
+
+	iowrite32(np->imrvalue, ioaddr + IMR);
+
+	spin_unlock(&np->lock);
+
+	return IRQ_RETVAL(handled);
+}
+
+
+/* This routine is logically part of the interrupt handler, but separated
+   for clarity and better register allocation. */
+static int netdev_rx(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+
+	/* If EOP is set on the next entry, it's a new packet. Send it up. */
+	while (!(np->cur_rx->status & RXOWN) && np->cur_rx->skbuff) {
+		s32 rx_status = np->cur_rx->status;
+
+		if (np->really_rx_count == 0)
+			break;
+
+		if (debug)
+			printk(KERN_DEBUG "  netdev_rx() status was %8.8x.\n", rx_status);
+
+		if ((!((rx_status & RXFSD) && (rx_status & RXLSD))) ||
+		    (rx_status & ErrorSummary)) {
+			if (rx_status & ErrorSummary) {	/* there was a fatal error */
+				if (debug)
+					printk(KERN_DEBUG
+					       "%s: Receive error, Rx status %8.8x.\n",
+					       dev->name, rx_status);
+
+				dev->stats.rx_errors++;	/* end of a packet. */
+				if (rx_status & (LONGPKT | RUNTPKT))
+					dev->stats.rx_length_errors++;
+				if (rx_status & RXER)
+					dev->stats.rx_frame_errors++;
+				if (rx_status & CRC)
+					dev->stats.rx_crc_errors++;
+			} else {
+				int need_to_reset = 0;
+				int desno = 0;
+
+				if (rx_status & RXFSD) {	/* this pkt is too long, over one rx buffer */
+					struct fealnx_desc *cur;
+
+					/* check this packet is received completely? */
+					cur = np->cur_rx;
+					while (desno <= np->really_rx_count) {
+						++desno;
+						if ((!(cur->status & RXOWN)) &&
+						    (cur->status & RXLSD))
+							break;
+						/* goto next rx descriptor */
+						cur = cur->next_desc_logical;
+					}
+					if (desno > np->really_rx_count)
+						need_to_reset = 1;
+				} else	/* RXLSD did not find, something error */
+					need_to_reset = 1;
+
+				if (need_to_reset == 0) {
+					int i;
+
+					dev->stats.rx_length_errors++;
+
+					/* free all rx descriptors related this long pkt */
+					for (i = 0; i < desno; ++i) {
+						if (!np->cur_rx->skbuff) {
+							printk(KERN_DEBUG
+								"%s: I'm scared\n", dev->name);
+							break;
+						}
+						np->cur_rx->status = RXOWN;
+						np->cur_rx = np->cur_rx->next_desc_logical;
+					}
+					continue;
+				} else {        /* rx error, need to reset this chip */
+					stop_nic_rx(ioaddr, np->crvalue);
+					reset_rx_descriptors(dev);
+					iowrite32(np->crvalue, ioaddr + TCRRCR);
+				}
+				break;	/* exit the while loop */
+			}
+		} else {	/* this received pkt is ok */
+
+			struct sk_buff *skb;
+			/* Omit the four octet CRC from the length. */
+			short pkt_len = ((rx_status & FLNGMASK) >> FLNGShift) - 4;
+
+#ifndef final_version
+			if (debug)
+				printk(KERN_DEBUG "  netdev_rx() normal Rx pkt length %d"
+				       " status %x.\n", pkt_len, rx_status);
+#endif
+
+			/* Check if the packet is long enough to accept without copying
+			   to a minimally-sized skbuff. */
+			if (pkt_len < rx_copybreak &&
+			    (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
+				skb_reserve(skb, 2);	/* 16 byte align the IP header */
+				dma_sync_single_for_cpu(&np->pci_dev->dev,
+							np->cur_rx->buffer,
+							np->rx_buf_sz,
+							DMA_FROM_DEVICE);
+				/* Call copy + cksum if available. */
+
+#if ! defined(__alpha__)
+				skb_copy_to_linear_data(skb,
+					np->cur_rx->skbuff->data, pkt_len);
+				skb_put(skb, pkt_len);
+#else
+				skb_put_data(skb, np->cur_rx->skbuff->data,
+					     pkt_len);
+#endif
+				dma_sync_single_for_device(&np->pci_dev->dev,
+							   np->cur_rx->buffer,
+							   np->rx_buf_sz,
+							   DMA_FROM_DEVICE);
+			} else {
+				dma_unmap_single(&np->pci_dev->dev,
+						 np->cur_rx->buffer,
+						 np->rx_buf_sz,
+						 DMA_FROM_DEVICE);
+				skb_put(skb = np->cur_rx->skbuff, pkt_len);
+				np->cur_rx->skbuff = NULL;
+				--np->really_rx_count;
+			}
+			skb->protocol = eth_type_trans(skb, dev);
+			netif_rx(skb);
+			dev->stats.rx_packets++;
+			dev->stats.rx_bytes += pkt_len;
+		}
+
+		np->cur_rx = np->cur_rx->next_desc_logical;
+	}			/* end of while loop */
+
+	/*  allocate skb for rx buffers */
+	allocate_rx_buffers(dev);
+
+	return 0;
+}
+
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+
+	/* The chip only need report frame silently dropped. */
+	if (netif_running(dev)) {
+		dev->stats.rx_missed_errors +=
+			ioread32(ioaddr + TALLY) & 0x7fff;
+		dev->stats.rx_crc_errors +=
+			(ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16;
+	}
+
+	return &dev->stats;
+}
+
+
+/* for dev->set_multicast_list */
+static void set_rx_mode(struct net_device *dev)
+{
+	spinlock_t *lp = &((struct netdev_private *)netdev_priv(dev))->lock;
+	unsigned long flags;
+	spin_lock_irqsave(lp, flags);
+	__set_rx_mode(dev);
+	spin_unlock_irqrestore(lp, flags);
+}
+
+
+/* Take lock before calling */
+static void __set_rx_mode(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+	u32 mc_filter[2];	/* Multicast hash filter */
+	u32 rx_mode;
+
+	if (dev->flags & IFF_PROMISC) {	/* Set promiscuous. */
+		memset(mc_filter, 0xff, sizeof(mc_filter));
+		rx_mode = CR_W_PROM | CR_W_AB | CR_W_AM;
+	} else if ((netdev_mc_count(dev) > multicast_filter_limit) ||
+		   (dev->flags & IFF_ALLMULTI)) {
+		/* Too many to match, or accept all multicasts. */
+		memset(mc_filter, 0xff, sizeof(mc_filter));
+		rx_mode = CR_W_AB | CR_W_AM;
+	} else {
+		struct netdev_hw_addr *ha;
+
+		memset(mc_filter, 0, sizeof(mc_filter));
+		netdev_for_each_mc_addr(ha, dev) {
+			unsigned int bit;
+			bit = (ether_crc(ETH_ALEN, ha->addr) >> 26) ^ 0x3F;
+			mc_filter[bit >> 5] |= (1 << bit);
+		}
+		rx_mode = CR_W_AB | CR_W_AM;
+	}
+
+	stop_nic_rxtx(ioaddr, np->crvalue);
+
+	iowrite32(mc_filter[0], ioaddr + MAR0);
+	iowrite32(mc_filter[1], ioaddr + MAR1);
+	np->crvalue &= ~CR_W_RXMODEMASK;
+	np->crvalue |= rx_mode;
+	iowrite32(np->crvalue, ioaddr + TCRRCR);
+}
+
+static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	struct netdev_private *np = netdev_priv(dev);
+
+	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strscpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info));
+}
+
+static int netdev_get_link_ksettings(struct net_device *dev,
+				     struct ethtool_link_ksettings *cmd)
+{
+	struct netdev_private *np = netdev_priv(dev);
+
+	spin_lock_irq(&np->lock);
+	mii_ethtool_get_link_ksettings(&np->mii, cmd);
+	spin_unlock_irq(&np->lock);
+
+	return 0;
+}
+
+static int netdev_set_link_ksettings(struct net_device *dev,
+				     const struct ethtool_link_ksettings *cmd)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	int rc;
+
+	spin_lock_irq(&np->lock);
+	rc = mii_ethtool_set_link_ksettings(&np->mii, cmd);
+	spin_unlock_irq(&np->lock);
+
+	return rc;
+}
+
+static int netdev_nway_reset(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	return mii_nway_restart(&np->mii);
+}
+
+static u32 netdev_get_link(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	return mii_link_ok(&np->mii);
+}
+
+static u32 netdev_get_msglevel(struct net_device *dev)
+{
+	return debug;
+}
+
+static void netdev_set_msglevel(struct net_device *dev, u32 value)
+{
+	debug = value;
+}
+
+static const struct ethtool_ops netdev_ethtool_ops = {
+	.get_drvinfo		= netdev_get_drvinfo,
+	.nway_reset		= netdev_nway_reset,
+	.get_link		= netdev_get_link,
+	.get_msglevel		= netdev_get_msglevel,
+	.set_msglevel		= netdev_set_msglevel,
+	.get_link_ksettings	= netdev_get_link_ksettings,
+	.set_link_ksettings	= netdev_set_link_ksettings,
+};
+
+static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	int rc;
+
+	if (!netif_running(dev))
+		return -EINVAL;
+
+	spin_lock_irq(&np->lock);
+	rc = generic_mii_ioctl(&np->mii, if_mii(rq), cmd, NULL);
+	spin_unlock_irq(&np->lock);
+
+	return rc;
+}
+
+
+static int netdev_close(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->mem;
+	int i;
+
+	netif_stop_queue(dev);
+
+	/* Disable interrupts by clearing the interrupt mask. */
+	iowrite32(0x0000, ioaddr + IMR);
+
+	/* Stop the chip's Tx and Rx processes. */
+	stop_nic_rxtx(ioaddr, 0);
+
+	del_timer_sync(&np->timer);
+	del_timer_sync(&np->reset_timer);
+
+	free_irq(np->pci_dev->irq, dev);
+
+	/* Free all the skbuffs in the Rx queue. */
+	for (i = 0; i < RX_RING_SIZE; i++) {
+		struct sk_buff *skb = np->rx_ring[i].skbuff;
+
+		np->rx_ring[i].status = 0;
+		if (skb) {
+			dma_unmap_single(&np->pci_dev->dev,
+					 np->rx_ring[i].buffer, np->rx_buf_sz,
+					 DMA_FROM_DEVICE);
+			dev_kfree_skb(skb);
+			np->rx_ring[i].skbuff = NULL;
+		}
+	}
+
+	for (i = 0; i < TX_RING_SIZE; i++) {
+		struct sk_buff *skb = np->tx_ring[i].skbuff;
+
+		if (skb) {
+			dma_unmap_single(&np->pci_dev->dev,
+					 np->tx_ring[i].buffer, skb->len,
+					 DMA_TO_DEVICE);
+			dev_kfree_skb(skb);
+			np->tx_ring[i].skbuff = NULL;
+		}
+	}
+
+	return 0;
+}
+
+static const struct pci_device_id fealnx_pci_tbl[] = {
+	{0x1516, 0x0800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0x1516, 0x0803, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
+	{0x1516, 0x0891, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2},
+	{} /* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, fealnx_pci_tbl);
+
+
+static struct pci_driver fealnx_driver = {
+	.name		= "fealnx",
+	.id_table	= fealnx_pci_tbl,
+	.probe		= fealnx_init_one,
+	.remove		= fealnx_remove_one,
+};
+
+module_pci_driver(fealnx_driver);
-- 
cgit 


From 2aab4b96900272885bc157f8b236abf1cdc02e08 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 7 Mar 2023 16:45:30 +0000
Subject: af_unix: fix struct pid leaks in OOB support

syzbot reported struct pid leak [1].

Issue is that queue_oob() calls maybe_add_creds() which potentially
holds a reference on a pid.

But skb->destructor is not set (either directly or by calling
unix_scm_to_skb())

This means that subsequent kfree_skb() or consume_skb() would leak
this reference.

In this fix, I chose to fully support scm even for the OOB message.

[1]
BUG: memory leak
unreferenced object 0xffff8881053e7f80 (size 128):
comm "syz-executor242", pid 5066, jiffies 4294946079 (age 13.220s)
hex dump (first 32 bytes):
01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<ffffffff812ae26a>] alloc_pid+0x6a/0x560 kernel/pid.c:180
[<ffffffff812718df>] copy_process+0x169f/0x26c0 kernel/fork.c:2285
[<ffffffff81272b37>] kernel_clone+0xf7/0x610 kernel/fork.c:2684
[<ffffffff812730cc>] __do_sys_clone+0x7c/0xb0 kernel/fork.c:2825
[<ffffffff849ad699>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
[<ffffffff849ad699>] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
[<ffffffff84a0008b>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: 314001f0bf92 ("af_unix: Add OOB support")
Reported-by: syzbot+7699d9e5635c10253a27@syzkaller.appspotmail.com
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Rao Shoaib <rao.shoaib@oracle.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230307164530.771896-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/unix/af_unix.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 347122c3575e..0b0f18ecce44 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2105,7 +2105,8 @@ out:
 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
 
 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
-static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
+		     struct scm_cookie *scm, bool fds_sent)
 {
 	struct unix_sock *ousk = unix_sk(other);
 	struct sk_buff *skb;
@@ -2116,6 +2117,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
 	if (!skb)
 		return err;
 
+	err = unix_scm_to_skb(scm, skb, !fds_sent);
+	if (err < 0) {
+		kfree_skb(skb);
+		return err;
+	}
 	skb_put(skb, 1);
 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
 
@@ -2243,7 +2249,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
 	if (msg->msg_flags & MSG_OOB) {
-		err = queue_oob(sock, msg, other);
+		err = queue_oob(sock, msg, other, &scm, fds_sent);
 		if (err)
 			goto out_err;
 		sent++;
-- 
cgit 


From 649c15c7691e9b13cbe9bf6c65c365350e056067 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Tue, 7 Mar 2023 14:37:07 -0300
Subject: net: avoid double iput when sock_alloc_file fails

When sock_alloc_file fails to allocate a file, it will call sock_release.
__sys_socket_file should then not call sock_release again, otherwise there
will be a double free.

[   89.319884] ------------[ cut here ]------------
[   89.320286] kernel BUG at fs/inode.c:1764!
[   89.320656] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[   89.321051] CPU: 7 PID: 125 Comm: iou-sqp-124 Not tainted 6.2.0+ #361
[   89.321535] RIP: 0010:iput+0x1ff/0x240
[   89.321808] Code: d1 83 e1 03 48 83 f9 02 75 09 48 81 fa 00 10 00 00 77 05 83 e2 01 75 1f 4c 89 ef e8 fb d2 ba 00 e9 80 fe ff ff c3 cc cc cc cc <0f> 0b 0f 0b e9 d0 fe ff ff 0f 0b eb 8d 49 8d b4 24 08 01 00 00 48
[   89.322760] RSP: 0018:ffffbdd60068bd50 EFLAGS: 00010202
[   89.323036] RAX: 0000000000000000 RBX: ffff9d7ad3cacac0 RCX: 0000000000001107
[   89.323412] RDX: 000000000003af00 RSI: 0000000000000000 RDI: ffff9d7ad3cacb40
[   89.323785] RBP: ffffbdd60068bd68 R08: ffffffffffffffff R09: ffffffffab606438
[   89.324157] R10: ffffffffacb3dfa0 R11: 6465686361657256 R12: ffff9d7ad3cacb40
[   89.324529] R13: 0000000080000001 R14: 0000000080000001 R15: 0000000000000002
[   89.324904] FS:  00007f7b28516740(0000) GS:ffff9d7aeb1c0000(0000) knlGS:0000000000000000
[   89.325328] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   89.325629] CR2: 00007f0af52e96c0 CR3: 0000000002a02006 CR4: 0000000000770ee0
[   89.326004] PKRU: 55555554
[   89.326161] Call Trace:
[   89.326298]  <TASK>
[   89.326419]  __sock_release+0xb5/0xc0
[   89.326632]  __sys_socket_file+0xb2/0xd0
[   89.326844]  io_socket+0x88/0x100
[   89.327039]  ? io_issue_sqe+0x6a/0x430
[   89.327258]  io_issue_sqe+0x67/0x430
[   89.327450]  io_submit_sqes+0x1fe/0x670
[   89.327661]  io_sq_thread+0x2e6/0x530
[   89.327859]  ? __pfx_autoremove_wake_function+0x10/0x10
[   89.328145]  ? __pfx_io_sq_thread+0x10/0x10
[   89.328367]  ret_from_fork+0x29/0x50
[   89.328576] RIP: 0033:0x0
[   89.328732] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
[   89.329073] RSP: 002b:0000000000000000 EFLAGS: 00000202 ORIG_RAX: 00000000000001a9
[   89.329477] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f7b28637a3d
[   89.329845] RDX: 00007fff4e4318a8 RSI: 00007fff4e4318b0 RDI: 0000000000000400
[   89.330216] RBP: 00007fff4e431830 R08: 00007fff4e431711 R09: 00007fff4e4318b0
[   89.330584] R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff4e441b38
[   89.330950] R13: 0000563835e3e725 R14: 0000563835e40d10 R15: 00007f7b28784040
[   89.331318]  </TASK>
[   89.331441] Modules linked in:
[   89.331617] ---[ end trace 0000000000000000 ]---

Fixes: da214a475f8b ("net: add __sys_socket_file()")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230307173707.468744-1-cascardo@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/socket.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 6bae8ce7059e..9c92c0e6c4da 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -450,7 +450,9 @@ static struct file_system_type sock_fs_type = {
  *
  *	Returns the &file bound with @sock, implicitly storing it
  *	in sock->file. If dname is %NULL, sets to "".
- *	On failure the return is a ERR pointer (see linux/err.h).
+ *
+ *	On failure @sock is released, and an ERR pointer is returned.
+ *
  *	This function uses GFP_KERNEL internally.
  */
 
@@ -1638,7 +1640,6 @@ static struct socket *__sys_socket_create(int family, int type, int protocol)
 struct file *__sys_socket_file(int family, int type, int protocol)
 {
 	struct socket *sock;
-	struct file *file;
 	int flags;
 
 	sock = __sys_socket_create(family, type, protocol);
@@ -1649,11 +1650,7 @@ struct file *__sys_socket_file(int family, int type, int protocol)
 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
 
-	file = sock_alloc_file(sock, flags, NULL);
-	if (IS_ERR(file))
-		sock_release(sock);
-
-	return file;
+	return sock_alloc_file(sock, flags, NULL);
 }
 
 int __sys_socket(int family, int type, int protocol)
-- 
cgit 


From 6517a60b0307abdcca80133c237551cc7cc8e6ae Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 7 Mar 2023 16:39:22 -0800
Subject: tools: ynl: move the enum classes to shared code

Move bulk of the EnumSet and EnumEntry code to shared
code for reuse by cli.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/__init__.py |   7 +--
 tools/net/ynl/lib/nlspec.py   |  96 +++++++++++++++++++++++++++++++++++++
 tools/net/ynl/ynl-gen-c.py    | 107 +++++++++---------------------------------
 3 files changed, 121 insertions(+), 89 deletions(-)

diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py
index a2cb8b16d6f1..4b3797fe784b 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/lib/__init__.py
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
-from .nlspec import SpecAttr, SpecAttrSet, SpecFamily, SpecOperation
+from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
+    SpecFamily, SpecOperation
 from .ynl import YnlFamily
 
-__all__ = ["SpecAttr", "SpecAttrSet", "SpecFamily", "SpecOperation",
-           "YnlFamily"]
+__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
+           "SpecFamily", "SpecOperation", "YnlFamily"]
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 0a2cfb5862aa..7c1cf6c1499e 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -57,6 +57,91 @@ class SpecElement:
         pass
 
 
+class SpecEnumEntry(SpecElement):
+    """ Entry within an enum declared in the Netlink spec.
+
+    Attributes:
+        doc         documentation string
+        enum_set    back reference to the enum
+        value       numerical value of this enum (use accessors in most situations!)
+
+    Methods:
+        raw_value   raw value, i.e. the id in the enum, unlike user value which is a mask for flags
+        user_value   user value, same as raw value for enums, for flags it's the mask
+    """
+    def __init__(self, enum_set, yaml, prev, value_start):
+        if isinstance(yaml, str):
+            yaml = {'name': yaml}
+        super().__init__(enum_set.family, yaml)
+
+        self.doc = yaml.get('doc', '')
+        self.enum_set = enum_set
+
+        if 'value' in yaml:
+            self.value = yaml['value']
+        elif prev:
+            self.value = prev.value + 1
+        else:
+            self.value = value_start
+
+    def has_doc(self):
+        return bool(self.doc)
+
+    def raw_value(self):
+        return self.value
+
+    def user_value(self):
+        if self.enum_set['type'] == 'flags':
+            return 1 << self.value
+        else:
+            return self.value
+
+
+class SpecEnumSet(SpecElement):
+    """ Enum type
+
+    Represents an enumeration (list of numerical constants)
+    as declared in the "definitions" section of the spec.
+
+    Attributes:
+        type          enum or flags
+        entries       entries by name
+    Methods:
+        get_mask      for flags compute the mask of all defined values
+    """
+    def __init__(self, family, yaml):
+        super().__init__(family, yaml)
+
+        self.type = yaml['type']
+
+        prev_entry = None
+        value_start = self.yaml.get('value-start', 0)
+        self.entries = dict()
+        for entry in self.yaml['entries']:
+            e = self.new_entry(entry, prev_entry, value_start)
+            self.entries[e.name] = e
+            prev_entry = e
+
+    def new_entry(self, entry, prev_entry, value_start):
+        return SpecEnumEntry(self, entry, prev_entry, value_start)
+
+    def has_doc(self):
+        if 'doc' in self.yaml:
+            return True
+        for entry in self.entries.values():
+            if entry.has_doc():
+                return True
+        return False
+
+    def get_mask(self):
+        mask = 0
+        idx = self.yaml.get('value-start', 0)
+        for _ in self.entries.values():
+            mask |= 1 << idx
+            idx += 1
+        return mask
+
+
 class SpecAttr(SpecElement):
     """ Single Netlink atttribute type
 
@@ -193,6 +278,7 @@ class SpecFamily(SpecElement):
         msgs       dict of all messages (index by name)
         msgs_by_value  dict of all messages (indexed by name)
         ops        dict of all valid requests / responses
+        consts     dict of all constants/enums
     """
     def __init__(self, spec_path, schema_path=None):
         with open(spec_path, "r") as stream:
@@ -222,6 +308,7 @@ class SpecFamily(SpecElement):
         self.req_by_value = collections.OrderedDict()
         self.rsp_by_value = collections.OrderedDict()
         self.ops = collections.OrderedDict()
+        self.consts = collections.OrderedDict()
 
         last_exception = None
         while len(self._resolution_list) > 0:
@@ -242,6 +329,9 @@ class SpecFamily(SpecElement):
             if len(resolved) == 0:
                 raise last_exception
 
+    def new_enum(self, elem):
+        return SpecEnumSet(self, elem)
+
     def new_attr_set(self, elem):
         return SpecAttrSet(self, elem)
 
@@ -296,6 +386,12 @@ class SpecFamily(SpecElement):
     def resolve(self):
         self.resolve_up(super())
 
+        for elem in self.yaml['definitions']:
+            if elem['type'] == 'enum' or elem['type'] == 'flags':
+                self.consts[elem['name']] = self.new_enum(elem)
+            else:
+                self.consts[elem['name']] = elem
+
         for elem in self.yaml['attribute-sets']:
             attr_set = self.new_attr_set(elem)
             self.attr_sets[elem['name']] = attr_set
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index c940ca834d3f..1bcc5354d800 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -6,7 +6,7 @@ import collections
 import os
 import yaml
 
-from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation
+from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
 
 
 def c_upper(name):
@@ -567,97 +567,37 @@ class Struct:
         self.inherited = [c_lower(x) for x in sorted(self._inherited)]
 
 
-class EnumEntry:
+class EnumEntry(SpecEnumEntry):
     def __init__(self, enum_set, yaml, prev, value_start):
-        if isinstance(yaml, str):
-            self.name = yaml
-            yaml = {}
-            self.doc = ''
-        else:
-            self.name = yaml['name']
-            self.doc = yaml.get('doc', '')
-
-        self.yaml = yaml
-        self.enum_set = enum_set
-        self.c_name = c_upper(enum_set.value_pfx + self.name)
-
-        if 'value' in yaml:
-            self.value = yaml['value']
-            if prev:
-                self.value_change = (self.value != prev.value + 1)
-        elif prev:
-            self.value_change = False
-            self.value = prev.value + 1
+        super().__init__(enum_set, yaml, prev, value_start)
+
+        if prev:
+            self.value_change = (self.value != prev.value + 1)
         else:
-            self.value = value_start
             self.value_change = (self.value != 0)
-
         self.value_change = self.value_change or self.enum_set['type'] == 'flags'
 
-    def __getitem__(self, key):
-        return self.yaml[key]
-
-    def __contains__(self, key):
-        return key in self.yaml
-
-    def has_doc(self):
-        return bool(self.doc)
+        # Added by resolve:
+        self.c_name = None
+        delattr(self, "c_name")
 
-    # raw value, i.e. the id in the enum, unlike user value which is a mask for flags
-    def raw_value(self):
-        return self.value
+    def resolve(self):
+        self.resolve_up(super())
 
-    # user value, same as raw value for enums, for flags it's the mask
-    def user_value(self):
-        if self.enum_set['type'] == 'flags':
-            return 1 << self.value
-        else:
-            return self.value
+        self.c_name = c_upper(self.enum_set.value_pfx + self.name)
 
 
-class EnumSet:
+class EnumSet(SpecEnumSet):
     def __init__(self, family, yaml):
-        self.yaml = yaml
-        self.family = family
-
         self.render_name = c_lower(family.name + '-' + yaml['name'])
         self.enum_name = 'enum ' + self.render_name
 
         self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-")
 
-        self.type = yaml['type']
-
-        prev_entry = None
-        value_start = self.yaml.get('value-start', 0)
-        self.entries = {}
-        self.entry_list = []
-        for entry in self.yaml['entries']:
-            e = EnumEntry(self, entry, prev_entry, value_start)
-            self.entries[e.name] = e
-            self.entry_list.append(e)
-            prev_entry = e
-
-    def __getitem__(self, key):
-        return self.yaml[key]
-
-    def __contains__(self, key):
-        return key in self.yaml
-
-    def has_doc(self):
-        if 'doc' in self.yaml:
-            return True
-        for entry in self.entry_list:
-            if entry.has_doc():
-                return True
-        return False
+        super().__init__(family, yaml)
 
-    def get_mask(self):
-        mask = 0
-        idx = self.yaml.get('value-start', 0)
-        for _ in self.entry_list:
-            mask |= 1 << idx
-            idx += 1
-        return mask
+    def new_entry(self, entry, prev_entry, value_start):
+        return EnumEntry(self, entry, prev_entry, value_start)
 
 
 class AttrSet(SpecAttrSet):
@@ -792,8 +732,6 @@ class Family(SpecFamily):
 
         self.mcgrps = self.yaml.get('mcast-groups', {'list': []})
 
-        self.consts = dict()
-
         self.hooks = dict()
         for when in ['pre', 'post']:
             self.hooks[when] = dict()
@@ -820,6 +758,9 @@ class Family(SpecFamily):
         if self.kernel_policy == 'global':
             self._load_global_policy()
 
+    def new_enum(self, elem):
+        return EnumSet(self, elem)
+
     def new_attr_set(self, elem):
         return AttrSet(self, elem)
 
@@ -837,12 +778,6 @@ class Family(SpecFamily):
                 }
 
     def _dictify(self):
-        for elem in self.yaml['definitions']:
-            if elem['type'] == 'enum' or elem['type'] == 'flags':
-                self.consts[elem['name']] = EnumSet(self, elem)
-            else:
-                self.consts[elem['name']] = elem
-
         ntf = []
         for msg in self.msgs.values():
             if 'notify' in msg:
@@ -1980,7 +1915,7 @@ def render_uapi(family, cw):
                 if 'doc' in enum:
                     doc = ' - ' + enum['doc']
                 cw.write_doc_line(enum.enum_name + doc)
-                for entry in enum.entry_list:
+                for entry in enum.entries.values():
                     if entry.has_doc():
                         doc = '@' + entry.c_name + ': ' + entry['doc']
                         cw.write_doc_line(doc)
@@ -1988,7 +1923,7 @@ def render_uapi(family, cw):
 
             uapi_enum_start(family, cw, const, 'name')
             name_pfx = const.get('name-prefix', f"{family.name}-{const['name']}-")
-            for entry in enum.entry_list:
+            for entry in enum.entries.values():
                 suffix = ','
                 if entry.value_change:
                     suffix = f" = {entry.user_value()}" + suffix
-- 
cgit 


From c311aaa74ca18bd0781d1d3bebd08484799f840c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 7 Mar 2023 16:39:23 -0800
Subject: tools: ynl: fix enum-as-flags in the generic CLI

Lorenzo points out that the generic CLI is broken for the netdev
family. When I added the support for documentation of enums
(and sparse enums) the client script was not updated.
It expects the values in enum to be a list of names,
now it can also be a dict (YAML object).

Reported-by: Lorenzo Bianconi <lorenzo@kernel.org>
Fixes: e4b48ed460d3 ("tools: ynl: add a completely generic client")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/nlspec.py | 7 +++++--
 tools/net/ynl/lib/ynl.py    | 9 ++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 7c1cf6c1499e..a34d088f6743 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -104,8 +104,9 @@ class SpecEnumSet(SpecElement):
     as declared in the "definitions" section of the spec.
 
     Attributes:
-        type          enum or flags
-        entries       entries by name
+        type            enum or flags
+        entries         entries by name
+        entries_by_val  entries by value
     Methods:
         get_mask      for flags compute the mask of all defined values
     """
@@ -117,9 +118,11 @@ class SpecEnumSet(SpecElement):
         prev_entry = None
         value_start = self.yaml.get('value-start', 0)
         self.entries = dict()
+        self.entries_by_val = dict()
         for entry in self.yaml['entries']:
             e = self.new_entry(entry, prev_entry, value_start)
             self.entries[e.name] = e
+            self.entries_by_val[e.raw_value()] = e
             prev_entry = e
 
     def new_entry(self, entry, prev_entry, value_start):
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index a842adc8e87e..90764a83c646 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -303,11 +303,6 @@ class YnlFamily(SpecFamily):
         self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1)
         self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1)
 
-        self._types = dict()
-
-        for elem in self.yaml.get('definitions', []):
-            self._types[elem['name']] = elem
-
         self.async_msg_ids = set()
         self.async_msg_queue = []
 
@@ -353,13 +348,13 @@ class YnlFamily(SpecFamily):
 
     def _decode_enum(self, rsp, attr_spec):
         raw = rsp[attr_spec['name']]
-        enum = self._types[attr_spec['enum']]
+        enum = self.consts[attr_spec['enum']]
         i = attr_spec.get('value-start', 0)
         if 'enum-as-flags' in attr_spec and attr_spec['enum-as-flags']:
             value = set()
             while raw:
                 if raw & 1:
-                    value.add(enum['entries'][i])
+                    value.add(enum.entries_by_val[i].name)
                 raw >>= 1
                 i += 1
         else:
-- 
cgit