From a07484c083eabc809e45a198bd639bfd37ac70b6 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 22 Feb 2023 15:35:30 +0700 Subject: bpf, docs: Fix link to BTF doc Ross reported broken link to BTF documentation (Documentation/bpf/btf.rst) in Documentation/bpf/bpf_devel_QA.rst. The link in question is written using external link syntax, with the target refers to BTF doc in reST source (btf.rst), which doesn't exist in resulting HTML output. Fix the link by replacing external link syntax with simply writing out the target doc, which the link will be generated to the correct HTML doc target. Fixes: 6736aa793c2b5f ("selftests/bpf: Add general instructions for test execution") Reported-by: Ross Zwisler Signed-off-by: Bagas Sanjaya Signed-off-by: Daniel Borkmann Acked-by: Ross Zwisler Link: https://lore.kernel.org/linux-doc/Y++09LKx25dtR4Ow@google.com/ Link: https://lore.kernel.org/bpf/20230222083530.26136-1-bagasdotme@gmail.com --- Documentation/bpf/bpf_devel_QA.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index 03d4993eda6f..715f7321020f 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -469,7 +469,7 @@ under test should match the config file fragment in tools/testing/selftests/bpf as closely as possible. Finally to ensure support for latest BPF Type Format features - -discussed in `Documentation/bpf/btf.rst`_ - pahole version 1.16 +discussed in Documentation/bpf/btf.rst - pahole version 1.16 is required for kernels built with CONFIG_DEBUG_INFO_BTF=y. pahole is delivered in the dwarves package or can be built from source at @@ -690,6 +690,5 @@ when: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/ .. _Documentation/dev-tools/kselftest.rst: https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html -.. _Documentation/bpf/btf.rst: btf.rst Happy BPF hacking! -- cgit From 2d311f480b52eeb2e1fd432d64b78d82952c3808 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 26 Feb 2023 23:20:16 -0800 Subject: riscv, bpf: Fix patch_text implicit declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bpf_jit_comp64.c uses patch_text(), so add to it to prevent the build error: ../arch/riscv/net/bpf_jit_comp64.c: In function 'bpf_arch_text_poke': ../arch/riscv/net/bpf_jit_comp64.c:691:23: error: implicit declaration of function 'patch_text'; did you mean 'path_get'? [-Werror=implicit-function-declaration] 691 | ret = patch_text(ip, new_insns, ninsns); | ^~~~~~~~~~ Fixes: 596f2e6f9cf4 ("riscv, bpf: Add bpf_arch_text_poke support for RV64") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Acked-by: Pu Lehui Acked-by: Björn Töpel Link: https://lore.kernel.org/r/202302271000.Aj4nMXbZ-lkp@intel.com Link: https://lore.kernel.org/bpf/20230227072016.14618-1-rdunlap@infradead.org --- arch/riscv/net/bpf_jit_comp64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index f5a668736c79..acdc3f040195 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "bpf_jit.h" #define RV_REG_TCC RV_REG_A6 -- cgit From f99e6d7c4ed3be2531bd576425a5bd07fb133bd7 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 27 Feb 2023 10:11:56 +0100 Subject: bgmac: fix *initial* chip reset to support BCM5358 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While bringing hardware up we should perform a full reset including the switch bit (BGMAC_BCMA_IOCTL_SW_RESET aka SICF_SWRST). It's what specification says and what reference driver does. This seems to be critical for the BCM5358. Without this hardware doesn't get initialized properly and doesn't seem to transmit or receive any packets. Originally bgmac was calling bgmac_chip_reset() before setting "has_robosw" property which resulted in expected behaviour. That has changed as a side effect of adding platform device support which regressed BCM5358 support. Fixes: f6a95a24957a ("net: ethernet: bgmac: Add platform device support") Cc: Jon Mason Signed-off-by: Rafał Miłecki Reviewed-by: Leon Romanovsky Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230227091156.19509-1-zajec5@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bgmac.c | 8 ++++++-- drivers/net/ethernet/broadcom/bgmac.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 3038386a5afd..1761df8fb7f9 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -890,13 +890,13 @@ static void bgmac_chip_reset_idm_config(struct bgmac *bgmac) if (iost & BGMAC_BCMA_IOST_ATTACHED) { flags = BGMAC_BCMA_IOCTL_SW_CLKEN; - if (!bgmac->has_robosw) + if (bgmac->in_init || !bgmac->has_robosw) flags |= BGMAC_BCMA_IOCTL_SW_RESET; } bgmac_clk_enable(bgmac, flags); } - if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) + if (iost & BGMAC_BCMA_IOST_ATTACHED && (bgmac->in_init || !bgmac->has_robosw)) bgmac_idm_write(bgmac, BCMA_IOCTL, bgmac_idm_read(bgmac, BCMA_IOCTL) & ~BGMAC_BCMA_IOCTL_SW_RESET); @@ -1490,6 +1490,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) struct net_device *net_dev = bgmac->net_dev; int err; + bgmac->in_init = true; + bgmac_chip_intrs_off(bgmac); net_dev->irq = bgmac->irq; @@ -1542,6 +1544,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) /* Omit FCS from max MTU size */ net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN; + bgmac->in_init = false; + err = register_netdev(bgmac->net_dev); if (err) { dev_err(bgmac->dev, "Cannot register net device\n"); diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index e05ac92c0650..d73ef262991d 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -472,6 +472,8 @@ struct bgmac { int irq; u32 int_mask; + bool in_init; + /* Current MAC state */ int mac_speed; int mac_duplex; -- cgit From 11f180a5d62a51b484e9648f9b310e1bd50b1a57 Mon Sep 17 00:00:00 2001 From: Kang Chen Date: Mon, 27 Feb 2023 17:30:37 +0800 Subject: nfc: fdp: add null check of devm_kmalloc_array in fdp_nci_i2c_read_device_properties devm_kmalloc_array may fails, *fw_vsc_cfg might be null and cause out-of-bounds write in device_property_read_u8_array later. Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") Signed-off-by: Kang Chen Reviewed-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230227093037.907654-1-void0red@gmail.com Signed-off-by: Paolo Abeni --- drivers/nfc/fdp/i2c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index 2d53e0f88d2f..1e0f2297f9c6 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -247,6 +247,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, len, sizeof(**fw_vsc_cfg), GFP_KERNEL); + if (!*fw_vsc_cfg) + goto alloc_err; + r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME, *fw_vsc_cfg, len); @@ -260,6 +263,7 @@ vsc_read_err: *fw_vsc_cfg = NULL; } +alloc_err: dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s", *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no"); } -- cgit From 8f9850dd8d23c1290cb642ce9548a440da5771ec Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Feb 2023 13:03:22 +0300 Subject: net: phy: unlock on error in phy_probe() If genphy_c45_read_eee_adv() fails then we need to do a reset and unlock the &phydev->lock mutex before returning. Fixes: 3eeca4e199ce ("net: phy: do not force EEE support") Signed-off-by: Dan Carpenter Reviewed-by: Oleksij Rempel Link: https://lore.kernel.org/r/Y/x/6kHCjnQHqOpF@kili Signed-off-by: Paolo Abeni --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 3f8a64fb9d71..9e9fd8ff00f6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3146,7 +3146,7 @@ static int phy_probe(struct device *dev) */ err = genphy_c45_read_eee_adv(phydev, phydev->advertising_eee); if (err) - return err; + goto out; /* There is no "enabled" flag. If PHY is advertising, assume it is * kind of enabled. -- cgit From ae44f1c9d1fc54aeceb335fedb1e73b2c3ee4561 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 24 Feb 2023 17:59:39 +0200 Subject: powerpc: dts: t1040rdb: fix compatible string for Rev A boards It looks like U-Boot fails to start the kernel properly when the compatible string of the board isn't fsl,T1040RDB, so stop overriding it from the rev-a.dts. Fixes: 5ebb74749202 ("powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts index 73f8c998c64d..d4f5f159d6f2 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts @@ -10,7 +10,6 @@ / { model = "fsl,T1040RDB-REV-A"; - compatible = "fsl,T1040RDB-REV-A"; }; &seville_port0 { -- cgit From 8b322f9fdb35ecee0d4a40de8af923444bd624ea Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 24 Feb 2023 17:59:40 +0200 Subject: powerpc: dts: t1040rdb: enable both CPU ports Since commit eca70102cfb1 ("net: dsa: felix: add support for changing DSA master") included in kernel v6.1, the driver supports 2 CPU ports, and they can be put in a LAG, for example (see Documentation/networking/dsa/configuration.rst for more details). Defining the second CPU port in the device tree should not cause any compatibility issue, because the default CPU port was &seville_port8 before this change, and still is &seville_port8 now (the numerically first CPU port is used by default). Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- arch/powerpc/boot/dts/fsl/t1040rdb.dts | 5 ++++- arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index b6733e7e6580..dd3aab81e9de 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -180,6 +180,9 @@ }; &seville_port8 { - ethernet = <&enet0>; + status = "okay"; +}; + +&seville_port9 { status = "okay"; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index f58eb820eb5e..ad0ab33336b8 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -686,6 +686,7 @@ seville_port8: port@8 { reg = <8>; phy-mode = "internal"; + ethernet = <&enet0>; status = "disabled"; fixed-link { @@ -697,6 +698,7 @@ seville_port9: port@9 { reg = <9>; phy-mode = "internal"; + ethernet = <&enet1>; status = "disabled"; fixed-link { -- cgit From 4d42cd6bc2ac1b9be50ade13771daec90c9d18b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Feb 2023 10:12:01 -0800 Subject: tls: rx: fix return value for async crypto Gaurav reports that TLS Rx is broken with async crypto accelerators. The commit under fixes missed updating the retval byte counting logic when updating how records are stored. Even tho both before and after the change 'decrypted' was updated inside the main loop, it was completely overwritten when processing the async completions. Now that the rx_list only holds non-zero-copy records we need to add, not overwrite. Reported-and-bisected-by: Gaurav Jain Fixes: cbbdee9918a2 ("tls: rx: async: don't put async zc on the list") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217064 Tested-by: Gaurav Jain Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230227181201.1793772-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 782d3701b86f..021d760f9133 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2127,7 +2127,7 @@ recv_end: else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek); - decrypted = max(err, 0); + decrypted += max(err, 0); } copied += decrypted; -- cgit From 880ce5f20033cd6ecb2c0edfe0376c9e45220012 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Feb 2023 14:17:06 +0000 Subject: net: avoid skb end_offset change in __skb_unclone_keeptruesize() Once initial skb->head has been allocated from skb_small_head_cache, we need to make sure to use the same strategy whenever skb->head has to be re-allocated, as found by syzbot [1] This means kmalloc_reserve() can not fallback from using skb_small_head_cache to generic (power-of-two) kmem caches. It seems that we probably want to rework things in the future, to partially revert following patch, because we no longer use ksize() for skb allocated in TX path. 2b88cba55883 ("net: preserve skb_end_offset() in skb_unclone_keeptruesize()") Ideally, TCP stack should never put payload in skb->head, this effort has to be completed. In the mean time, add a sanity check. [1] BUG: KASAN: invalid-free in slab_free mm/slub.c:3787 [inline] BUG: KASAN: invalid-free in kmem_cache_free+0xee/0x5c0 mm/slub.c:3809 Free of addr ffff88806cdee800 by task syz-executor239/5189 CPU: 0 PID: 5189 Comm: syz-executor239 Not tainted 6.2.0-rc8-syzkaller-02400-gd1fabc68f8e0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x15e/0x45d mm/kasan/report.c:417 kasan_report_invalid_free+0x9b/0x1b0 mm/kasan/report.c:482 ____kasan_slab_free+0x1a5/0x1c0 mm/kasan/common.c:216 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1781 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1807 slab_free mm/slub.c:3787 [inline] kmem_cache_free+0xee/0x5c0 mm/slub.c:3809 skb_kfree_head net/core/skbuff.c:857 [inline] skb_kfree_head net/core/skbuff.c:853 [inline] skb_free_head+0x16f/0x1a0 net/core/skbuff.c:872 skb_release_data+0x57a/0x820 net/core/skbuff.c:901 skb_release_all net/core/skbuff.c:966 [inline] __kfree_skb+0x4f/0x70 net/core/skbuff.c:980 tcp_wmem_free_skb include/net/tcp.h:302 [inline] tcp_rtx_queue_purge net/ipv4/tcp.c:3061 [inline] tcp_write_queue_purge+0x617/0xcf0 net/ipv4/tcp.c:3074 tcp_v4_destroy_sock+0x125/0x810 net/ipv4/tcp_ipv4.c:2302 inet_csk_destroy_sock+0x19a/0x440 net/ipv4/inet_connection_sock.c:1195 __tcp_close+0xb96/0xf50 net/ipv4/tcp.c:3021 tcp_close+0x2d/0xc0 net/ipv4/tcp.c:3033 inet_release+0x132/0x270 net/ipv4/af_inet.c:426 __sock_release+0xcd/0x280 net/socket.c:651 sock_close+0x1c/0x20 net/socket.c:1393 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x23c/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f2511f546c3 Code: c7 c2 c0 ff ff ff f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 RSP: 002b:00007ffef0103d48 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f2511f546c3 RDX: 0000000000000978 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000003434 R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffef0103d6c R13: 00007ffef0103d80 R14: 00007ffef0103dc0 R15: 0000000000000003 Allocated by task 5189: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] ____kasan_kmalloc mm/kasan/common.c:333 [inline] __kasan_kmalloc+0xa5/0xb0 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc_node_track_caller+0x5b/0xc0 mm/slab_common.c:988 kmalloc_reserve+0xf1/0x230 net/core/skbuff.c:539 pskb_expand_head+0x237/0x1160 net/core/skbuff.c:1995 __skb_unclone_keeptruesize+0x93/0x220 net/core/skbuff.c:2094 skb_unclone_keeptruesize include/linux/skbuff.h:1910 [inline] skb_prepare_for_shift net/core/skbuff.c:3804 [inline] skb_shift+0xef8/0x1e20 net/core/skbuff.c:3877 tcp_skb_shift net/ipv4/tcp_input.c:1538 [inline] tcp_shift_skb_data net/ipv4/tcp_input.c:1646 [inline] tcp_sacktag_walk+0x93b/0x18a0 net/ipv4/tcp_input.c:1713 tcp_sacktag_write_queue+0x1599/0x31d0 net/ipv4/tcp_input.c:1974 tcp_ack+0x2e9f/0x5a10 net/ipv4/tcp_input.c:3847 tcp_rcv_established+0x667/0x2230 net/ipv4/tcp_input.c:6006 tcp_v4_do_rcv+0x670/0x9b0 net/ipv4/tcp_ipv4.c:1721 sk_backlog_rcv include/net/sock.h:1113 [inline] __release_sock+0x133/0x3b0 net/core/sock.c:2921 release_sock+0x58/0x1b0 net/core/sock.c:3488 tcp_sendmsg+0x3a/0x50 net/ipv4/tcp.c:1485 inet_sendmsg+0x9d/0xe0 net/ipv4/af_inet.c:825 sock_sendmsg_nosec net/socket.c:722 [inline] sock_sendmsg+0xde/0x190 net/socket.c:745 sock_write_iter+0x295/0x3d0 net/socket.c:1136 call_write_iter include/linux/fs.h:2189 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff88806cdee800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 0 bytes inside of 1024-byte region [ffff88806cdee800, ffff88806cdeec00) The buggy address belongs to the physical page: page:ffffea0001b37a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x6cde8 head:ffffea0001b37a00 order:3 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 ffff888012441dc0 dead000000000122 0000000000000000 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1f2a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_MEMALLOC|__GFP_HARDWALL), pid 75, tgid 75 (kworker/u4:4), ts 96369578780, free_ts 26734162530 prep_new_page mm/page_alloc.c:2531 [inline] get_page_from_freelist+0x119c/0x2ce0 mm/page_alloc.c:4283 __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5549 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2287 alloc_slab_page mm/slub.c:1851 [inline] allocate_slab+0x25f/0x350 mm/slub.c:1998 new_slab mm/slub.c:2051 [inline] ___slab_alloc+0xa91/0x1400 mm/slub.c:3193 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3292 __slab_alloc_node mm/slub.c:3345 [inline] slab_alloc_node mm/slub.c:3442 [inline] __kmem_cache_alloc_node+0x1a4/0x430 mm/slub.c:3491 __do_kmalloc_node mm/slab_common.c:967 [inline] __kmalloc_node_track_caller+0x4b/0xc0 mm/slab_common.c:988 kmalloc_reserve+0xf1/0x230 net/core/skbuff.c:539 __alloc_skb+0x129/0x330 net/core/skbuff.c:608 __netdev_alloc_skb+0x74/0x410 net/core/skbuff.c:672 __netdev_alloc_skb_ip_align include/linux/skbuff.h:3203 [inline] netdev_alloc_skb_ip_align include/linux/skbuff.h:3213 [inline] batadv_iv_ogm_aggregate_new+0x106/0x4e0 net/batman-adv/bat_iv_ogm.c:558 batadv_iv_ogm_queue_add net/batman-adv/bat_iv_ogm.c:670 [inline] batadv_iv_ogm_schedule_buff+0xe6b/0x1450 net/batman-adv/bat_iv_ogm.c:849 batadv_iv_ogm_schedule net/batman-adv/bat_iv_ogm.c:868 [inline] batadv_iv_ogm_schedule net/batman-adv/bat_iv_ogm.c:861 [inline] batadv_iv_send_outstanding_bat_ogm_packet+0x744/0x910 net/batman-adv/bat_iv_ogm.c:1712 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1446 [inline] free_pcp_prepare+0x66a/0xc20 mm/page_alloc.c:1496 free_unref_page_prepare mm/page_alloc.c:3369 [inline] free_unref_page+0x1d/0x490 mm/page_alloc.c:3464 free_contig_range+0xb5/0x180 mm/page_alloc.c:9488 destroy_args+0xa8/0x64c mm/debug_vm_pgtable.c:998 debug_vm_pgtable+0x28de/0x296f mm/debug_vm_pgtable.c:1318 do_one_initcall+0x141/0x790 init/main.c:1306 do_initcall_level init/main.c:1379 [inline] do_initcalls init/main.c:1395 [inline] do_basic_setup init/main.c:1414 [inline] kernel_init_freeable+0x6f9/0x782 init/main.c:1634 kernel_init+0x1e/0x1d0 init/main.c:1522 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 Memory state around the buggy address: ffff88806cdee700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88806cdee780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88806cdee800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ^ ffff88806cdee880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head") Reported-by: syzbot Signed-off-by: Eric Dumazet Tested-by: Christoph Paasch Signed-off-by: David S. Miller --- net/core/skbuff.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eb7d33b41e71..1a31815104d6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -517,18 +517,16 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, #ifdef HAVE_SKB_SMALL_HEAD_CACHE if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { - - /* skb_small_head_cache has non power of two size, - * likely forcing SLUB to use order-3 pages. - * We deliberately attempt a NOMEMALLOC allocation only. - */ obj = kmem_cache_alloc_node(skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); - if (obj) { - *size = SKB_SMALL_HEAD_CACHE_SIZE; + *size = SKB_SMALL_HEAD_CACHE_SIZE; + if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; - } + /* Try again but now we are using pfmemalloc reserves */ + ret_pfmemalloc = true; + obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node); + goto out; } #endif *size = obj_size = kmalloc_size_roundup(obj_size); @@ -2082,6 +2080,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } EXPORT_SYMBOL(skb_realloc_headroom); +/* Note: We plan to rework this in linux-6.4 */ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { unsigned int saved_end_offset, saved_truesize; @@ -2100,6 +2099,22 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; +#ifdef HAVE_SKB_SMALL_HEAD_CACHE + /* We can not change skb->end if the original or new value + * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). + */ + if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM || + skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) { + /* We think this path should not be taken. + * Add a temporary trace to warn us just in case. + */ + pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n", + saved_end_offset, skb_end_offset(skb)); + WARN_ON_ONCE(1); + return 0; + } +#endif + shinfo = skb_shinfo(skb); /* We are about to change back skb->end, -- cgit From fb07390463c95e6eef254044d6dde050bfb9807a Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 27 Feb 2023 12:23:52 -0300 Subject: net/sched: act_connmark: handle errno on tcf_idr_check_alloc Smatch reports that 'ci' can be used uninitialized. The current code ignores errno coming from tcf_idr_check_alloc, which will lead to the incorrect usage of 'ci'. Handle the errno as it should. Fixes: 288864effe33 ("net/sched: act_connmark: transition to percpu stats and rcu") Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/act_connmark.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8dabfb52ea3d..0d7aee8933c5 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -158,6 +158,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, nparms->zone = parm->zone; ret = 0; + } else { + err = ret; + goto out_free; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); -- cgit From 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Feb 2023 15:30:24 +0000 Subject: ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping() ila_xlat_nl_cmd_get_mapping() generates an empty skb, triggerring a recent sanity check [1]. Instead, return an error code, so that user space can get it. [1] skb_assert_len WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline] WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 Modules linked in: CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_assert_len include/linux/skbuff.h:2527 [inline] pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 lr : skb_assert_len include/linux/skbuff.h:2527 [inline] lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 sp : ffff80001e0d6c40 x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0 x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00 x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10 x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0 x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001 x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600 x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001 x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744 x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e Call trace: skb_assert_len include/linux/skbuff.h:2527 [inline] __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 dev_queue_xmit include/linux/netdevice.h:3033 [inline] __netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline] __netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325 netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338 __netlink_sendskb net/netlink/af_netlink.c:1283 [inline] netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292 netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380 nlmsg_unicast include/net/netlink.h:1099 [inline] genlmsg_unicast include/net/genetlink.h:433 [inline] genlmsg_reply include/net/genetlink.h:443 [inline] ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493 genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065 netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574 genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x558/0x844 net/socket.c:2479 ___sys_sendmsg net/socket.c:2533 [inline] __sys_sendmsg+0x26c/0x33c net/socket.c:2562 __do_sys_sendmsg net/socket.c:2571 [inline] __se_sys_sendmsg net/socket.c:2569 [inline] __arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52 el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193 el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 irq event stamp: 136484 hardirqs last enabled at (136483): [] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345 hardirqs last disabled at (136484): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405 softirqs last enabled at (136418): [] softirq_handle_end kernel/softirq.c:414 [inline] softirqs last enabled at (136418): [] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600 softirqs last disabled at (136371): [] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80 ---[ end trace 0000000000000000 ]--- skb len=0 headroom=0 headlen=0 tailroom=192 mac=(0,0) net=(0,-1) trans=-1 shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0 dev name=nlmon0 feat=0x0000000000005861 Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ila/ila_xlat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 47447f0241df..bee45dfeb187 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); + ret = -ESRCH; ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, -- cgit From dfd2f0eb2347dbdf391fd5b8255fefc58a745472 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Feb 2023 18:44:36 +0000 Subject: net/sched: flower: fix fl_change() error recovery path The two "goto errout;" paths in fl_change() became wrong after cited commit. Indeed we only must not call __fl_put() until the net pointer has been set in tcf_exts_init_ex() This is a minimal fix. We might in the future validate TCA_FLOWER_FLAGS before we allocate @fnew. BUG: KASAN: null-ptr-deref in instrument_atomic_read include/linux/instrumented.h:72 [inline] BUG: KASAN: null-ptr-deref in atomic_read include/linux/atomic/atomic-instrumented.h:27 [inline] BUG: KASAN: null-ptr-deref in refcount_read include/linux/refcount.h:147 [inline] BUG: KASAN: null-ptr-deref in __refcount_add_not_zero include/linux/refcount.h:152 [inline] BUG: KASAN: null-ptr-deref in __refcount_inc_not_zero include/linux/refcount.h:227 [inline] BUG: KASAN: null-ptr-deref in refcount_inc_not_zero include/linux/refcount.h:245 [inline] BUG: KASAN: null-ptr-deref in maybe_get_net include/net/net_namespace.h:269 [inline] BUG: KASAN: null-ptr-deref in tcf_exts_get_net include/net/pkt_cls.h:260 [inline] BUG: KASAN: null-ptr-deref in __fl_put net/sched/cls_flower.c:513 [inline] BUG: KASAN: null-ptr-deref in __fl_put+0x13e/0x3b0 net/sched/cls_flower.c:508 Read of size 4 at addr 000000000000014c by task syz-executor548/5082 CPU: 0 PID: 5082 Comm: syz-executor548 Not tainted 6.2.0-syzkaller-05251-g5b7c4cabbb65 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 print_report mm/kasan/report.c:420 [inline] kasan_report+0xec/0x130 mm/kasan/report.c:517 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x141/0x190 mm/kasan/generic.c:189 instrument_atomic_read include/linux/instrumented.h:72 [inline] atomic_read include/linux/atomic/atomic-instrumented.h:27 [inline] refcount_read include/linux/refcount.h:147 [inline] __refcount_add_not_zero include/linux/refcount.h:152 [inline] __refcount_inc_not_zero include/linux/refcount.h:227 [inline] refcount_inc_not_zero include/linux/refcount.h:245 [inline] maybe_get_net include/net/net_namespace.h:269 [inline] tcf_exts_get_net include/net/pkt_cls.h:260 [inline] __fl_put net/sched/cls_flower.c:513 [inline] __fl_put+0x13e/0x3b0 net/sched/cls_flower.c:508 fl_change+0x101b/0x4ab0 net/sched/cls_flower.c:2341 tc_new_tfilter+0x97c/0x2290 net/sched/cls_api.c:2310 rtnetlink_rcv_msg+0x996/0xd50 net/core/rtnetlink.c:6165 netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2574 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x925/0xe30 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:722 [inline] sock_sendmsg+0xde/0x190 net/socket.c:745 ____sys_sendmsg+0x334/0x900 net/socket.c:2504 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2558 __sys_sendmmsg+0x18f/0x460 net/socket.c:2644 __do_sys_sendmmsg net/socket.c:2673 [inline] __se_sys_sendmmsg net/socket.c:2670 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2670 Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Reported-by: syzbot+baabf3efa7c1e57d28b2@syzkaller.appspotmail.com Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Paul Blakey Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e960a46b0520..475fe222a855 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2200,8 +2200,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); if (!tc_flags_valid(fnew->flags)) { + kfree(fnew); err = -EINVAL; - goto errout; + goto errout_tb; } } @@ -2226,8 +2227,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } spin_unlock(&tp->lock); - if (err) - goto errout; + if (err) { + kfree(fnew); + goto errout_tb; + } } fnew->handle = handle; @@ -2337,7 +2340,6 @@ errout_mask: fl_mask_put(head, fnew->mask); errout_idr: idr_remove(&head->handle_idr, fnew->handle); -errout: __fl_put(fnew); errout_tb: kfree(tb); -- cgit From 81563d8548b0478075c720666be348d4199b8591 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 28 Feb 2023 21:47:42 +0100 Subject: net: lan966x: Fix port police support using tc-matchall When the police was removed from the port, then it was trying to remove the police from the police id and not from the actual police index. The police id represents the id of the police and police index represents the position in HW where the police is situated. The port police id can be any number while the port police index is a number based on the port chip port. Fix this by deleting the police from HW that is situated at the police index and not police id. Fixes: 5390334b59a3 ("net: lan966x: Add port police support using tc-matchall") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_police.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c index a9aec900d608..7d66fe75cd3b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c @@ -194,7 +194,7 @@ int lan966x_police_port_del(struct lan966x_port *port, return -EINVAL; } - err = lan966x_police_del(port, port->tc.police_id); + err = lan966x_police_del(port, POL_IDX_PORT + port->chip_port); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to add policer to port"); -- cgit From 2067e7a00aa604b94de31d64f29b8893b1696f26 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 27 Feb 2023 17:36:46 +0800 Subject: selftests: nft_nat: ensuring the listening side is up before starting the client The test_local_dnat_portonly() function initiates the client-side as soon as it sets the listening side to the background. This could lead to a race condition where the server may not be ready to listen. To ensure that the server-side is up and running before initiating the client-side, a delay is introduced to the test_local_dnat_portonly() function. Before the fix: # ./nft_nat.sh PASS: netns routing/connectivity: ns0-rthlYrBU can reach ns1-rthlYrBU and ns2-rthlYrBU PASS: ping to ns1-rthlYrBU was ip NATted to ns2-rthlYrBU PASS: ping to ns1-rthlYrBU OK after ip nat output chain flush PASS: ipv6 ping to ns1-rthlYrBU was ip6 NATted to ns2-rthlYrBU 2023/02/27 04:11:03 socat[6055] E connect(5, AF=2 10.0.1.99:2000, 16): Connection refused ERROR: inet port rewrite After the fix: # ./nft_nat.sh PASS: netns routing/connectivity: ns0-9sPJV6JJ can reach ns1-9sPJV6JJ and ns2-9sPJV6JJ PASS: ping to ns1-9sPJV6JJ was ip NATted to ns2-9sPJV6JJ PASS: ping to ns1-9sPJV6JJ OK after ip nat output chain flush PASS: ipv6 ping to ns1-9sPJV6JJ was ip6 NATted to ns2-9sPJV6JJ PASS: inet port rewrite without l3 address Fixes: 282e5f8fe907 ("netfilter: nat: really support inet nat without l3 address") Signed-off-by: Hangbin Liu Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_nat.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 924ecb3f1f73..dd40d9f6f259 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -404,6 +404,8 @@ EOF echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 & sc_s=$! + sleep 1 + result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT) if [ "$result" = "SERVER-inet" ];then -- cgit From 860e874290fb3be08e966c9c8ffc510c5b0f2bd8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2023 17:09:03 +0100 Subject: netfilter: nft_last: copy content when cloning expression If the ruleset contains last timestamps, restore them accordingly. Otherwise, listing after restoration shows never used items. Fixes: 33a24de37e81 ("netfilter: nft_last: move stateful fields out of expression data") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_last.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 7f2bda6641bd..8e6d7eaf9dc8 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -105,11 +105,15 @@ static void nft_last_destroy(const struct nft_ctx *ctx, static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src) { struct nft_last_priv *priv_dst = nft_expr_priv(dst); + struct nft_last_priv *priv_src = nft_expr_priv(src); priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); if (!priv_dst->last) return -ENOMEM; + priv_dst->last->set = priv_src->last->set; + priv_dst->last->jiffies = priv_src->last->jiffies; + return 0; } -- cgit From aabef97a35160461e9c576848ded737558d89055 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2023 20:43:02 +0100 Subject: netfilter: nft_quota: copy content when cloning expression If the ruleset contains consumed quota, restore them accordingly. Otherwise, listing after restoration shows never used items. Restore the user-defined quota and flags too. Fixes: ed0a0c60f0e5 ("netfilter: nft_quota: move stateful fields out of expression data") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_quota.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 123578e28917..3ba12a7471b0 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -236,12 +236,16 @@ static void nft_quota_destroy(const struct nft_ctx *ctx, static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) { struct nft_quota *priv_dst = nft_expr_priv(dst); + struct nft_quota *priv_src = nft_expr_priv(src); + + priv_dst->quota = priv_src->quota; + priv_dst->flags = priv_src->flags; priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC); if (!priv_dst->consumed) return -ENOMEM; - atomic64_set(priv_dst->consumed, 0); + *priv_dst->consumed = *priv_src->consumed; return 0; } -- cgit From 49c47cc21b5b7a3d8deb18fc57b0aa2ab1286962 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Tue, 28 Feb 2023 10:33:44 +0800 Subject: net: tls: fix possible race condition between do_tls_getsockopt_conf() and do_tls_setsockopt_conf() ctx->crypto_send.info is not protected by lock_sock in do_tls_getsockopt_conf(). A race condition between do_tls_getsockopt_conf() and error paths of do_tls_setsockopt_conf() may lead to a use-after-free or null-deref. More discussion: https://lore.kernel.org/all/Y/ht6gQL+u6fj3dG@hog/ Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20230228023344.9623-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski --- net/tls/tls_main.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 3735cb00905d..b32c112984dd 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -405,13 +405,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aes_gcm_128->iv, cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aes_gcm_128, sizeof(*crypto_info_aes_gcm_128))) @@ -429,13 +427,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aes_gcm_256->iv, cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE, TLS_CIPHER_AES_GCM_256_IV_SIZE); memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aes_gcm_256, sizeof(*crypto_info_aes_gcm_256))) @@ -451,13 +447,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(aes_ccm_128->iv, cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE, TLS_CIPHER_AES_CCM_128_IV_SIZE); memcpy(aes_ccm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128))) rc = -EFAULT; break; @@ -472,13 +466,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(chacha20_poly1305->iv, cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE, TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE); memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq, TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, chacha20_poly1305, sizeof(*chacha20_poly1305))) rc = -EFAULT; @@ -493,13 +485,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(sm4_gcm_info->iv, cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE, TLS_CIPHER_SM4_GCM_IV_SIZE); memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq, TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info))) rc = -EFAULT; break; @@ -513,13 +503,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(sm4_ccm_info->iv, cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE, TLS_CIPHER_SM4_CCM_IV_SIZE); memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq, TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info))) rc = -EFAULT; break; @@ -535,13 +523,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aria_gcm_128->iv, cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE, TLS_CIPHER_ARIA_GCM_128_IV_SIZE); memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aria_gcm_128, sizeof(*crypto_info_aria_gcm_128))) @@ -559,13 +545,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aria_gcm_256->iv, cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE, TLS_CIPHER_ARIA_GCM_256_IV_SIZE); memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq, TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aria_gcm_256, sizeof(*crypto_info_aria_gcm_256))) @@ -614,11 +598,9 @@ static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, if (len < sizeof(value)) return -EINVAL; - lock_sock(sk); value = -EINVAL; if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) value = ctx->rx_no_pad; - release_sock(sk); if (value < 0) return value; @@ -635,6 +617,8 @@ static int do_tls_getsockopt(struct sock *sk, int optname, { int rc = 0; + lock_sock(sk); + switch (optname) { case TLS_TX: case TLS_RX: @@ -651,6 +635,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, rc = -ENOPROTOOPT; break; } + + release_sock(sk); + return rc; } -- cgit From f3221361dc85d4de22586ce8441ec2c67b454f5d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Feb 2023 16:28:57 -0800 Subject: net: tls: avoid hanging tasks on the tx_lock syzbot sent a hung task report and Eric explains that adversarial receiver may keep RWIN at 0 for a long time, so we are not guaranteed to make forward progress. Thread which took tx_lock and went to sleep may not release tx_lock for hours. Use interruptible sleep where possible and reschedule the work if it can't take the lock. Testing: existing selftest passes Reported-by: syzbot+9c0268252b8ef967c62e@syzkaller.appspotmail.com Fixes: 79ffe6087e91 ("net/tls: add a TX lock") Link: https://lore.kernel.org/all/000000000000e412e905f5b46201@google.com/ Cc: stable@vger.kernel.org # wait 4 weeks Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230301002857.2101894-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 021d760f9133..635b8bf6b937 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -956,7 +956,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) MSG_CMSG_COMPAT)) return -EOPNOTSUPP; - mutex_lock(&tls_ctx->tx_lock); + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; lock_sock(sk); if (unlikely(msg->msg_controllen)) { @@ -1290,7 +1292,9 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) return -EOPNOTSUPP; - mutex_lock(&tls_ctx->tx_lock); + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; lock_sock(sk); ret = tls_sw_do_sendpage(sk, page, offset, size, flags); release_sock(sk); @@ -2435,11 +2439,19 @@ static void tx_work_handler(struct work_struct *work) if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) return; - mutex_lock(&tls_ctx->tx_lock); - lock_sock(sk); - tls_tx_records(sk, -1); - release_sock(sk); - mutex_unlock(&tls_ctx->tx_lock); + + if (mutex_trylock(&tls_ctx->tx_lock)) { + lock_sock(sk); + tls_tx_records(sk, -1); + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); + } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + /* Someone is holding the tx_lock, they will likely run Tx + * and cancel the work on their way out of the lock section. + * Schedule a long delay just in case. + */ + schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10)); + } } static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx) -- cgit From 5c1ebbfabcd61142a4551bfc0e51840f9bdae7af Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Wed, 1 Mar 2023 13:32:47 +0000 Subject: net: use indirect calls helpers for sk_exit_memory_pressure() Florian reported a regression and sent a patch with the following changelog: There is a noticeable tcp performance regression (loopback or cross-netns), seen with iperf3 -Z (sendfile mode) when generic retpolines are needed. With SK_RECLAIM_THRESHOLD checks gone number of calls to enter/leave memory pressure happen much more often. For TCP indirect calls are used. We can't remove the if-set-return short-circuit check in tcp_enter_memory_pressure because there are callers other than sk_enter_memory_pressure. Doing a check in the sk wrapper too reduces the indirect calls enough to recover some performance. Before, 0.00-60.00 sec 322 GBytes 46.1 Gbits/sec receiver After: 0.00-60.04 sec 359 GBytes 51.4 Gbits/sec receiver "iperf3 -c $peer -t 60 -Z -f g", connected via veth in another netns. It seems we forgot to upstream this indirect call mitigation we had for years, lets do this instead. [edumazet] - It seems we forgot to upstream this indirect call mitigation we had for years, let's do this instead. - Changed to INDIRECT_CALL_INET_1() to avoid bots reports. Fixes: 4890b686f408 ("net: keep sk->sk_forward_alloc as small as possible") Reported-by: Florian Westphal Link: https://lore.kernel.org/netdev/20230227152741.4a53634b@kernel.org/T/ Signed-off-by: Brian Vazquez Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230301133247.2346111-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 341c565dbc26..c25888795390 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2818,7 +2818,8 @@ static void sk_enter_memory_pressure(struct sock *sk) static void sk_leave_memory_pressure(struct sock *sk) { if (sk->sk_prot->leave_memory_pressure) { - sk->sk_prot->leave_memory_pressure(sk); + INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure, + tcp_leave_memory_pressure, sk); } else { unsigned long *memory_pressure = sk->sk_prot->memory_pressure; -- cgit From 6c993779ea1d0cccdb3a5d7d45446dd229e610a3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 16 Feb 2023 23:25:04 -0500 Subject: ca8210: fix mac_len negative array access This patch fixes a buffer overflow access of skb->data if ieee802154_hdr_peek_addrs() fails. Reported-by: lianhui tang Signed-off-by: Alexander Aring Link: https://lore.kernel.org/r/20230217042504.3303396-1-aahringo@redhat.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index e1a569b99e4a..0b0c6c0764fe 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -1913,6 +1913,8 @@ static int ca8210_skb_tx( * packet */ mac_len = ieee802154_hdr_peek_addrs(skb, &header); + if (mac_len < 0) + return mac_len; secspec.security_level = header.sec.level; secspec.key_id_mode = header.sec.key_id_mode; -- cgit From 02f18662f6c671382345fcb696e808d78f4c194a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 1 Mar 2023 16:44:50 +0100 Subject: ieee802154: Prevent user from crashing the host Avoid crashing the machine by checking info->attrs[NL802154_ATTR_SCAN_TYPE] presence before de-referencing it, which was the primary intend of the blamed patch. Reported-by: Sanan Hasanov Suggested-by: Eric Dumazet Fixes: a0b6106672b5 ("ieee802154: Convert scan error messages to extack") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20230301154450.547716-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl802154.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 2215f576ee37..d8f4379d4fa6 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1412,7 +1412,7 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } - if (!nla_get_u8(info->attrs[NL802154_ATTR_SCAN_TYPE])) { + if (!info->attrs[NL802154_ATTR_SCAN_TYPE]) { NL_SET_ERR_MSG(info->extack, "Malformed request, missing scan type"); return -EINVAL; } -- cgit From e57cf3639c323eeed05d3725fd82f91b349adca8 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Wed, 1 Mar 2023 08:43:07 -0700 Subject: net: lan78xx: fix accessing the LAN7800's internal phy specific registers from the MAC driver Move the LAN7800 internal phy (phy ID 0x0007c132) specific register accesses to the phy driver (microchip.c). Fix the error reported by Enguerrand de Ribaucourt in December 2022, "Some operations during the cable switch workaround modify the register LAN88XX_INT_MASK of the PHY. However, this register is specific to the LAN8835 PHY. For instance, if a DP8322I PHY is connected to the LAN7801, that register (0x19), corresponds to the LED and MAC address configuration, resulting in unapropriate behavior." I did not test with the DP8322I PHY, but I tested with an EVB-LAN7800 with the internal PHY. Fixes: 14437e3fa284 ("lan78xx: workaround of forced 100 Full/Half duplex mode error") Signed-off-by: Yuiko Oshino Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230301154307.30438-1-yuiko.oshino@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip.c | 32 ++++++++++++++++++++++++++++++++ drivers/net/usb/lan78xx.c | 27 +-------------------------- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index ccecee2524ce..0b88635f4fbc 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -342,6 +342,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev) return genphy_config_aneg(phydev); } +static void lan88xx_link_change_notify(struct phy_device *phydev) +{ + int temp; + + /* At forced 100 F/H mode, chip may fail to set mode correctly + * when cable is switched between long(~50+m) and short one. + * As workaround, set to 10 before setting to 100 + * at forced 100 F/H mode. + */ + if (!phydev->autoneg && phydev->speed == 100) { + /* disable phy interrupt */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; + phy_write(phydev, LAN88XX_INT_MASK, temp); + + temp = phy_read(phydev, MII_BMCR); + temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); + phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ + temp |= BMCR_SPEED100; + phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ + + /* clear pending interrupt generated while workaround */ + temp = phy_read(phydev, LAN88XX_INT_STS); + + /* enable phy interrupt back */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; + phy_write(phydev, LAN88XX_INT_MASK, temp); + } +} + static struct phy_driver microchip_phy_driver[] = { { .phy_id = 0x0007c132, @@ -359,6 +390,7 @@ static struct phy_driver microchip_phy_driver[] = { .config_init = lan88xx_config_init, .config_aneg = lan88xx_config_aneg, + .link_change_notify = lan88xx_link_change_notify, .config_intr = lan88xx_phy_config_intr, .handle_interrupt = lan88xx_handle_interrupt, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f18ab8e220db..068488890d57 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2115,33 +2115,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev) static void lan78xx_link_status_change(struct net_device *net) { struct phy_device *phydev = net->phydev; - int temp; - - /* At forced 100 F/H mode, chip may fail to set mode correctly - * when cable is switched between long(~50+m) and short one. - * As workaround, set to 10 before setting to 100 - * at forced 100 F/H mode. - */ - if (!phydev->autoneg && (phydev->speed == 100)) { - /* disable phy interrupt */ - temp = phy_read(phydev, LAN88XX_INT_MASK); - temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; - phy_write(phydev, LAN88XX_INT_MASK, temp); - temp = phy_read(phydev, MII_BMCR); - temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); - phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ - temp |= BMCR_SPEED100; - phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ - - /* clear pending interrupt generated while workaround */ - temp = phy_read(phydev, LAN88XX_INT_STS); - - /* enable phy interrupt back */ - temp = phy_read(phydev, LAN88XX_INT_MASK); - temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; - phy_write(phydev, LAN88XX_INT_MASK, temp); - } + phy_print_status(phydev); } static int irq_map(struct irq_domain *d, unsigned int irq, -- cgit From 9781e98a97110f5e76999058368b4be76a788484 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 2 Mar 2023 01:39:13 +0900 Subject: net: caif: Fix use-after-free in cfusbl_device_notify() syzbot reported use-after-free in cfusbl_device_notify() [1]. This causes a stack trace like below: BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214 CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] netdev_wait_allrefs_any net/core/dev.c:10227 [inline] netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341 default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 When unregistering a net device, unregister_netdevice_many_notify() sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers with NETDEV_UNREGISTER, and adds the device to the todo list. Later on, devices in the todo list are processed by netdev_run_todo(). netdev_run_todo() waits devices' reference count become 1 while rebdoadcasting NETDEV_UNREGISTER notification. When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple times, the parent device might be freed. This could cause UAF. Processing NETDEV_UNREGISTER multiple times also causes inbalance of reference count for the module. This patch fixes the issue by accepting only first NETDEV_UNREGISTER notification. Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface") CC: sjur.brandeland@stericsson.com Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1] Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com Signed-off-by: Jakub Kicinski --- net/caif/caif_usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ebc202ffdd8d..bf61ea4b8132 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, struct usb_device *usbdev; int res; + if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) + return 0; + /* Check whether we have a NCM device, and find its VID/PID. */ if (!(dev->dev.parent && dev->dev.parent->driver && strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) -- cgit From 7cf93538e087a792cb476008a757ab7c1c23b68c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Mar 2023 10:36:40 -0800 Subject: tools: ynl: fully inherit attrs in subsets To avoid having to repeat the entire definition of an attribute (including the value) use the Attr object from the original set. In fact this is already the documented expectation. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/userspace-api/netlink/specs.rst | 3 ++- tools/net/ynl/lib/nlspec.py | 23 +++++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index 6ffe8137cd90..1424ab1b9b33 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -199,7 +199,8 @@ The ``value`` property can be skipped, in which case the attribute ID will be the value of the previous attribute plus one (recursively) and ``0`` for the first attribute in the attribute set. -Note that the ``value`` of an attribute is defined only in its main set. +Note that the ``value`` of an attribute is defined only in its main set +(not in subsets). enum ~~~~ diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 71da568e2c28..dff31dad36c5 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -95,15 +95,22 @@ class SpecAttrSet(SpecElement): self.attrs = collections.OrderedDict() self.attrs_by_val = collections.OrderedDict() - val = 0 - for elem in self.yaml['attributes']: - if 'value' in elem: - val = elem['value'] + if self.subset_of is None: + val = 0 + for elem in self.yaml['attributes']: + if 'value' in elem: + val = elem['value'] - attr = self.new_attr(elem, val) - self.attrs[attr.name] = attr - self.attrs_by_val[attr.value] = attr - val += 1 + attr = self.new_attr(elem, val) + self.attrs[attr.name] = attr + self.attrs_by_val[attr.value] = attr + val += 1 + else: + real_set = family.attr_sets[self.subset_of] + for elem in self.yaml['attributes']: + attr = real_set[elem['name']] + self.attrs[attr.name] = attr + self.attrs_by_val[attr.value] = attr def new_attr(self, elem, value): return SpecAttr(self.family, self, elem, value) -- cgit From ad4fafcde5bc1badb8fc2c7f260a5d6b83a038e4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Mar 2023 10:36:41 -0800 Subject: tools: ynl: use 1 as the default for first entry in attrs/ops Pretty much all families use value: 1 or reserve as unspec the first entry in attribute set and the first operation. Make this the default. Update documentation (the doc for values of operations just refers back to doc for attrs so updating only attrs). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/userspace-api/netlink/specs.rst | 7 ++++++- tools/net/ynl/lib/nlspec.py | 6 +++--- tools/net/ynl/ynl-gen-c.py | 7 +++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index 1424ab1b9b33..32e53328d113 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -197,7 +197,12 @@ value Numerical attribute ID, used in serialized Netlink messages. The ``value`` property can be skipped, in which case the attribute ID will be the value of the previous attribute plus one (recursively) -and ``0`` for the first attribute in the attribute set. +and ``1`` for the first attribute in the attribute set. + +Attributes (and operations) use ``1`` as the default value for the first +entry (unlike enums in definitions which start from ``0``) because +entry ``0`` is almost always reserved as undefined. Spec can explicitly +set value to ``0`` if needed. Note that the ``value`` of an attribute is defined only in its main set (not in subsets). diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index dff31dad36c5..9d394e50de23 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -96,7 +96,7 @@ class SpecAttrSet(SpecElement): self.attrs_by_val = collections.OrderedDict() if self.subset_of is None: - val = 0 + val = 1 for elem in self.yaml['attributes']: if 'value' in elem: val = elem['value'] @@ -252,7 +252,7 @@ class SpecFamily(SpecElement): self._resolution_list.append(elem) def _dictify_ops_unified(self): - val = 0 + val = 1 for elem in self.yaml['operations']['list']: if 'value' in elem: val = elem['value'] @@ -263,7 +263,7 @@ class SpecFamily(SpecElement): self.msgs[op.name] = op def _dictify_ops_directional(self): - req_val = rsp_val = 0 + req_val = rsp_val = 1 for elem in self.yaml['operations']['list']: if 'notify' in elem: if 'value' in elem: diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 274e9c566f61..62f8f2c3c56c 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2044,14 +2044,17 @@ def render_uapi(family, cw): max_value = f"({cnt_name} - 1)" uapi_enum_start(family, cw, family['operations'], 'enum-name') + val = 0 for op in family.msgs.values(): if separate_ntf and ('notify' in op or 'event' in op): continue suffix = ',' - if 'value' in op: - suffix = f" = {op['value']}," + if op.value != val: + suffix = f" = {op.value}," + val = op.value cw.p(op.enum_name + suffix) + val += 1 cw.nl() cw.p(cnt_name + ('' if max_by_define else ',')) if not max_by_define: -- cgit From bcec7171eba901cc5f427ebbad9fe17ed4749b8f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Mar 2023 10:36:42 -0800 Subject: netlink: specs: update for codegen enumerating from 1 Now that the codegen rules had been changed we can update the specs to reflect the new default. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 15 --------------- Documentation/netlink/specs/fou.yaml | 2 ++ Documentation/netlink/specs/netdev.yaml | 2 -- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 08b776908d15..35c462bce56f 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -11,7 +11,6 @@ attribute-sets: - name: dev-index type: u32 - value: 1 - name: dev-name type: string @@ -25,7 +24,6 @@ attribute-sets: - name: index type: u32 - value: 1 - name: name type: string @@ -39,14 +37,12 @@ attribute-sets: name: bit type: nest nested-attributes: bitset-bit - value: 1 - name: bitset attributes: - name: nomask type: flag - value: 1 - name: size type: u32 @@ -61,7 +57,6 @@ attribute-sets: - name: index type: u32 - value: 1 - name: value type: string @@ -71,7 +66,6 @@ attribute-sets: - name: string type: nest - value: 1 multi-attr: true nested-attributes: string - @@ -80,7 +74,6 @@ attribute-sets: - name: id type: u32 - value: 1 - name: count type: u32 @@ -96,14 +89,12 @@ attribute-sets: name: stringset type: nest multi-attr: true - value: 1 nested-attributes: stringset - name: strset attributes: - name: header - value: 1 type: nest nested-attributes: header - @@ -119,7 +110,6 @@ attribute-sets: attributes: - name: header - value: 1 type: nest nested-attributes: header - @@ -132,7 +122,6 @@ attribute-sets: attributes: - name: header - value: 1 type: nest nested-attributes: header - @@ -180,7 +169,6 @@ attribute-sets: attributes: - name: pad - value: 1 type: pad - name: reassembly-errors @@ -205,7 +193,6 @@ attribute-sets: attributes: - name: header - value: 1 type: nest nested-attributes: header - @@ -251,13 +238,11 @@ operations: do: &strset-get-op request: - value: 1 attributes: - header - stringsets - counts-only reply: - value: 1 attributes: - header - stringsets diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index 266c386eedf3..cca4cf98f03a 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -26,6 +26,7 @@ attribute-sets: - name: unspec type: unused + value: 0 - name: port type: u16 @@ -71,6 +72,7 @@ operations: - name: unspec doc: unused + value: 0 - name: add diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index cffef09729f1..ba9ee13cf729 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -48,7 +48,6 @@ attribute-sets: name: ifindex doc: netdev ifindex type: u32 - value: 1 checks: min: 1 - @@ -66,7 +65,6 @@ operations: - name: dev-get doc: Get / dump information about a netdev. - value: 1 attribute-set: dev do: request: -- cgit From 84cba1840e68430325ac133a11be06bfb2f7acd8 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Wed, 1 Mar 2023 21:47:07 +0100 Subject: ice: copy last block omitted in ice_get_module_eeprom() ice_get_module_eeprom() is broken since commit e9c9692c8a81 ("ice: Reimplement module reads used by ethtool") In this refactor, ice_get_module_eeprom() reads the eeprom in blocks of size 8. But the condition that should protect the buffer overflow ignores the last block. The last block always contains zeros. Bug uncovered by ethtool upstream commit 9538f384b535 ("netlink: eeprom: Defer page requests to individual parsers") After this commit, ethtool reads a block with length = 1; to read the SFF-8024 identifier value. unpatched driver: $ ethtool -m enp65s0f0np0 offset 0x90 length 8 Offset Values ------ ------ 0x0090: 00 00 00 00 00 00 00 00 $ ethtool -m enp65s0f0np0 offset 0x90 length 12 Offset Values ------ ------ 0x0090: 00 00 01 a0 4d 65 6c 6c 00 00 00 00 $ $ ethtool -m enp65s0f0np0 Offset Values ------ ------ 0x0000: 11 06 06 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0060: 00 00 00 00 00 00 00 00 00 00 00 00 00 01 08 00 0x0070: 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 patched driver: $ ethtool -m enp65s0f0np0 offset 0x90 length 8 Offset Values ------ ------ 0x0090: 00 00 01 a0 4d 65 6c 6c $ ethtool -m enp65s0f0np0 offset 0x90 length 12 Offset Values ------ ------ 0x0090: 00 00 01 a0 4d 65 6c 6c 61 6e 6f 78 $ ethtool -m enp65s0f0np0 Identifier : 0x11 (QSFP28) Extended identifier : 0x00 Extended identifier description : 1.5W max. Power consumption Extended identifier description : No CDR in TX, No CDR in RX Extended identifier description : High Power Class (> 3.5 W) not enabled Connector : 0x23 (No separable connector) Transceiver codes : 0x88 0x00 0x00 0x00 0x00 0x00 0x00 0x00 Transceiver type : 40G Ethernet: 40G Base-CR4 Transceiver type : 25G Ethernet: 25G Base-CR CA-N Encoding : 0x05 (64B/66B) BR, Nominal : 25500Mbps Rate identifier : 0x00 Length (SMF,km) : 0km Length (OM3 50um) : 0m Length (OM2 50um) : 0m Length (OM1 62.5um) : 0m Length (Copper or Active cable) : 1m Transmitter technology : 0xa0 (Copper cable unequalized) Attenuation at 2.5GHz : 4db Attenuation at 5.0GHz : 5db Attenuation at 7.0GHz : 7db Attenuation at 12.9GHz : 10db ........ .... Fixes: e9c9692c8a81 ("ice: Reimplement module reads used by ethtool") Signed-off-by: Petr Oros Reviewed-by: Jesse Brandeburg Tested-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index b360bd8f1599..f86e814354a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4331,6 +4331,8 @@ ice_get_module_eeprom(struct net_device *netdev, * SFP modules only ever use page 0. */ if (page == 0 || !(data[0x2] & 0x4)) { + u32 copy_len; + /* If i2c bus is busy due to slow page change or * link management access, call can fail. This is normal. * So we retry this a few times. @@ -4354,8 +4356,8 @@ ice_get_module_eeprom(struct net_device *netdev, } /* Make sure we have enough room for the new block */ - if ((i + SFF_READ_BLOCK_SIZE) < ee->len) - memcpy(data + i, value, SFF_READ_BLOCK_SIZE); + copy_len = min_t(u32, SFF_READ_BLOCK_SIZE, ee->len - i); + memcpy(data + i, value, copy_len); } } return 0; -- cgit From 3e04419cbe2d0bc10ffc20c006c6513ffa4b1ca3 Mon Sep 17 00:00:00 2001 From: Huanhuan Wang Date: Thu, 2 Mar 2023 10:58:28 +0100 Subject: nfp: fix incorrectly set csum flag for nfd3 path The csum flag of IPsec packet are set repeatedly. Therefore, the csum flag set of IPsec and non-IPsec packet need to be distinguished. As the ipv6 header does not have a csum field, so l3-csum flag is not required to be set for ipv6 case. L4-csum flag include the tcp csum flag and udp csum flag, we shouldn't set the udp and tcp csum flag at the same time for one packet, should set l4-csum flag according to the transport layer is tcp or udp. Fixes: 57f273adbcd4 ("nfp: add framework to support ipsec offloading") Signed-off-by: Huanhuan Wang Reviewed-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfd3/dp.c | 7 ++++--- drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c | 25 +++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index 59fb0583cc08..0cc026b0aefd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -324,14 +324,15 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */ nfp_nfd3_tx_tso(r_vec, txbuf, txd, skb, md_bytes); - nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb); + if (ipsec) + nfp_nfd3_ipsec_tx(txd, skb); + else + nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb); if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { txd->flags |= NFD3_DESC_TX_VLAN; txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); } - if (ipsec) - nfp_nfd3_ipsec_tx(txd, skb); /* Gather DMA */ if (nr_frags > 0) { __le64 second_half; diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c b/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c index e90f8c975903..51087693072c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c @@ -10,9 +10,30 @@ void nfp_nfd3_ipsec_tx(struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb) { struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + struct iphdr *iph = ip_hdr(skb); + int l4_proto; if (x->xso.dev && (x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)) { - txd->flags |= NFD3_DESC_TX_CSUM | NFD3_DESC_TX_IP4_CSUM | - NFD3_DESC_TX_TCP_CSUM | NFD3_DESC_TX_UDP_CSUM; + txd->flags |= NFD3_DESC_TX_CSUM; + + if (iph->version == 4) + txd->flags |= NFD3_DESC_TX_IP4_CSUM; + + if (x->props.mode == XFRM_MODE_TRANSPORT) + l4_proto = xo->proto; + else if (x->props.mode == XFRM_MODE_TUNNEL) + l4_proto = xo->inner_ipproto; + else + return; + + switch (l4_proto) { + case IPPROTO_UDP: + txd->flags |= NFD3_DESC_TX_UDP_CSUM; + return; + case IPPROTO_TCP: + txd->flags |= NFD3_DESC_TX_TCP_CSUM; + return; + } } } -- cgit From 8b46168ca79c3aad04bc20605e523127266624d4 Mon Sep 17 00:00:00 2001 From: Huanhuan Wang Date: Thu, 2 Mar 2023 10:58:29 +0100 Subject: nfp: fix incorrectly set csum flag for nfdk path The csum flag of IPsec packet are set repeatedly. Therefore, the csum flag set of IPsec and non-IPsec packet need to be distinguished. As the ipv6 header does not have a csum field, so l3-csum flag is not required to be set for ipv6 case. Fixes: 436396f26d50 ("nfp: support IPsec offloading for NFP3800") Signed-off-by: Huanhuan Wang Reviewed-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 6 ++++-- drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index d60c0e991a91..33b6d74adb4b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -387,7 +387,8 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) if (!skb_is_gso(skb)) { real_len = skb->len; /* Metadata desc */ - metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata); + if (!ipsec) + metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata); txd->raw = cpu_to_le64(metadata); txd++; } else { @@ -395,7 +396,8 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) (txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb); real_len = txbuf->real_len; /* Metadata desc */ - metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata); + if (!ipsec) + metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata); txd->raw = cpu_to_le64(metadata); txd += 2; txbuf++; diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c b/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c index 58d8f59eb885..cec199f4c852 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/ipsec.c @@ -9,9 +9,13 @@ u64 nfp_nfdk_ipsec_tx(u64 flags, struct sk_buff *skb) { struct xfrm_state *x = xfrm_input_state(skb); + struct iphdr *iph = ip_hdr(skb); - if (x->xso.dev && (x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)) - flags |= NFDK_DESC_TX_L3_CSUM | NFDK_DESC_TX_L4_CSUM; + if (x->xso.dev && (x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)) { + if (iph->version == 4) + flags |= NFDK_DESC_TX_L3_CSUM; + flags |= NFDK_DESC_TX_L4_CSUM; + } return flags; } -- cgit From 1cf78d4c4144ffdbc2c9d505db482cb204bb480b Mon Sep 17 00:00:00 2001 From: Huanhuan Wang Date: Thu, 2 Mar 2023 10:58:30 +0100 Subject: nfp: fix esp-tx-csum-offload doesn't take effect When esp-tx-csum-offload is set to on, the protocol stack shouldn't calculate the IPsec offload packet's csum, but it does. Because the callback `.ndo_features_check` incorrectly masked NETIF_F_CSUM_MASK bit. Fixes: 57f273adbcd4 ("nfp: add framework to support ipsec offloading") Signed-off-by: Huanhuan Wang Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 81b7ca0ad222..62f0bf91d1e1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "nfpcore/nfp_dev.h" #include "nfpcore/nfp_nsp.h" @@ -1897,6 +1898,9 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, features &= ~NETIF_F_GSO_MASK; } + if (xfrm_offload(skb)) + return features; + /* VXLAN/GRE check */ switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): -- cgit From d900f3d20cc3169ce42ec72acc850e662a4d4db2 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Fri, 3 Mar 2023 16:09:46 +0800 Subject: bpf, sockmap: Fix an infinite loop error when len is 0 in tcp_bpf_recvmsg_parser() When the buffer length of the recvmsg system call is 0, we got the flollowing soft lockup problem: watchdog: BUG: soft lockup - CPU#3 stuck for 27s! [a.out:6149] CPU: 3 PID: 6149 Comm: a.out Kdump: loaded Not tainted 6.2.0+ #30 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:remove_wait_queue+0xb/0xc0 Code: 5e 41 5f c3 cc cc cc cc 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 41 57 <41> 56 41 55 41 54 55 48 89 fd 53 48 89 f3 4c 8d 6b 18 4c 8d 73 20 RSP: 0018:ffff88811b5978b8 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff88811a7d3780 RCX: ffffffffb7a4d768 RDX: dffffc0000000000 RSI: ffff88811b597908 RDI: ffff888115408040 RBP: 1ffff110236b2f1b R08: 0000000000000000 R09: ffff88811a7d37e7 R10: ffffed10234fa6fc R11: 0000000000000001 R12: ffff88811179b800 R13: 0000000000000001 R14: ffff88811a7d38a8 R15: ffff88811a7d37e0 FS: 00007f6fb5398740(0000) GS:ffff888237180000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000000 CR3: 000000010b6ba002 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_msg_wait_data+0x279/0x2f0 tcp_bpf_recvmsg_parser+0x3c6/0x490 inet_recvmsg+0x280/0x290 sock_recvmsg+0xfc/0x120 ____sys_recvmsg+0x160/0x3d0 ___sys_recvmsg+0xf0/0x180 __sys_recvmsg+0xea/0x1a0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc The logic in tcp_bpf_recvmsg_parser is as follows: msg_bytes_ready: copied = sk_msg_recvmsg(sk, psock, msg, len, flags); if (!copied) { wait data; goto msg_bytes_ready; } In this case, "copied" always is 0, the infinite loop occurs. According to the Linux system call man page, 0 should be returned in this case. Therefore, in tcp_bpf_recvmsg_parser(), if the length is 0, directly return. Also modify several other functions with the same problem. Fixes: 1f5be6b3b063 ("udp: Implement udp_bpf_recvmsg() for sockmap") Fixes: 9825d866ce0d ("af_unix: Implement unix_dgram_bpf_recvmsg()") Fixes: c5d2177a72a1 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Cc: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20230303080946.1146638-1-liujian56@huawei.com --- net/ipv4/tcp_bpf.c | 6 ++++++ net/ipv4/udp_bpf.c | 3 +++ net/unix/unix_bpf.c | 3 +++ 3 files changed, 12 insertions(+) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index cf26d65ca389..ebf917511937 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -186,6 +186,9 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, flags, addr_len); @@ -244,6 +247,9 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, flags, addr_len); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index e5dc91d0e079..0735d820e413 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -68,6 +68,9 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return sk_udp_recvmsg(sk, msg, len, flags, addr_len); diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index e9bf15513961..2f9d8271c6ec 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -54,6 +54,9 @@ static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, struct sk_psock *psock; int copied; + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return __unix_recvmsg(sk, msg, len, flags); -- cgit From a9334b702a03b693f54ebd3b98f67bf722b74870 Mon Sep 17 00:00:00 2001 From: Rongguang Wei Date: Thu, 2 Mar 2023 14:21:43 +0800 Subject: net: stmmac: add to set device wake up flag when stmmac init phy When MAC is not support PMT, driver will check PHY's WoL capability and set device wakeup capability in stmmac_init_phy(). We can enable the WoL through ethtool, the driver would enable the device wake up flag. Now the device_may_wakeup() return true. But if there is a way which enable the PHY's WoL capability derectly, like in BIOS. The driver would not know the enable thing and would not set the device wake up flag. The phy_suspend may failed like this: [ 32.409063] PM: dpm_run_callback(): mdio_bus_phy_suspend+0x0/0x50 returns -16 [ 32.409065] PM: Device stmmac-1:00 failed to suspend: error -16 [ 32.409067] PM: Some devices failed to suspend, or early wake event detected Add to set the device wakeup enable flag according to the get_wol function result in PHY can fix the error in this scene. v2: add a Fixes tag. Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy") Signed-off-by: Rongguang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e4902a7bb61e..8f543c3ab5c5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1170,6 +1170,7 @@ static int stmmac_init_phy(struct net_device *dev) phylink_ethtool_get_wol(priv->phylink, &wol); device_set_wakeup_capable(priv->device, !!wol.supported); + device_set_wakeup_enable(priv->device, !!wol.wolopts); } return ret; -- cgit From f4b47a2e9463950df3e7c8b70e017877c1d4eb11 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 3 Mar 2023 16:37:54 +0000 Subject: net: phylib: get rid of unnecessary locking The locking in phy_probe() and phy_remove() does very little to prevent any races with e.g. phy_attach_direct(), but instead causes lockdep ABBA warnings. Remove it. ====================================================== WARNING: possible circular locking dependency detected 6.2.0-dirty #1108 Tainted: G W E ------------------------------------------------------ ip/415 is trying to acquire lock: ffff5c268f81ef50 (&dev->lock){+.+.}-{3:3}, at: phy_attach_direct+0x17c/0x3a0 [libphy] but task is already holding lock: ffffaef6496cb518 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x154/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (rtnl_mutex){+.+.}-{3:3}: __lock_acquire+0x35c/0x6c0 lock_acquire.part.0+0xcc/0x220 lock_acquire+0x68/0x84 __mutex_lock+0x8c/0x414 mutex_lock_nested+0x34/0x40 rtnl_lock+0x24/0x30 sfp_bus_add_upstream+0x34/0x150 phy_sfp_probe+0x4c/0x94 [libphy] mv3310_probe+0x148/0x184 [marvell10g] phy_probe+0x8c/0x200 [libphy] call_driver_probe+0xbc/0x15c really_probe+0xc0/0x320 __driver_probe_device+0x84/0x120 driver_probe_device+0x44/0x120 __device_attach_driver+0xc4/0x160 bus_for_each_drv+0x80/0xe0 __device_attach+0xb0/0x1f0 device_initial_probe+0x1c/0x2c bus_probe_device+0xa4/0xb0 device_add+0x360/0x53c phy_device_register+0x60/0xa4 [libphy] fwnode_mdiobus_phy_device_register+0xc0/0x190 [fwnode_mdio] fwnode_mdiobus_register_phy+0x160/0xd80 [fwnode_mdio] of_mdiobus_register+0x140/0x340 [of_mdio] orion_mdio_probe+0x298/0x3c0 [mvmdio] platform_probe+0x70/0xe0 call_driver_probe+0x34/0x15c really_probe+0xc0/0x320 __driver_probe_device+0x84/0x120 driver_probe_device+0x44/0x120 __driver_attach+0x104/0x210 bus_for_each_dev+0x78/0xdc driver_attach+0x2c/0x3c bus_add_driver+0x184/0x240 driver_register+0x80/0x13c __platform_driver_register+0x30/0x3c xt_compat_calc_jump+0x28/0xa4 [x_tables] do_one_initcall+0x50/0x1b0 do_init_module+0x50/0x1fc load_module+0x684/0x744 __do_sys_finit_module+0xc4/0x140 __arm64_sys_finit_module+0x28/0x34 invoke_syscall+0x50/0x120 el0_svc_common.constprop.0+0x6c/0x1b0 do_el0_svc+0x34/0x44 el0_svc+0x48/0xf0 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x1a0/0x1a4 -> #0 (&dev->lock){+.+.}-{3:3}: check_prev_add+0xb4/0xc80 validate_chain+0x414/0x47c __lock_acquire+0x35c/0x6c0 lock_acquire.part.0+0xcc/0x220 lock_acquire+0x68/0x84 __mutex_lock+0x8c/0x414 mutex_lock_nested+0x34/0x40 phy_attach_direct+0x17c/0x3a0 [libphy] phylink_fwnode_phy_connect.part.0+0x70/0xe4 [phylink] phylink_fwnode_phy_connect+0x48/0x60 [phylink] mvpp2_open+0xec/0x2e0 [mvpp2] __dev_open+0x104/0x214 __dev_change_flags+0x1d4/0x254 dev_change_flags+0x2c/0x7c do_setlink+0x254/0xa50 __rtnl_newlink+0x430/0x514 rtnl_newlink+0x58/0x8c rtnetlink_rcv_msg+0x17c/0x560 netlink_rcv_skb+0x64/0x150 rtnetlink_rcv+0x20/0x30 netlink_unicast+0x1d4/0x2b4 netlink_sendmsg+0x1a4/0x400 ____sys_sendmsg+0x228/0x290 ___sys_sendmsg+0x88/0xec __sys_sendmsg+0x70/0xd0 __arm64_sys_sendmsg+0x2c/0x40 invoke_syscall+0x50/0x120 el0_svc_common.constprop.0+0x6c/0x1b0 do_el0_svc+0x34/0x44 el0_svc+0x48/0xf0 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x1a0/0x1a4 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(&dev->lock); lock(rtnl_mutex); lock(&dev->lock); *** DEADLOCK *** Fixes: 298e54fa810e ("net: phy: add core phylib sfp support") Reported-by: Marc Zyngier Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9e9fd8ff00f6..1785f1cead97 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3098,8 +3098,6 @@ static int phy_probe(struct device *dev) if (phydrv->flags & PHY_IS_INTERNAL) phydev->is_internal = true; - mutex_lock(&phydev->lock); - /* Deassert the reset signal */ phy_device_reset(phydev, 0); @@ -3188,12 +3186,10 @@ static int phy_probe(struct device *dev) phydev->state = PHY_READY; out: - /* Assert the reset signal */ + /* Re-assert the reset signal on error */ if (err) phy_device_reset(phydev, 1); - mutex_unlock(&phydev->lock); - return err; } @@ -3203,9 +3199,7 @@ static int phy_remove(struct device *dev) cancel_delayed_work_sync(&phydev->state_queue); - mutex_lock(&phydev->lock); phydev->state = PHY_DOWN; - mutex_unlock(&phydev->lock); sfp_bus_del_upstream(phydev->sfp_bus); phydev->sfp_bus = NULL; -- cgit From accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 3 Mar 2023 18:43:57 -0800 Subject: bnxt_en: Avoid order-5 memory allocation for TPA data The driver needs to keep track of all the possible concurrent TPA (GRO/LRO) completions on the aggregation ring. On P5 chips, the maximum number of concurrent TPA is 256 and the amount of memory we allocate is order-5 on systems using 4K pages. Memory allocation failure has been reported: NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1 CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1 Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022 Call Trace: dump_stack+0x57/0x6e warn_alloc.cold.120+0x7b/0xdd ? _cond_resched+0x15/0x30 ? __alloc_pages_direct_compact+0x15f/0x170 __alloc_pages_slowpath.constprop.108+0xc58/0xc70 __alloc_pages_nodemask+0x2d0/0x300 kmalloc_order+0x24/0xe0 kmalloc_order_trace+0x19/0x80 bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en] ? bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en] __bnxt_open_nic+0x12e/0x780 [bnxt_en] bnxt_open+0x10b/0x240 [bnxt_en] __dev_open+0xe9/0x180 __dev_change_flags+0x1af/0x220 dev_change_flags+0x21/0x60 do_setlink+0x35c/0x1100 Instead of allocating this big chunk of memory and dividing it up for the concurrent TPA instances, allocate each small chunk separately for each TPA instance. This will reduce it to order-0 allocations. Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.") Reviewed-by: Somnath Kotur Reviewed-by: Damodharam Ammepalli Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d4b1f2ebeac..f96d539b469c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3145,7 +3145,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) static void bnxt_free_tpa_info(struct bnxt *bp) { - int i; + int i, j; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; @@ -3153,8 +3153,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp) kfree(rxr->rx_tpa_idx_map); rxr->rx_tpa_idx_map = NULL; if (rxr->rx_tpa) { - kfree(rxr->rx_tpa[0].agg_arr); - rxr->rx_tpa[0].agg_arr = NULL; + for (j = 0; j < bp->max_tpa; j++) { + kfree(rxr->rx_tpa[j].agg_arr); + rxr->rx_tpa[j].agg_arr = NULL; + } } kfree(rxr->rx_tpa); rxr->rx_tpa = NULL; @@ -3163,14 +3165,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp) static int bnxt_alloc_tpa_info(struct bnxt *bp) { - int i, j, total_aggs = 0; + int i, j; bp->max_tpa = MAX_TPA; if (bp->flags & BNXT_FLAG_CHIP_P5) { if (!bp->max_tpa_v2) return 0; bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); - total_aggs = bp->max_tpa * MAX_SKB_FRAGS; } for (i = 0; i < bp->rx_nr_rings; i++) { @@ -3184,12 +3185,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) if (!(bp->flags & BNXT_FLAG_CHIP_P5)) continue; - agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL); - rxr->rx_tpa[0].agg_arr = agg; - if (!agg) - return -ENOMEM; - for (j = 1; j < bp->max_tpa; j++) - rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS; + for (j = 0; j < bp->max_tpa; j++) { + agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); + if (!agg) + return -ENOMEM; + rxr->rx_tpa[j].agg_arr = agg; + } rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), GFP_KERNEL); if (!rxr->rx_tpa_idx_map) -- cgit From 89b59a84cb166f1ab5b6de9830e61324937c661e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Fri, 3 Mar 2023 18:43:58 -0800 Subject: bnxt_en: Fix the double free during device removal Following warning reported by KASAN during driver unload ================================================================== BUG: KASAN: double-free in bnxt_remove_one+0x103/0x200 [bnxt_en] Free of addr ffff88814e8dd4c0 by task rmmod/17469 CPU: 47 PID: 17469 Comm: rmmod Kdump: loaded Tainted: G S 6.2.0-rc7+ #2 Hardware name: Dell Inc. PowerEdge R740/01YM03, BIOS 2.3.10 08/15/2019 Call Trace: dump_stack_lvl+0x33/0x46 print_report+0x17b/0x4b3 ? __call_rcu_common.constprop.79+0x27e/0x8c0 ? __pfx_free_object_rcu+0x10/0x10 ? __virt_addr_valid+0xe3/0x160 ? bnxt_remove_one+0x103/0x200 [bnxt_en] kasan_report_invalid_free+0x64/0xd0 ? bnxt_remove_one+0x103/0x200 [bnxt_en] ? bnxt_remove_one+0x103/0x200 [bnxt_en] __kasan_slab_free+0x179/0x1c0 ? bnxt_remove_one+0x103/0x200 [bnxt_en] __kmem_cache_free+0x194/0x350 bnxt_remove_one+0x103/0x200 [bnxt_en] pci_device_remove+0x62/0x110 device_release_driver_internal+0xf6/0x1c0 driver_detach+0x76/0xe0 bus_remove_driver+0x89/0x160 pci_unregister_driver+0x26/0x110 ? strncpy_from_user+0x188/0x1c0 bnxt_exit+0xc/0x24 [bnxt_en] __x64_sys_delete_module+0x21f/0x390 ? __pfx___x64_sys_delete_module+0x10/0x10 ? __pfx_mem_cgroup_handle_over_high+0x10/0x10 ? _raw_spin_lock+0x87/0xe0 ? __pfx__raw_spin_lock+0x10/0x10 ? __audit_syscall_entry+0x185/0x210 ? ktime_get_coarse_real_ts64+0x51/0x80 ? syscall_trace_enter.isra.18+0x126/0x1a0 do_syscall_64+0x37/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7effcb6fd71b Code: 73 01 c3 48 8b 0d 6d 17 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 3d 17 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffeada270b8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 00005623660e0750 RCX: 00007effcb6fd71b RDX: 000000000000000a RSI: 0000000000000800 RDI: 00005623660e07b8 RBP: 0000000000000000 R08: 00007ffeada26031 R09: 0000000000000000 R10: 00007effcb771280 R11: 0000000000000206 R12: 00007ffeada272e0 R13: 00007ffeada28bc4 R14: 00005623660e02a0 R15: 00005623660e0750 Auxiliary device structures are freed in bnxt_aux_dev_release. So avoid calling kfree from bnxt_remove_one. Also, set bp->edev to NULL before freeing the auxilary private structure. Fixes: d80d88b0dfff ("bnxt_en: Add auxiliary driver support") Reviewed-by: Ajit Khaparde Reviewed-by: Andy Gospodarek Signed-off-by: Selvin Xavier Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 -- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f96d539b469c..808236dc898b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13205,8 +13205,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_free_hwrm_resources(bp); bnxt_ethtool_free(bp); bnxt_dcb_free(bp); - kfree(bp->edev); - bp->edev = NULL; kfree(bp->ptp_cfg); bp->ptp_cfg = NULL; kfree(bp->fw_health); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index d4cc9c371e7b..e7b5e28ee29f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -317,9 +317,11 @@ static void bnxt_aux_dev_release(struct device *dev) { struct bnxt_aux_priv *aux_priv = container_of(dev, struct bnxt_aux_priv, aux_dev.dev); + struct bnxt *bp = netdev_priv(aux_priv->edev->net); ida_free(&bnxt_aux_dev_ids, aux_priv->id); kfree(aux_priv->edev->ulp_tbl); + bp->edev = NULL; kfree(aux_priv->edev); kfree(aux_priv); } -- cgit From 9f7dd42f0db1dc6915a52d4a8a96ca18dd8cc34e Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 2 Mar 2023 17:48:31 -0800 Subject: netfilter: ctnetlink: revert to dumping mark regardless of event type It seems that change was unintentional, we have userspace code that needs the mark while listening for events like REPLY, DESTROY, etc. Also include 0-marks in requested dumps, as they were before that fix. Fixes: 1feeae071507 ("netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark") Signed-off-by: Ivan Delalande Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c11dff91d52d..bfc3aaa2c872 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,11 +328,12 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct, + bool dump) { u32 mark = READ_ONCE(ct->mark); - if (!mark) + if (!mark && !dump) return 0; if (nla_put_be32(skb, CTA_MARK, htonl(mark))) @@ -343,7 +344,7 @@ nla_put_failure: return -1; } #else -#define ctnetlink_dump_mark(a, b) (0) +#define ctnetlink_dump_mark(a, b, c) (0) #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -548,7 +549,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct, true) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -831,8 +832,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if (events & (1 << IPCT_MARK) && - ctnetlink_dump_mark(skb, ct) < 0) + if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK))) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -2735,7 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ctnetlink_dump_mark(skb, ct) < 0) + if (ctnetlink_dump_mark(skb, ct, true) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) -- cgit From 4a02426787bf024dafdb79b362285ee325de3f5e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Mar 2023 10:58:56 +0100 Subject: netfilter: tproxy: fix deadlock due to missing BH disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xtables packet traverser performs an unconditional local_bh_disable(), but the nf_tables evaluation loop does not. Functions that are called from either xtables or nftables must assume that they can be called in process context. inet_twsk_deschedule_put() assumes that no softirq interrupt can occur. If tproxy is used from nf_tables its possible that we'll deadlock trying to aquire a lock already held in process context. Add a small helper that takes care of this and use it. Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/ Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Reported-and-tested-by: Major Dávid Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tproxy.h | 7 +++++++ net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +- net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 82d0e41b76f2..faa108b1ba67 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) return false; } +static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) +{ + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); +} + /* assign a socket to the skb -- consumes sk */ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index b22b2c745c76..69e331799604 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, hp->source, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index 929502e51203..52f828bb5a83 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } -- cgit From 32db18d606c9a6eac7ac8bb12b4bf0e2eb1d5d14 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 28 Feb 2023 14:45:22 +0700 Subject: bpf, doc: Do not link to docs.kernel.org for kselftest link The question on how to run BPF selftests have a reference link to kernel selftest documentation (Documentation/dev-tools/kselftest.rst). However, it uses external link to the documentation at kernel.org/docs (aka docs.kernel.org) instead, which requires Internet access. Fix this and replace the link with internal linking, by using :doc: directive while keeping the anchor text. Fixes: b7a27c3aafa252 ("bpf, doc: howto use/run the BPF selftests") Signed-off-by: Bagas Sanjaya Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230228074523.11493-2-bagasdotme@gmail.com --- Documentation/bpf/bpf_devel_QA.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index 715f7321020f..eb087c599ffa 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -461,8 +461,8 @@ needed:: $ sudo make run_tests -See the kernels selftest `Documentation/dev-tools/kselftest.rst`_ -document for further documentation. +See :doc:`kernel selftest documentation ` +for details. To maximize the number of tests passing, the .config of the kernel under test should match the config file fragment in @@ -688,7 +688,5 @@ when: .. _netdev-FAQ: Documentation/process/maintainer-netdev.rst .. _selftests: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/ -.. _Documentation/dev-tools/kselftest.rst: - https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html Happy BPF hacking! -- cgit From b7abcd9c656b982a99e18f795bb1cf81f84b656d Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 28 Feb 2023 14:45:23 +0700 Subject: bpf, doc: Link to submitting-patches.rst for general patch submission info The link for patch submission information in general refers to index page for "Working with the kernel development community" section of kernel docs, whereas the link should have been Documentation/process/submitting-patches.rst instead. Fix it by replacing the index target with the appropriate doc. Fixes: 542228384888f5 ("bpf, doc: convert bpf_devel_QA.rst to use RST formatting") Signed-off-by: Bagas Sanjaya Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230228074523.11493-3-bagasdotme@gmail.com --- Documentation/bpf/bpf_devel_QA.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index eb087c599ffa..b421d94dc9f2 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -7,8 +7,8 @@ workflows related to reporting bugs, submitting patches, and queueing patches for stable kernels. For general information about submitting patches, please refer to -`Documentation/process/`_. This document only describes additional specifics -related to BPF. +Documentation/process/submitting-patches.rst. This document only describes +additional specifics related to BPF. .. contents:: :local: @@ -684,7 +684,6 @@ when: .. Links -.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/ .. _netdev-FAQ: Documentation/process/maintainer-netdev.rst .. _selftests: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/ -- cgit From 294635a8165a31408a8b3a24f9c74849ca3d8701 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Feb 2023 17:36:07 +0100 Subject: bpf, test_run: fix &xdp_frame misplacement for LIVE_FRAMES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit &xdp_buff and &xdp_frame are bound in a way that xdp_buff->data_hard_start == xdp_frame It's always the case and e.g. xdp_convert_buff_to_frame() relies on this. IOW, the following: for (u32 i = 0; i < 0xdead; i++) { xdpf = xdp_convert_buff_to_frame(&xdp); xdp_convert_frame_to_buff(xdpf, &xdp); } shouldn't ever modify @xdpf's contents or the pointer itself. However, "live packet" code wrongly treats &xdp_frame as part of its context placed *before* the data_hard_start. With such flow, data_hard_start is sizeof(*xdpf) off to the right and no longer points to the XDP frame. Instead of replacing `sizeof(ctx)` with `offsetof(ctx, xdpf)` in several places and praying that there are no more miscalcs left somewhere in the code, unionize ::frm with ::data in a flex array, so that both starts pointing to the actual data_hard_start and the XDP frame actually starts being a part of it, i.e. a part of the headroom, not the context. A nice side effect is that the maximum frame size for this mode gets increased by 40 bytes, as xdp_buff::frame_sz includes everything from data_hard_start (-> includes xdpf already) to the end of XDP/skb shared info. Also update %MAX_PKT_SIZE accordingly in the selftests code. Leave it hardcoded for 64 bit && 4k pages, it can be made more flexible later on. Minor: align `&head->data` with how `head->frm` is assigned for consistency. Minor #2: rename 'frm' to 'frame' in &xdp_page_head while at it for clarity. (was found while testing XDP traffic generator on ice, which calls xdp_convert_frame_to_buff() for each XDP frame) Fixes: b530e9e1063e ("bpf: Add "live packet" mode for XDP in BPF_PROG_RUN") Acked-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230224163607.2994755-1-aleksander.lobakin@intel.com Signed-off-by: Martin KaFai Lau --- net/bpf/test_run.c | 19 +++++++++++++------ .../selftests/bpf/prog_tests/xdp_do_redirect.c | 7 ++++--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6f3d654b3339..f81b24320a36 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -97,8 +97,11 @@ reset: struct xdp_page_head { struct xdp_buff orig_ctx; struct xdp_buff ctx; - struct xdp_frame frm; - u8 data[]; + union { + /* ::data_hard_start starts here */ + DECLARE_FLEX_ARRAY(struct xdp_frame, frame); + DECLARE_FLEX_ARRAY(u8, data); + }; }; struct xdp_test_data { @@ -113,6 +116,10 @@ struct xdp_test_data { u32 frame_cnt; }; +/* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE + * must be updated accordingly this gets changed, otherwise BPF selftests + * will fail. + */ #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) #define TEST_XDP_MAX_BATCH 256 @@ -132,8 +139,8 @@ static void xdp_test_run_init_page(struct page *page, void *arg) headroom -= meta_len; new_ctx = &head->ctx; - frm = &head->frm; - data = &head->data; + frm = head->frame; + data = head->data; memcpy(data + headroom, orig_ctx->data_meta, frm_len); xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq); @@ -223,7 +230,7 @@ static void reset_ctx(struct xdp_page_head *head) head->ctx.data = head->orig_ctx.data; head->ctx.data_meta = head->orig_ctx.data_meta; head->ctx.data_end = head->orig_ctx.data_end; - xdp_update_frame_from_buff(&head->ctx, &head->frm); + xdp_update_frame_from_buff(&head->ctx, head->frame); } static int xdp_recv_frames(struct xdp_frame **frames, int nframes, @@ -285,7 +292,7 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, head = phys_to_virt(page_to_phys(page)); reset_ctx(head); ctx = &head->ctx; - frm = &head->frm; + frm = head->frame; xdp->frame_cnt++; act = bpf_prog_run_xdp(prog, ctx); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 2666c84dbd01..7271a18ab3e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -65,12 +65,13 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) } /* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) - - * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes + * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM = + * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one. */ #if defined(__s390x__) -#define MAX_PKT_SIZE 3176 +#define MAX_PKT_SIZE 3216 #else -#define MAX_PKT_SIZE 3368 +#define MAX_PKT_SIZE 3408 #endif static void test_max_pkt_size(int fd) { -- cgit From 9b459804ff9973e173fabafba2a1319f771e85fa Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 6 Mar 2023 11:21:37 +0000 Subject: btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR btf_datasec_resolve contains a bug that causes the following BTF to fail loading: [1] DATASEC a size=2 vlen=2 type_id=4 offset=0 size=1 type_id=7 offset=1 size=1 [2] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) [3] PTR (anon) type_id=2 [4] VAR a type_id=3 linkage=0 [5] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) [6] TYPEDEF td type_id=5 [7] VAR b type_id=6 linkage=0 This error message is printed during btf_check_all_types: [1] DATASEC a size=2 vlen=2 type_id=7 offset=1 size=1 Invalid type By tracing btf_*_resolve we can pinpoint the problem: btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_TBD) = 0 btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_TBD) = 0 btf_ptr_resolve(depth: 3, type_id: 3, mode: RESOLVE_PTR) = 0 btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_PTR) = 0 btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_PTR) = -22 The last invocation of btf_datasec_resolve should invoke btf_var_resolve by means of env_stack_push, instead it returns EINVAL. The reason is that env_stack_push is never executed for the second VAR. if (!env_type_is_resolve_sink(env, var_type) && !env_type_is_resolved(env, var_type_id)) { env_stack_set_next_member(env, i + 1); return env_stack_push(env, var_type, var_type_id); } env_type_is_resolve_sink() changes its behaviour based on resolve_mode. For RESOLVE_PTR, we can simplify the if condition to the following: (btf_type_is_modifier() || btf_type_is_ptr) && !env_type_is_resolved() Since we're dealing with a VAR the clause evaluates to false. This is not sufficient to trigger the bug however. The log output and EINVAL are only generated if btf_type_id_size() fails. if (!btf_type_id_size(btf, &type_id, &type_size)) { btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); return -EINVAL; } Most types are sized, so for example a VAR referring to an INT is not a problem. The bug is only triggered if a VAR points at a modifier. Since we skipped btf_var_resolve that modifier was also never resolved, which means that btf_resolved_type_id returns 0 aka VOID for the modifier. This in turn causes btf_type_id_size to return NULL, triggering EINVAL. To summarise, the following conditions are necessary: - VAR pointing at PTR, STRUCT, UNION or ARRAY - Followed by a VAR pointing at TYPEDEF, VOLATILE, CONST, RESTRICT or TYPE_TAG The fix is to reset resolve_mode to RESOLVE_TBD before attempting to resolve a VAR from a DATASEC. Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec") Signed-off-by: Lorenz Bauer Link: https://lore.kernel.org/r/20230306112138.155352-2-lmb@isovalent.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/btf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index fa22ec79ac0e..73780748404c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4569,6 +4569,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env, struct btf *btf = env->btf; u16 i; + env->resolve_mode = RESOLVE_TBD; for_each_vsi_from(i, v->next_member, v->t, vsi) { u32 var_type_id = vsi->type, type_id, type_size = 0; const struct btf_type *var_type = btf_type_by_id(env->btf, -- cgit From dfdd608c3b365f0fd49d7e13911ebcde06b9865b Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 6 Mar 2023 11:21:38 +0000 Subject: selftests/bpf: check that modifier resolves after pointer Add a regression test that ensures that a VAR pointing at a modifier which follows a PTR (or STRUCT or ARRAY) is resolved correctly by the datasec validator. Signed-off-by: Lorenz Bauer Link: https://lore.kernel.org/r/20230306112138.155352-3-lmb@isovalent.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/btf.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index cbb600be943d..210d643fda6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -879,6 +879,34 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Invalid elem", }, +{ + .descr = "var after datasec, ptr followed by modifier", + .raw_types = { + /* .bss section */ /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), + sizeof(void*)+4), + BTF_VAR_SECINFO_ENC(4, 0, sizeof(void*)), + BTF_VAR_SECINFO_ENC(6, sizeof(void*), 4), + /* int */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int* */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), + BTF_VAR_ENC(NAME_TBD, 3, 0), /* [4] */ + /* const int */ /* [5] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), + BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0c\0", + .str_sec_size = sizeof("\0a\0b\0c\0"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = ".bss", + .key_size = sizeof(int), + .value_size = sizeof(void*)+4, + .key_type_id = 0, + .value_type_id = 1, + .max_entries = 1, +}, /* Test member exceeds the size of struct. * * struct A { -- cgit From 58aac3a2ef414fea6d7fdf823ea177744a087d13 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 4 Mar 2023 11:52:44 +0100 Subject: net: phy: smsc: fix link up detection in forced irq mode Currently link up can't be detected in forced mode if polling isn't used. Only link up interrupt source we have is aneg complete which isn't applicable in forced mode. Therefore we have to use energy-on as link up indicator. Fixes: 7365494550f6 ("net: phy: smsc: skip ENERGYON interrupt if disabled") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/smsc.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index ac7481ce2fc1..00d9eff91dcf 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -44,7 +44,6 @@ static struct smsc_hw_stat smsc_hw_stats[] = { }; struct smsc_phy_priv { - u16 intmask; bool energy_enable; }; @@ -57,7 +56,6 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev) static int smsc_phy_config_intr(struct phy_device *phydev) { - struct smsc_phy_priv *priv = phydev->priv; int rc; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { @@ -65,14 +63,9 @@ static int smsc_phy_config_intr(struct phy_device *phydev) if (rc) return rc; - priv->intmask = MII_LAN83C185_ISF_INT4 | MII_LAN83C185_ISF_INT6; - if (priv->energy_enable) - priv->intmask |= MII_LAN83C185_ISF_INT7; - - rc = phy_write(phydev, MII_LAN83C185_IM, priv->intmask); + rc = phy_write(phydev, MII_LAN83C185_IM, + MII_LAN83C185_ISF_INT_PHYLIB_EVENTS); } else { - priv->intmask = 0; - rc = phy_write(phydev, MII_LAN83C185_IM, 0); if (rc) return rc; @@ -85,7 +78,6 @@ static int smsc_phy_config_intr(struct phy_device *phydev) static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev) { - struct smsc_phy_priv *priv = phydev->priv; int irq_status; irq_status = phy_read(phydev, MII_LAN83C185_ISF); @@ -96,7 +88,7 @@ static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } - if (!(irq_status & priv->intmask)) + if (!(irq_status & MII_LAN83C185_ISF_INT_PHYLIB_EVENTS)) return IRQ_NONE; phy_trigger_machine(phydev); -- cgit From 193250ace270fecd586dd2d0dfbd9cbd2ade977f Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 4 Mar 2023 13:43:20 +0000 Subject: net: ethernet: mtk_eth_soc: fix RX data corruption issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix data corruption issue with SerDes connected PHYs operating at 1.25 Gbps speed where we could previously observe about 30% packet loss while the bad packet counter was increasing. As almost all boards with MediaTek MT7622 or MT7986 use either the MT7531 switch IC operating at 3.125Gbps SerDes rate or single-port PHYs using rate-adaptation to 2500Base-X mode, this issue only got exposed now when we started trying to use SFP modules operating with 1.25 Gbps with the BananaPi R3 board. The fix is to set bit 12 which disables the RX FIFO clear function when setting up MAC MCR, MediaTek SDK did the same change stating: "If without this patch, kernel might receive invalid packets that are corrupted by GMAC."[1] [1]: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/d8a2975939a12686c4a95c40db21efdc3f821f63 Fixes: 42c03844e93d ("net-next: mediatek: add support for MediaTek MT7622 SoC") Tested-by: Bjørn Mork Signed-off-by: Daniel Golle Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/138da2735f92c8b6f8578ec2e5a794ee515b665f.1677937317.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 14be6ea51b88..3cb43623d3db 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -616,7 +616,8 @@ static int mtk_mac_finish(struct phylink_config *config, unsigned int mode, mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); mcr_new = mcr_cur; mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | - MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK; + MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK | + MAC_MCR_RX_FIFO_CLR_DIS; /* Only update control register when needed! */ if (mcr_new != mcr_cur) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index afc9d52e79bf..b65de174c3d9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -397,6 +397,7 @@ #define MAC_MCR_FORCE_MODE BIT(15) #define MAC_MCR_TX_EN BIT(14) #define MAC_MCR_RX_EN BIT(13) +#define MAC_MCR_RX_FIFO_CLR_DIS BIT(12) #define MAC_MCR_BACKOFF_EN BIT(9) #define MAC_MCR_BACKPR_EN BIT(8) #define MAC_MCR_FORCE_RX_FC BIT(5) -- cgit From e539a105f947b9db470fec39fe91d85fe737a432 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 4 Mar 2023 11:26:10 -0800 Subject: net: tls: fix device-offloaded sendpage straddling records Adrien reports that incorrect data is transmitted when a single page straddles multiple records. We would transmit the same data in all iterations of the loop. Reported-by: Adrien Moulin Link: https://lore.kernel.org/all/61481278.42813558.1677845235112.JavaMail.zimbra@corp.free.fr Fixes: c1318b39c7d3 ("tls: Add opt-in zerocopy mode of sendfile()") Tested-by: Adrien Moulin Reviewed-by: Tariq Toukan Acked-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20230304192610.3818098-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 6c593788dc25..a7cc4f9faac2 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -508,6 +508,8 @@ handle_error: zc_pfrag.offset = iter_offset.offset; zc_pfrag.size = copy; tls_append_frag(record, &zc_pfrag, copy); + + iter_offset.offset += copy; } else if (copy) { copy = min_t(size_t, copy, pfrag->size - pfrag->offset); -- cgit From c77737b736ceb50fdf150434347dbd81ec76dbb1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Mar 2023 05:22:54 +0000 Subject: netfilter: conntrack: adopt safer max chain length Customers using GKE 1.25 and 1.26 are facing conntrack issues root caused to commit c9c3b6811f74 ("netfilter: conntrack: make max chain length random"). Even if we assume Uniform Hashing, a bucket often reachs 8 chained items while the load factor of the hash table is smaller than 0.5 With a limit of 16, we reach load factors of 3. With a limit of 32, we reach load factors of 11. With a limit of 40, we reach load factors of 15. With a limit of 50, we reach load factors of 24. This patch changes MIN_CHAINLEN to 50, to minimize risks. Ideally, we could in the future add a cushion based on expected load factor (2 * nf_conntrack_max / nf_conntrack_buckets), because some setups might expect unusual values. Fixes: c9c3b6811f74 ("netfilter: conntrack: make max chain length random") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7250082e7de5..c6a6a6099b4e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -96,8 +96,8 @@ static DEFINE_MUTEX(nf_conntrack_mutex); #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) #define GC_SCAN_EXPIRED_MAX (64000u / HZ) -#define MIN_CHAINLEN 8u -#define MAX_CHAINLEN (32u - MIN_CHAINLEN) +#define MIN_CHAINLEN 50u +#define MAX_CHAINLEN (80u - MIN_CHAINLEN) static struct conntrack_gc_work conntrack_gc_work; -- cgit From 418383e6ed6b4624a54ec05c535f13d184fbf33b Mon Sep 17 00:00:00 2001 From: Enrico Sau Date: Mon, 6 Mar 2023 12:59:33 +0100 Subject: net: usb: cdc_mbim: avoid altsetting toggling for Telit FE990 Add quirk CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE for Telit FE990 0x1081 composition in order to avoid bind error. Signed-off-by: Enrico Sau Link: https://lore.kernel.org/r/20230306115933.198259-1-enrico.sau@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/cdc_mbim.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index c89639381eca..cd4083e0b3b9 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -665,6 +665,11 @@ static const struct usb_device_id mbim_devs[] = { .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, }, + /* Telit FE990 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1081, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, + }, + /* default entry */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_zlp, -- cgit From 382e363d5bed0cec5807b35761d14e55955eee63 Mon Sep 17 00:00:00 2001 From: Enrico Sau Date: Mon, 6 Mar 2023 13:05:28 +0100 Subject: net: usb: qmi_wwan: add Telit 0x1080 composition Add the following Telit FE990 composition: 0x1080: tty, adb, rmnet, tty, tty, tty, tty Signed-off-by: Enrico Sau Link: https://lore.kernel.org/r/20230306120528.198842-1-enrico.sau@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index a808d718c012..571e37e67f9c 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1364,6 +1364,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ -- cgit From fef3f92e8a4214652d8f33f50330dc5a92efbf11 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Fri, 27 Jan 2023 14:24:10 +0100 Subject: ice: Fix DSCP PFC TLV creation When creating the TLV to send to the FW for configuring DSCP mode PFC,the PFCENABLE field was being masked with a 4 bit mask (0xF), but this is an 8 bit bitmask for enabled classes for PFC. This means that traffic classes 4-7 could not be enabled for PFC. Remove the mask completely, as it is not necessary, as we are assigning 8 bits to an 8 bit field. Fixes: 2a87bd73e50d ("ice: Add DSCP support") Signed-off-by: Dave Ertman Signed-off-by: Karen Ostrowska Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dcb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index c557dfc50aad..396e555023aa 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -1411,7 +1411,7 @@ ice_add_dscp_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) tlv->ouisubtype = htonl(ouisubtype); buf[0] = dcbcfg->pfc.pfccap & 0xF; - buf[1] = dcbcfg->pfc.pfcena & 0xF; + buf[1] = dcbcfg->pfc.pfcena; } /** -- cgit From c4a9c8e78aad0fd9b79c9b1e83bab3288a15ce3c Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 13 Feb 2023 12:27:33 +0100 Subject: ice: don't ignore return codes in VSI related code There were few smatch warnings reported by Dan: - ice_vsi_cfg_xdp_txqs can return 0 instead of ret, which is cleaner - return values in ice_vsi_cfg_def were ignored - in ice_vsi_rebuild return value was ignored in case rebuild failed, it was a never reached code, however, rewrite it for clarity. - ice_vsi_cfg_tc can return 0 instead of ret Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Reported-by: Dan Carpenter Signed-off-by: Michal Swiatkowski Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 781475480ff2..0f52ea38b6f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2126,7 +2126,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) ice_tx_xsk_pool(vsi, i); - return ret; + return 0; } /** @@ -2693,12 +2693,14 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) return ret; /* allocate memory for Tx/Rx ring stat pointers */ - if (ice_vsi_alloc_stat_arrays(vsi)) + ret = ice_vsi_alloc_stat_arrays(vsi); + if (ret) goto unroll_vsi_alloc; ice_alloc_fd_res(vsi); - if (ice_vsi_get_qs(vsi)) { + ret = ice_vsi_get_qs(vsi); + if (ret) { dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", vsi->idx); goto unroll_vsi_alloc_stat; @@ -2811,6 +2813,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) break; default: /* clean up the resources and exit */ + ret = -EINVAL; goto unroll_vsi_init; } @@ -3508,10 +3511,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (vsi_flags & ICE_VSI_FLAG_INIT) { ret = -EIO; goto err_vsi_cfg_tc_lan; - } else { - kfree(coalesce); - return ice_schedule_reset(pf, ICE_RESET_PFR); } + + kfree(coalesce); + return ice_schedule_reset(pf, ICE_RESET_PFR); } ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq); @@ -3759,7 +3762,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) dev = ice_pf_to_dev(pf); if (vsi->tc_cfg.ena_tc == ena_tc && vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) - return ret; + return 0; ice_for_each_traffic_class(i) { /* build bitmap of enabled TCs */ -- cgit From 8f5c5a790e3025d6eca96bf7ee5e3873dc92373f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2023 16:25:36 +0100 Subject: ethernet: ice: avoid gcc-9 integer overflow warning With older compilers like gcc-9, the calculation of the vlan priority field causes a false-positive warning from the byteswap: In file included from drivers/net/ethernet/intel/ice/ice_tc_lib.c:4: drivers/net/ethernet/intel/ice/ice_tc_lib.c: In function 'ice_parse_cls_flower': include/uapi/linux/swab.h:15:15: error: integer overflow in expression '(int)(short unsigned int)((int)match.key->..vlan_priority << 13) & 57344 & 255' of type 'int' results in '0' [-Werror=overflow] 15 | (((__u16)(x) & (__u16)0x00ffU) << 8) | \ | ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~ include/uapi/linux/swab.h:106:2: note: in expansion of macro '___constant_swab16' 106 | ___constant_swab16(x) : \ | ^~~~~~~~~~~~~~~~~~ include/uapi/linux/byteorder/little_endian.h:42:43: note: in expansion of macro '__swab16' 42 | #define __cpu_to_be16(x) ((__force __be16)__swab16((x))) | ^~~~~~~~ include/linux/byteorder/generic.h:96:21: note: in expansion of macro '__cpu_to_be16' 96 | #define cpu_to_be16 __cpu_to_be16 | ^~~~~~~~~~~~~ drivers/net/ethernet/intel/ice/ice_tc_lib.c:1458:5: note: in expansion of macro 'cpu_to_be16' 1458 | cpu_to_be16((match.key->vlan_priority << | ^~~~~~~~~~~ After a change to be16_encode_bits(), the code becomes more readable to both people and compilers, which avoids the warning. Fixes: 34800178b302 ("ice: Add support for VLAN priority filters in switchdev") Suggested-by: Alexander Lobakin Signed-off-by: Arnd Bergmann Reviewed-by: Alexander Lobakin Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 6b48cbc049c6..76f29a5bf8d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -1455,8 +1455,8 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, if (match.mask->vlan_priority) { fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_PRIO; headers->vlan_hdr.vlan_prio = - cpu_to_be16((match.key->vlan_priority << - VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK); + be16_encode_bits(match.key->vlan_priority, + VLAN_PRIO_MASK); } if (match.mask->vlan_tpid) @@ -1489,8 +1489,8 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, if (match.mask->vlan_priority) { fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN_PRIO; headers->cvlan_hdr.vlan_prio = - cpu_to_be16((match.key->vlan_priority << - VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK); + be16_encode_bits(match.key->vlan_priority, + VLAN_PRIO_MASK); } } -- cgit From 7d834b4d1ab66c48e8c0810fdeadaabb80fa2c81 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Tue, 7 Mar 2023 00:26:50 +0300 Subject: nfc: change order inside nfc_se_io error path cb_context should be freed on the error path in nfc_se_io as stated by commit 25ff6f8a5a3b ("nfc: fix memory leak of se_io context in nfc_genl_se_io"). Make the error path in nfc_se_io unwind everything in reverse order, i.e. free the cb_context after unlocking the device. Suggested-by: Krzysztof Kozlowski Signed-off-by: Fedor Pchelkin Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230306212650.230322-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 348bf561bc9f..b9264e730fd9 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1446,8 +1446,8 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, return rc; error: - kfree(cb_context); device_unlock(&dev->dev); + kfree(cb_context); return rc; } -- cgit From e7b15acdc10f74872fdae1482b3f837818362085 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Mar 2023 11:20:18 -0800 Subject: mailmap: add entry for Maxim Mikityanskiy Map Maxim's old corporate addresses to his personal one. Link: https://lore.kernel.org/r/20230306192018.3894988-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index a872c9683958..04bd44774ad0 100644 --- a/.mailmap +++ b/.mailmap @@ -304,6 +304,8 @@ Mauro Carvalho Chehab Mauro Carvalho Chehab Mauro Carvalho Chehab Mauro Carvalho Chehab +Maxim Mikityanskiy +Maxim Mikityanskiy Maxime Ripard Maxime Ripard Mayuresh Janorkar -- cgit From b1649b0fe98c2c7eed156c957f6f524d5660d859 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 6 Mar 2023 11:44:05 -0800 Subject: mailmap: update entries for Stephen Hemminger Map all my old email addresses to current address. Signed-off-by: Stephen Hemminger Link: https://lore.kernel.org/r/20230306194405.108236-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- .mailmap | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 04bd44774ad0..4f37ff13a346 100644 --- a/.mailmap +++ b/.mailmap @@ -411,7 +411,10 @@ Shuah Khan Simon Arlott Simon Kelley Stéphane Witzmann -Stephen Hemminger +Stephen Hemminger +Stephen Hemminger +Stephen Hemminger +Stephen Hemminger Steve Wise Steve Wise Subash Abhinov Kasiviswanathan -- cgit From 37d9df224d1eec1b434fe9ffa40104c756478c29 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Mar 2023 12:04:57 -0800 Subject: ynl: re-license uniformly under GPL-2.0 OR BSD-3-Clause I was intending to make all the Netlink Spec code BSD-3-Clause to ease the adoption but it appears that: - I fumbled the uAPI and used "GPL WITH uAPI note" there - it gives people pause as they expect GPL in the kernel As suggested by Chuck re-license under dual. This gives us benefit of full BSD freedom while fulfilling the broad "kernel is under GPL" expectations. Link: https://lore.kernel.org/all/20230304120108.05dd44c5@kernel.org/ Link: https://lore.kernel.org/r/20230306200457.3903854-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/genetlink-c.yaml | 2 +- Documentation/netlink/genetlink-legacy.yaml | 2 +- Documentation/netlink/genetlink.yaml | 2 +- Documentation/netlink/specs/ethtool.yaml | 2 ++ Documentation/netlink/specs/fou.yaml | 2 ++ Documentation/netlink/specs/netdev.yaml | 2 ++ Documentation/userspace-api/netlink/specs.rst | 3 +++ include/uapi/linux/fou.h | 2 +- include/uapi/linux/netdev.h | 2 +- net/core/netdev-genl-gen.c | 2 +- net/core/netdev-genl-gen.h | 2 +- net/ipv4/fou_nl.c | 2 +- net/ipv4/fou_nl.h | 2 +- tools/include/uapi/linux/netdev.h | 2 +- tools/net/ynl/cli.py | 2 +- tools/net/ynl/lib/__init__.py | 2 +- tools/net/ynl/lib/nlspec.py | 2 +- tools/net/ynl/lib/ynl.py | 2 +- tools/net/ynl/ynl-gen-c.py | 7 ++++--- tools/net/ynl/ynl-regen.sh | 2 +- 20 files changed, 28 insertions(+), 18 deletions(-) diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index bbcfa2472b04..f082a5ad7cf1 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-c.yaml# diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 5642925c4ceb..c6b8c77f7d12 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml# diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 62a922755ce2..b2d56ab9e615 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml# diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 35c462bce56f..18ecb7d90cbe 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + name: ethtool protocol: genetlink-legacy diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index cca4cf98f03a..cff104288723 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + name: fou protocol: genetlink-legacy diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index ba9ee13cf729..24de747b5344 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + name: netdev doc: diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index 32e53328d113..2122e0c4a399 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -24,6 +24,9 @@ YAML specifications can be found under ``Documentation/netlink/specs/`` This document describes details of the schema. See :doc:`intro-specs` for a practical starting guide. +All specs must be licensed under ``GPL-2.0-only OR BSD-3-Clause`` +to allow for easy adoption in user space code. + Compatibility levels ==================== diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index 19ebbef41a63..5041c3598493 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN uapi header */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 588391447bfb..8c4e3e536c04 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 48812ec843f5..9e10802587fc 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel source */ diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index b16dc7e026bb..2c5fc7d1e8a7 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel header */ diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 6c3820f41dd5..5c14fe030eda 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel source */ diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h index b7a68121ce6f..58b1e1ed4b3b 100644 --- a/net/ipv4/fou_nl.h +++ b/net/ipv4/fou_nl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel header */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 588391447bfb..8c4e3e536c04 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index db410b74d539..ffaa8038aa8c 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause import argparse import json diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py index 3c73f59eabab..a2cb8b16d6f1 100644 --- a/tools/net/ynl/lib/__init__.py +++ b/tools/net/ynl/lib/__init__.py @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause from .nlspec import SpecAttr, SpecAttrSet, SpecFamily, SpecOperation from .ynl import YnlFamily diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 9d394e50de23..0a2cfb5862aa 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause import collections import importlib diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 1c7411ee04dc..a842adc8e87e 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause import functools import os diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 62f8f2c3c56c..c940ca834d3f 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause import argparse import collections @@ -2127,12 +2128,12 @@ def main(): _, spec_kernel = find_kernel_root(args.spec) if args.mode == 'uapi': - cw.p('/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */') + cw.p('/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */') else: if args.header: - cw.p('/* SPDX-License-Identifier: BSD-3-Clause */') + cw.p('/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */') else: - cw.p('// SPDX-License-Identifier: BSD-3-Clause') + cw.p('// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause') cw.p("/* Do not edit directly, auto-generated from: */") cw.p(f"/*\t{spec_kernel} */") cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */") diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index 43989ae48ed0..74f5de1c2399 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause TOOL=$(dirname $(realpath $0))/ynl-gen-c.py -- cgit From ce7ca794712f186da99719e8b4e97bd5ddbb04c3 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Tue, 7 Mar 2023 11:23:46 +0800 Subject: net/smc: fix fallback failed while sendmsg with fastopen Before determining whether the msg has unsupported options, it has been prematurely terminated by the wrong status check. For the application, the general usages of MSG_FASTOPEN likes fd = socket(...) /* rather than connect */ sendto(fd, data, len, MSG_FASTOPEN) Hence, We need to check the flag before state check, because the sock state here is always SMC_INIT when applications tries MSG_FASTOPEN. Once we found unsupported options, fallback it to TCP. Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback") Signed-off-by: D. Wythe Signed-off-by: Simon Horman v2 -> v1: Optimize code style Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/af_smc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a4cccdfdc00a..ff6dd86bdc9f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2657,16 +2657,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct smc_sock *smc; - int rc = -EPIPE; + int rc; smc = smc_sk(sk); lock_sock(sk); - if ((sk->sk_state != SMC_ACTIVE) && - (sk->sk_state != SMC_APPCLOSEWAIT1) && - (sk->sk_state != SMC_INIT)) - goto out; + /* SMC does not support connect with fastopen */ if (msg->msg_flags & MSG_FASTOPEN) { + /* not connected yet, fallback */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); if (rc) @@ -2675,6 +2673,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) rc = -EINVAL; goto out; } + } else if ((sk->sk_state != SMC_ACTIVE) && + (sk->sk_state != SMC_APPCLOSEWAIT1) && + (sk->sk_state != SMC_INIT)) { + rc = -EPIPE; + goto out; } if (smc->use_fallback) { -- cgit From ea9dd2e5c6d12c8b65ce7514c8359a70eeaa0e70 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 7 Mar 2023 16:19:08 +0530 Subject: octeontx2-af: Unlock contexts in the queue context cache in case of fault detection NDC caches contexts of frequently used queue's (Rx and Tx queues) contexts. Due to a HW errata when NDC detects fault/poision while accessing contexts it could go into an illegal state where a cache line could get locked forever. To makesure all cache lines in NDC are available for optimum performance upon fault/lockerror/posion errors scan through all cache lines in NDC and clear the lock bit. Fixes: 4a3581cd5995 ("octeontx2-af: NPA AQ instruction enqueue support") Signed-off-by: Suman Ghosh Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 5 ++ .../ethernet/marvell/octeontx2/af/rvu_debugfs.c | 7 ++- .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 16 +++++- .../net/ethernet/marvell/octeontx2/af/rvu_npa.c | 58 +++++++++++++++++++++- .../net/ethernet/marvell/octeontx2/af/rvu_reg.h | 3 ++ 5 files changed, 82 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 389663a13d1d..ef721caeac49 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -884,6 +884,9 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc); int rvu_cpt_init(struct rvu *rvu); +#define NDC_AF_BANK_MASK GENMASK_ULL(7, 0) +#define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16) + /* CN10K RVU */ int rvu_set_channels_base(struct rvu *rvu); void rvu_program_channels(struct rvu *rvu); @@ -902,6 +905,8 @@ static inline void rvu_dbg_init(struct rvu *rvu) {} static inline void rvu_dbg_exit(struct rvu *rvu) {} #endif +int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr); + /* RVU Switch */ void rvu_switch_enable(struct rvu *rvu); void rvu_switch_disable(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index fa280ebd3052..26cfa501f1a1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -198,9 +198,6 @@ enum cpt_eng_type { CPT_IE_TYPE = 3, }; -#define NDC_MAX_BANK(rvu, blk_addr) (rvu_read64(rvu, \ - blk_addr, NDC_AF_CONST) & 0xFF) - #define rvu_dbg_NULL NULL #define rvu_dbg_open_NULL NULL @@ -1448,6 +1445,7 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr) struct nix_hw *nix_hw; struct rvu *rvu; int bank, max_bank; + u64 ndc_af_const; if (blk_addr == BLKADDR_NDC_NPA0) { rvu = s->private; @@ -1456,7 +1454,8 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr) rvu = nix_hw->rvu; } - max_bank = NDC_MAX_BANK(rvu, blk_addr); + ndc_af_const = rvu_read64(rvu, blk_addr, NDC_AF_CONST); + max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const); for (bank = 0; bank < max_bank; bank++) { seq_printf(s, "BANK:%d\n", bank); seq_printf(s, "\tHits:\t%lld\n", diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 26e639e57dae..4ad707e758b9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -790,6 +790,7 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, struct nix_aq_res_s *result; int timeout = 1000; u64 reg, head; + int ret; result = (struct nix_aq_res_s *)aq->res->base; @@ -813,9 +814,22 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, return -EBUSY; } - if (result->compcode != NIX_AQ_COMP_GOOD) + if (result->compcode != NIX_AQ_COMP_GOOD) { /* TODO: Replace this with some error code */ + if (result->compcode == NIX_AQ_COMP_CTX_FAULT || + result->compcode == NIX_AQ_COMP_LOCKERR || + result->compcode == NIX_AQ_COMP_CTX_POISON) { + ret = rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_RX); + ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_TX); + ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_RX); + ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_TX); + if (ret) + dev_err(rvu->dev, + "%s: Not able to unlock cachelines\n", __func__); + } + return -EBUSY; + } return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c index 70bd036ed76e..4f5ca5ab13a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c @@ -4,7 +4,7 @@ * Copyright (C) 2018 Marvell. * */ - +#include #include #include @@ -42,9 +42,18 @@ static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, return -EBUSY; } - if (result->compcode != NPA_AQ_COMP_GOOD) + if (result->compcode != NPA_AQ_COMP_GOOD) { /* TODO: Replace this with some error code */ + if (result->compcode == NPA_AQ_COMP_CTX_FAULT || + result->compcode == NPA_AQ_COMP_LOCKERR || + result->compcode == NPA_AQ_COMP_CTX_POISON) { + if (rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NPA0)) + dev_err(rvu->dev, + "%s: Not able to unlock cachelines\n", __func__); + } + return -EBUSY; + } return 0; } @@ -545,3 +554,48 @@ void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf) npa_ctx_free(rvu, pfvf); } + +/* Due to an Hardware errata, in some corner cases, AQ context lock + * operations can result in a NDC way getting into an illegal state + * of not valid but locked. + * + * This API solves the problem by clearing the lock bit of the NDC block. + * The operation needs to be done for each line of all the NDC banks. + */ +int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr) +{ + int bank, max_bank, line, max_line, err; + u64 reg, ndc_af_const; + + /* Set the ENABLE bit(63) to '0' */ + reg = rvu_read64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL); + rvu_write64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, reg & GENMASK_ULL(62, 0)); + + /* Poll until the BUSY bits(47:32) are set to '0' */ + err = rvu_poll_reg(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, GENMASK_ULL(47, 32), true); + if (err) { + dev_err(rvu->dev, "Timed out while polling for NDC CAM busy bits.\n"); + return err; + } + + ndc_af_const = rvu_read64(rvu, blkaddr, NDC_AF_CONST); + max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const); + max_line = FIELD_GET(NDC_AF_BANK_LINE_MASK, ndc_af_const); + for (bank = 0; bank < max_bank; bank++) { + for (line = 0; line < max_line; line++) { + /* Check if 'cache line valid bit(63)' is not set + * but 'cache line lock bit(60)' is set and on + * success, reset the lock bit(60). + */ + reg = rvu_read64(rvu, blkaddr, + NDC_AF_BANKX_LINEX_METADATA(bank, line)); + if (!(reg & BIT_ULL(63)) && (reg & BIT_ULL(60))) { + rvu_write64(rvu, blkaddr, + NDC_AF_BANKX_LINEX_METADATA(bank, line), + reg & ~BIT_ULL(60)); + } + } + } + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 1729b22580ce..7007f0b8e659 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -694,6 +694,7 @@ #define NDC_AF_INTR_ENA_W1S (0x00068) #define NDC_AF_INTR_ENA_W1C (0x00070) #define NDC_AF_ACTIVE_PC (0x00078) +#define NDC_AF_CAMS_RD_INTERVAL (0x00080) #define NDC_AF_BP_TEST_ENABLE (0x001F8) #define NDC_AF_BP_TEST(a) (0x00200 | (a) << 3) #define NDC_AF_BLK_RST (0x002F0) @@ -709,6 +710,8 @@ (0x00F00 | (a) << 5 | (b) << 4) #define NDC_AF_BANKX_HIT_PC(a) (0x01000 | (a) << 3) #define NDC_AF_BANKX_MISS_PC(a) (0x01100 | (a) << 3) +#define NDC_AF_BANKX_LINEX_METADATA(a, b) \ + (0x10000 | (a) << 12 | (b) << 3) /* LBK */ #define LBK_CONST (0x10ull) -- cgit From cdd28833100c18a469c85a1cc3de9f6bbbe6caa0 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Tue, 7 Mar 2023 12:21:03 +0100 Subject: net: microchip: sparx5: fix deletion of existing DSCP mappings Fix deletion of existing DSCP mappings in the APP table. Adding and deleting DSCP entries are replicated per-port, since the mapping table is global for all ports in the chip. Whenever a mapping for a DSCP value already exists, the old mapping is deleted first. However, it is only deleted for the specified port. Fix this by calling sparx5_dcb_ieee_delapp() instead of dcb_ieee_delapp() as it ought to be. Reproduce: // Map and remap DSCP value 63 $ dcb app add dev eth0 dscp-prio 63:1 $ dcb app add dev eth0 dscp-prio 63:2 $ dcb app show dev eth0 dscp-prio dscp-prio 63:2 $ dcb app show dev eth1 dscp-prio dscp-prio 63:1 63:2 <-- 63:1 should not be there Fixes: 8dcf69a64118 ("net: microchip: sparx5: add support for offloading dscp table") Signed-off-by: Daniel Machon Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c b/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c index 871a3e62f852..2d763664dcda 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c @@ -249,6 +249,21 @@ static int sparx5_dcb_ieee_dscp_setdel(struct net_device *dev, return 0; } +static int sparx5_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + int err; + + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = sparx5_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp); + else + err = dcb_ieee_delapp(dev, app); + + if (err < 0) + return err; + + return sparx5_dcb_app_update(dev); +} + static int sparx5_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app) { struct dcb_app app_itr; @@ -264,7 +279,7 @@ static int sparx5_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app) if (prio) { app_itr = *app; app_itr.priority = prio; - dcb_ieee_delapp(dev, &app_itr); + sparx5_dcb_ieee_delapp(dev, &app_itr); } if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) @@ -281,21 +296,6 @@ out: return err; } -static int sparx5_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app) -{ - int err; - - if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) - err = sparx5_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp); - else - err = dcb_ieee_delapp(dev, app); - - if (err < 0) - return err; - - return sparx5_dcb_app_update(dev); -} - static int sparx5_dcb_setapptrust(struct net_device *dev, u8 *selectors, int nselectors) { -- cgit From c8b8a3c601f2cfad25ab5ce5b04df700048aef6e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 7 Mar 2023 17:54:11 +0200 Subject: net: dsa: mt7530: permit port 5 to work without port 6 on MT7621 SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MT7530 switch from the MT7621 SoC has 2 ports which can be set up as internal: port 5 and 6. Arınç reports that the GMAC1 attached to port 5 receives corrupted frames, unless port 6 (attached to GMAC0) has been brought up by the driver. This is true regardless of whether port 5 is used as a user port or as a CPU port (carrying DSA tags). Offline debugging (blind for me) which began in the linked thread showed experimentally that the configuration done by the driver for port 6 contains a step which is needed by port 5 as well - the write to CORE_GSWPLL_GRP2 (note that I've no idea as to what it does, apart from the comment "Set core clock into 500Mhz"). Prints put by Arınç show that the reset value of CORE_GSWPLL_GRP2 is RG_GSWPLL_POSDIV_500M(1) | RG_GSWPLL_FBKDIV_500M(40) (0x128), both on the MCM MT7530 from the MT7621 SoC, as well as on the standalone MT7530 from MT7623NI Bananapi BPI-R2. Apparently, port 5 on the standalone MT7530 can work under both values of the register, while on the MT7621 SoC it cannot. The call path that triggers the register write is: mt753x_phylink_mac_config() for port 6 -> mt753x_pad_setup() -> mt7530_pad_clk_setup() so this fully explains the behavior noticed by Arınç, that bringing port 6 up is necessary. The simplest fix for the problem is to extract the register writes which are needed for both port 5 and 6 into a common mt7530_pll_setup() function, which is called at mt7530_setup() time, immediately after switch reset. We can argue that this mirrors the code layout introduced in mt7531_setup() by commit 42bc4fafe359 ("net: mt7531: only do PLL once after the reset"), in that the PLL setup has the exact same positioning, and further work to consolidate the separate setup() functions is not hindered. Testing confirms that: - the slight reordering of writes to MT7530_P6ECR and to CORE_GSWPLL_GRP1 / CORE_GSWPLL_GRP2 introduced by this change does not appear to cause problems for the operation of port 6 on MT7621 and on MT7623 (where port 5 also always worked) - packets sent through port 5 are not corrupted anymore, regardless of whether port 6 is enabled by phylink or not (or even present in the device tree) My algorithm for determining the Fixes: tag is as follows. Testing shows that some logic from mt7530_pad_clk_setup() is needed even for port 5. Prior to commit ca366d6c889b ("net: dsa: mt7530: Convert to PHYLINK API"), a call did exist for all phy_is_pseudo_fixed_link() ports - so port 5 included. That commit replaced it with a temporary "Port 5 is not supported!" comment, and the following commit 38f790a80560 ("net: dsa: mt7530: Add support for port 5") replaced that comment with a configuration procedure in mt7530_setup_port5() which was insufficient for port 5 to work. I'm laying the blame on the patch that claimed support for port 5, although one would have also needed the change from commit c3b8e07909db ("net: dsa: mt7530: setup core clock even in TRGMII mode") for the write to be performed completely independently from port 6's configuration. Thanks go to Arınç for describing the problem, for debugging and for testing. Reported-by: Arınç ÜNAL Link: https://lore.kernel.org/netdev/f297c2c4-6e7c-57ac-2394-f6025d309b9d@arinc9.com/ Fixes: 38f790a80560 ("net: dsa: mt7530: Add support for port 5") Signed-off-by: Vladimir Oltean Tested-by: Arınç ÜNAL Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230307155411.868573-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 3a15015bc409..a508402c4ecb 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -393,6 +393,24 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, mt7530_write(priv, MT7530_ATA1 + (i * 4), reg[i]); } +/* Set up switch core clock for MT7530 */ +static void mt7530_pll_setup(struct mt7530_priv *priv) +{ + /* Disable PLL */ + core_write(priv, CORE_GSWPLL_GRP1, 0); + + /* Set core clock into 500Mhz */ + core_write(priv, CORE_GSWPLL_GRP2, + RG_GSWPLL_POSDIV_500M(1) | + RG_GSWPLL_FBKDIV_500M(25)); + + /* Enable PLL */ + core_write(priv, CORE_GSWPLL_GRP1, + RG_GSWPLL_EN_PRE | + RG_GSWPLL_POSDIV_200M(2) | + RG_GSWPLL_FBKDIV_200M(32)); +} + /* Setup TX circuit including relevant PAD and driving */ static int mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) @@ -453,21 +471,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN | REG_TRGMIICK_EN); - /* Setup core clock for MT7530 */ - /* Disable PLL */ - core_write(priv, CORE_GSWPLL_GRP1, 0); - - /* Set core clock into 500Mhz */ - core_write(priv, CORE_GSWPLL_GRP2, - RG_GSWPLL_POSDIV_500M(1) | - RG_GSWPLL_FBKDIV_500M(25)); - - /* Enable PLL */ - core_write(priv, CORE_GSWPLL_GRP1, - RG_GSWPLL_EN_PRE | - RG_GSWPLL_POSDIV_200M(2) | - RG_GSWPLL_FBKDIV_200M(32)); - /* Setup the MT7530 TRGMII Tx Clock */ core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1)); core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0)); @@ -2196,6 +2199,8 @@ mt7530_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); + mt7530_pll_setup(priv); + /* Enable Port 6 only; P5 as GMAC5 which currently is not supported */ val = mt7530_read(priv, MT7530_MHWTRAP); val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS; -- cgit From 8f14820801042c221bb9fe51643a2585cac5dec2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Mar 2023 09:19:30 -0800 Subject: eth: fealnx: bring back this old driver This reverts commit d5e2d038dbece821f1af57acbeded3aa9a1832c1. We have a report of this chip being used on a SURECOM EP-320X-S 100/10M Ethernet PCI Adapter which could still have been purchased in some parts of the world 3 years ago. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217151 Fixes: d5e2d038dbec ("eth: fealnx: delete the driver for Myson MTD-800") Link: https://lore.kernel.org/r/20230307171930.4008454-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- arch/mips/configs/mtx1_defconfig | 1 + arch/powerpc/configs/ppc6xx_defconfig | 1 + drivers/net/ethernet/Kconfig | 10 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/fealnx.c | 1953 +++++++++++++++++++++++++++++++++ 5 files changed, 1966 insertions(+) create mode 100644 drivers/net/ethernet/fealnx.c diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 89a1511d2ee4..edf9634aa8ee 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -284,6 +284,7 @@ CONFIG_IXGB=m CONFIG_SKGE=m CONFIG_SKY2=m CONFIG_MYRI10GE=m +CONFIG_FEALNX=m CONFIG_NATSEMI=m CONFIG_NS83820=m CONFIG_S2IO=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 110258277959..f73c98be56c8 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -461,6 +461,7 @@ CONFIG_MV643XX_ETH=m CONFIG_SKGE=m CONFIG_SKY2=m CONFIG_MYRI10GE=m +CONFIG_FEALNX=m CONFIG_NATSEMI=m CONFIG_NS83820=m CONFIG_PCMCIA_AXNET=m diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 323ec56e8a74..1917da784191 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -132,6 +132,16 @@ source "drivers/net/ethernet/mscc/Kconfig" source "drivers/net/ethernet/microsoft/Kconfig" source "drivers/net/ethernet/moxa/Kconfig" source "drivers/net/ethernet/myricom/Kconfig" + +config FEALNX + tristate "Myson MTD-8xx PCI Ethernet support" + depends on PCI + select CRC32 + select MII + help + Say Y here to support the Myson MTD-800 family of PCI-based Ethernet + cards. + source "drivers/net/ethernet/ni/Kconfig" source "drivers/net/ethernet/natsemi/Kconfig" source "drivers/net/ethernet/neterion/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 2fedbaa545eb..0d872d4efcd1 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/ obj-$(CONFIG_NET_VENDOR_MICROSEMI) += mscc/ obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/ obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/ +obj-$(CONFIG_FEALNX) += fealnx.o obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/ obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/ obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/ diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c new file mode 100644 index 000000000000..ed18450fd2cc --- /dev/null +++ b/drivers/net/ethernet/fealnx.c @@ -0,0 +1,1953 @@ +/* + Written 1998-2000 by Donald Becker. + + This software may be used and distributed according to the terms of + the GNU General Public License (GPL), incorporated herein by reference. + Drivers based on or derived from this code fall under the GPL and must + retain the authorship, copyright and license notice. This file is not + a complete program and may only be used when the entire operating + system is licensed under the GPL. + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + Support information and updates available at + http://www.scyld.com/network/pci-skeleton.html + + Linux kernel updates: + + Version 2.51, Nov 17, 2001 (jgarzik): + - Add ethtool support + - Replace some MII-related magic numbers with constants + +*/ + +#define DRV_NAME "fealnx" + +static int debug; /* 1-> print debug message */ +static int max_interrupt_work = 20; + +/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). */ +static int multicast_filter_limit = 32; + +/* Set the copy breakpoint for the copy-only-tiny-frames scheme. */ +/* Setting to > 1518 effectively disables this feature. */ +static int rx_copybreak; + +/* Used to pass the media type, etc. */ +/* Both 'options[]' and 'full_duplex[]' should exist for driver */ +/* interoperability. */ +/* The media type is usually passed in 'options[]'. */ +#define MAX_UNITS 8 /* More are supported, limit only on options */ +static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +static int full_duplex[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 }; + +/* Operational parameters that are set at compile time. */ +/* Keep the ring sizes a power of two for compile efficiency. */ +/* The compiler will convert '%'<2^N> into a bit mask. */ +/* Making the Tx ring too large decreases the effectiveness of channel */ +/* bonding and packet priority. */ +/* There are no ill effects from too-large receive rings. */ +// 88-12-9 modify, +// #define TX_RING_SIZE 16 +// #define RX_RING_SIZE 32 +#define TX_RING_SIZE 6 +#define RX_RING_SIZE 12 +#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct fealnx_desc) +#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct fealnx_desc) + +/* Operational parameters that usually are not changed. */ +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (2*HZ) + +#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer. */ + + +/* Include files, designed to support most kernel versions 2.0.0 and later. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include /* Processor type for cache alignment. */ +#include +#include +#include + +/* This driver was written to use PCI memory space, however some x86 systems + work only with I/O space accesses. */ +#ifndef __alpha__ +#define USE_IO_OPS +#endif + +/* Kernel compatibility defines, some common to David Hinds' PCMCIA package. */ +/* This is only in the support-all-kernels source code. */ + +#define RUN_AT(x) (jiffies + (x)) + +MODULE_AUTHOR("Myson or whoever"); +MODULE_DESCRIPTION("Myson MTD-8xx 100/10M Ethernet PCI Adapter Driver"); +MODULE_LICENSE("GPL"); +module_param(max_interrupt_work, int, 0); +module_param(debug, int, 0); +module_param(rx_copybreak, int, 0); +module_param(multicast_filter_limit, int, 0); +module_param_array(options, int, NULL, 0); +module_param_array(full_duplex, int, NULL, 0); +MODULE_PARM_DESC(max_interrupt_work, "fealnx maximum events handled per interrupt"); +MODULE_PARM_DESC(debug, "fealnx enable debugging (0-1)"); +MODULE_PARM_DESC(rx_copybreak, "fealnx copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(multicast_filter_limit, "fealnx maximum number of filtered multicast addresses"); +MODULE_PARM_DESC(options, "fealnx: Bits 0-3: media type, bit 17: full duplex"); +MODULE_PARM_DESC(full_duplex, "fealnx full duplex setting(s) (1)"); + +enum { + MIN_REGION_SIZE = 136, +}; + +/* A chip capabilities table, matching the entries in pci_tbl[] above. */ +enum chip_capability_flags { + HAS_MII_XCVR, + HAS_CHIP_XCVR, +}; + +/* 89/6/13 add, */ +/* for different PHY */ +enum phy_type_flags { + MysonPHY = 1, + AhdocPHY = 2, + SeeqPHY = 3, + MarvellPHY = 4, + Myson981 = 5, + LevelOnePHY = 6, + OtherPHY = 10, +}; + +struct chip_info { + char *chip_name; + int flags; +}; + +static const struct chip_info skel_netdrv_tbl[] = { + { "100/10M Ethernet PCI Adapter", HAS_MII_XCVR }, + { "100/10M Ethernet PCI Adapter", HAS_CHIP_XCVR }, + { "1000/100/10M Ethernet PCI Adapter", HAS_MII_XCVR }, +}; + +/* Offsets to the Command and Status Registers. */ +enum fealnx_offsets { + PAR0 = 0x0, /* physical address 0-3 */ + PAR1 = 0x04, /* physical address 4-5 */ + MAR0 = 0x08, /* multicast address 0-3 */ + MAR1 = 0x0C, /* multicast address 4-7 */ + FAR0 = 0x10, /* flow-control address 0-3 */ + FAR1 = 0x14, /* flow-control address 4-5 */ + TCRRCR = 0x18, /* receive & transmit configuration */ + BCR = 0x1C, /* bus command */ + TXPDR = 0x20, /* transmit polling demand */ + RXPDR = 0x24, /* receive polling demand */ + RXCWP = 0x28, /* receive current word pointer */ + TXLBA = 0x2C, /* transmit list base address */ + RXLBA = 0x30, /* receive list base address */ + ISR = 0x34, /* interrupt status */ + IMR = 0x38, /* interrupt mask */ + FTH = 0x3C, /* flow control high/low threshold */ + MANAGEMENT = 0x40, /* bootrom/eeprom and mii management */ + TALLY = 0x44, /* tally counters for crc and mpa */ + TSR = 0x48, /* tally counter for transmit status */ + BMCRSR = 0x4c, /* basic mode control and status */ + PHYIDENTIFIER = 0x50, /* phy identifier */ + ANARANLPAR = 0x54, /* auto-negotiation advertisement and link + partner ability */ + ANEROCR = 0x58, /* auto-negotiation expansion and pci conf. */ + BPREMRPSR = 0x5c, /* bypass & receive error mask and phy status */ +}; + +/* Bits in the interrupt status/enable registers. */ +/* The bits in the Intr Status/Enable registers, mostly interrupt sources. */ +enum intr_status_bits { + RFCON = 0x00020000, /* receive flow control xon packet */ + RFCOFF = 0x00010000, /* receive flow control xoff packet */ + LSCStatus = 0x00008000, /* link status change */ + ANCStatus = 0x00004000, /* autonegotiation completed */ + FBE = 0x00002000, /* fatal bus error */ + FBEMask = 0x00001800, /* mask bit12-11 */ + ParityErr = 0x00000000, /* parity error */ + TargetErr = 0x00001000, /* target abort */ + MasterErr = 0x00000800, /* master error */ + TUNF = 0x00000400, /* transmit underflow */ + ROVF = 0x00000200, /* receive overflow */ + ETI = 0x00000100, /* transmit early int */ + ERI = 0x00000080, /* receive early int */ + CNTOVF = 0x00000040, /* counter overflow */ + RBU = 0x00000020, /* receive buffer unavailable */ + TBU = 0x00000010, /* transmit buffer unavilable */ + TI = 0x00000008, /* transmit interrupt */ + RI = 0x00000004, /* receive interrupt */ + RxErr = 0x00000002, /* receive error */ +}; + +/* Bits in the NetworkConfig register, W for writing, R for reading */ +/* FIXME: some names are invented by me. Marked with (name?) */ +/* If you have docs and know bit names, please fix 'em */ +enum rx_mode_bits { + CR_W_ENH = 0x02000000, /* enhanced mode (name?) */ + CR_W_FD = 0x00100000, /* full duplex */ + CR_W_PS10 = 0x00080000, /* 10 mbit */ + CR_W_TXEN = 0x00040000, /* tx enable (name?) */ + CR_W_PS1000 = 0x00010000, /* 1000 mbit */ + /* CR_W_RXBURSTMASK= 0x00000e00, Im unsure about this */ + CR_W_RXMODEMASK = 0x000000e0, + CR_W_PROM = 0x00000080, /* promiscuous mode */ + CR_W_AB = 0x00000040, /* accept broadcast */ + CR_W_AM = 0x00000020, /* accept mutlicast */ + CR_W_ARP = 0x00000008, /* receive runt pkt */ + CR_W_ALP = 0x00000004, /* receive long pkt */ + CR_W_SEP = 0x00000002, /* receive error pkt */ + CR_W_RXEN = 0x00000001, /* rx enable (unicast?) (name?) */ + + CR_R_TXSTOP = 0x04000000, /* tx stopped (name?) */ + CR_R_FD = 0x00100000, /* full duplex detected */ + CR_R_PS10 = 0x00080000, /* 10 mbit detected */ + CR_R_RXSTOP = 0x00008000, /* rx stopped (name?) */ +}; + +/* The Tulip Rx and Tx buffer descriptors. */ +struct fealnx_desc { + s32 status; + s32 control; + u32 buffer; + u32 next_desc; + struct fealnx_desc *next_desc_logical; + struct sk_buff *skbuff; + u32 reserved1; + u32 reserved2; +}; + +/* Bits in network_desc.status */ +enum rx_desc_status_bits { + RXOWN = 0x80000000, /* own bit */ + FLNGMASK = 0x0fff0000, /* frame length */ + FLNGShift = 16, + MARSTATUS = 0x00004000, /* multicast address received */ + BARSTATUS = 0x00002000, /* broadcast address received */ + PHYSTATUS = 0x00001000, /* physical address received */ + RXFSD = 0x00000800, /* first descriptor */ + RXLSD = 0x00000400, /* last descriptor */ + ErrorSummary = 0x80, /* error summary */ + RUNTPKT = 0x40, /* runt packet received */ + LONGPKT = 0x20, /* long packet received */ + FAE = 0x10, /* frame align error */ + CRC = 0x08, /* crc error */ + RXER = 0x04, /* receive error */ +}; + +enum rx_desc_control_bits { + RXIC = 0x00800000, /* interrupt control */ + RBSShift = 0, +}; + +enum tx_desc_status_bits { + TXOWN = 0x80000000, /* own bit */ + JABTO = 0x00004000, /* jabber timeout */ + CSL = 0x00002000, /* carrier sense lost */ + LC = 0x00001000, /* late collision */ + EC = 0x00000800, /* excessive collision */ + UDF = 0x00000400, /* fifo underflow */ + DFR = 0x00000200, /* deferred */ + HF = 0x00000100, /* heartbeat fail */ + NCRMask = 0x000000ff, /* collision retry count */ + NCRShift = 0, +}; + +enum tx_desc_control_bits { + TXIC = 0x80000000, /* interrupt control */ + ETIControl = 0x40000000, /* early transmit interrupt */ + TXLD = 0x20000000, /* last descriptor */ + TXFD = 0x10000000, /* first descriptor */ + CRCEnable = 0x08000000, /* crc control */ + PADEnable = 0x04000000, /* padding control */ + RetryTxLC = 0x02000000, /* retry late collision */ + PKTSMask = 0x3ff800, /* packet size bit21-11 */ + PKTSShift = 11, + TBSMask = 0x000007ff, /* transmit buffer bit 10-0 */ + TBSShift = 0, +}; + +/* BootROM/EEPROM/MII Management Register */ +#define MASK_MIIR_MII_READ 0x00000000 +#define MASK_MIIR_MII_WRITE 0x00000008 +#define MASK_MIIR_MII_MDO 0x00000004 +#define MASK_MIIR_MII_MDI 0x00000002 +#define MASK_MIIR_MII_MDC 0x00000001 + +/* ST+OP+PHYAD+REGAD+TA */ +#define OP_READ 0x6000 /* ST:01+OP:10+PHYAD+REGAD+TA:Z0 */ +#define OP_WRITE 0x5002 /* ST:01+OP:01+PHYAD+REGAD+TA:10 */ + +/* ------------------------------------------------------------------------- */ +/* Constants for Myson PHY */ +/* ------------------------------------------------------------------------- */ +#define MysonPHYID 0xd0000302 +/* 89-7-27 add, (begin) */ +#define MysonPHYID0 0x0302 +#define StatusRegister 18 +#define SPEED100 0x0400 // bit10 +#define FULLMODE 0x0800 // bit11 +/* 89-7-27 add, (end) */ + +/* ------------------------------------------------------------------------- */ +/* Constants for Seeq 80225 PHY */ +/* ------------------------------------------------------------------------- */ +#define SeeqPHYID0 0x0016 + +#define MIIRegister18 18 +#define SPD_DET_100 0x80 +#define DPLX_DET_FULL 0x40 + +/* ------------------------------------------------------------------------- */ +/* Constants for Ahdoc 101 PHY */ +/* ------------------------------------------------------------------------- */ +#define AhdocPHYID0 0x0022 + +#define DiagnosticReg 18 +#define DPLX_FULL 0x0800 +#define Speed_100 0x0400 + +/* 89/6/13 add, */ +/* -------------------------------------------------------------------------- */ +/* Constants */ +/* -------------------------------------------------------------------------- */ +#define MarvellPHYID0 0x0141 +#define LevelOnePHYID0 0x0013 + +#define MII1000BaseTControlReg 9 +#define MII1000BaseTStatusReg 10 +#define SpecificReg 17 + +/* for 1000BaseT Control Register */ +#define PHYAbletoPerform1000FullDuplex 0x0200 +#define PHYAbletoPerform1000HalfDuplex 0x0100 +#define PHY1000AbilityMask 0x300 + +// for phy specific status register, marvell phy. +#define SpeedMask 0x0c000 +#define Speed_1000M 0x08000 +#define Speed_100M 0x4000 +#define Speed_10M 0 +#define Full_Duplex 0x2000 + +// 89/12/29 add, for phy specific status register, levelone phy, (begin) +#define LXT1000_100M 0x08000 +#define LXT1000_1000M 0x0c000 +#define LXT1000_Full 0x200 +// 89/12/29 add, for phy specific status register, levelone phy, (end) + +/* for 3-in-1 case, BMCRSR register */ +#define LinkIsUp2 0x00040000 + +/* for PHY */ +#define LinkIsUp 0x0004 + + +struct netdev_private { + /* Descriptor rings first for alignment. */ + struct fealnx_desc *rx_ring; + struct fealnx_desc *tx_ring; + + dma_addr_t rx_ring_dma; + dma_addr_t tx_ring_dma; + + spinlock_t lock; + + /* Media monitoring timer. */ + struct timer_list timer; + + /* Reset timer */ + struct timer_list reset_timer; + int reset_timer_armed; + unsigned long crvalue_sv; + unsigned long imrvalue_sv; + + /* Frequently used values: keep some adjacent for cache effect. */ + int flags; + struct pci_dev *pci_dev; + unsigned long crvalue; + unsigned long bcrvalue; + unsigned long imrvalue; + struct fealnx_desc *cur_rx; + struct fealnx_desc *lack_rxbuf; + int really_rx_count; + struct fealnx_desc *cur_tx; + struct fealnx_desc *cur_tx_copy; + int really_tx_count; + int free_tx_count; + unsigned int rx_buf_sz; /* Based on MTU+slack. */ + + /* These values are keep track of the transceiver/media in use. */ + unsigned int linkok; + unsigned int line_speed; + unsigned int duplexmode; + unsigned int default_port:4; /* Last dev->if_port value. */ + unsigned int PHYType; + + /* MII transceiver section. */ + int mii_cnt; /* MII device addresses. */ + unsigned char phys[2]; /* MII device addresses. */ + struct mii_if_info mii; + void __iomem *mem; +}; + + +static int mdio_read(struct net_device *dev, int phy_id, int location); +static void mdio_write(struct net_device *dev, int phy_id, int location, int value); +static int netdev_open(struct net_device *dev); +static void getlinktype(struct net_device *dev); +static void getlinkstatus(struct net_device *dev); +static void netdev_timer(struct timer_list *t); +static void reset_timer(struct timer_list *t); +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue); +static void init_ring(struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); +static irqreturn_t intr_handler(int irq, void *dev_instance); +static int netdev_rx(struct net_device *dev); +static void set_rx_mode(struct net_device *dev); +static void __set_rx_mode(struct net_device *dev); +static struct net_device_stats *get_stats(struct net_device *dev); +static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static const struct ethtool_ops netdev_ethtool_ops; +static int netdev_close(struct net_device *dev); +static void reset_rx_descriptors(struct net_device *dev); +static void reset_tx_descriptors(struct net_device *dev); + +static void stop_nic_rx(void __iomem *ioaddr, long crvalue) +{ + int delay = 0x1000; + iowrite32(crvalue & ~(CR_W_RXEN), ioaddr + TCRRCR); + while (--delay) { + if ( (ioread32(ioaddr + TCRRCR) & CR_R_RXSTOP) == CR_R_RXSTOP) + break; + } +} + + +static void stop_nic_rxtx(void __iomem *ioaddr, long crvalue) +{ + int delay = 0x1000; + iowrite32(crvalue & ~(CR_W_RXEN+CR_W_TXEN), ioaddr + TCRRCR); + while (--delay) { + if ( (ioread32(ioaddr + TCRRCR) & (CR_R_RXSTOP+CR_R_TXSTOP)) + == (CR_R_RXSTOP+CR_R_TXSTOP) ) + break; + } +} + +static const struct net_device_ops netdev_ops = { + .ndo_open = netdev_open, + .ndo_stop = netdev_close, + .ndo_start_xmit = start_tx, + .ndo_get_stats = get_stats, + .ndo_set_rx_mode = set_rx_mode, + .ndo_eth_ioctl = mii_ioctl, + .ndo_tx_timeout = fealnx_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +static int fealnx_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct netdev_private *np; + int i, option, err, irq; + static int card_idx = -1; + char boardname[12]; + void __iomem *ioaddr; + unsigned long len; + unsigned int chip_id = ent->driver_data; + struct net_device *dev; + void *ring_space; + dma_addr_t ring_dma; + u8 addr[ETH_ALEN]; +#ifdef USE_IO_OPS + int bar = 0; +#else + int bar = 1; +#endif + + card_idx++; + sprintf(boardname, "fealnx%d", card_idx); + + option = card_idx < MAX_UNITS ? options[card_idx] : 0; + + i = pci_enable_device(pdev); + if (i) return i; + pci_set_master(pdev); + + len = pci_resource_len(pdev, bar); + if (len < MIN_REGION_SIZE) { + dev_err(&pdev->dev, + "region size %ld too small, aborting\n", len); + return -ENODEV; + } + + i = pci_request_regions(pdev, boardname); + if (i) + return i; + + irq = pdev->irq; + + ioaddr = pci_iomap(pdev, bar, len); + if (!ioaddr) { + err = -ENOMEM; + goto err_out_res; + } + + dev = alloc_etherdev(sizeof(struct netdev_private)); + if (!dev) { + err = -ENOMEM; + goto err_out_unmap; + } + SET_NETDEV_DEV(dev, &pdev->dev); + + /* read ethernet id */ + for (i = 0; i < 6; ++i) + addr[i] = ioread8(ioaddr + PAR0 + i); + eth_hw_addr_set(dev, addr); + + /* Reset the chip to erase previous misconfiguration. */ + iowrite32(0x00000001, ioaddr + BCR); + + /* Make certain the descriptor lists are aligned. */ + np = netdev_priv(dev); + np->mem = ioaddr; + spin_lock_init(&np->lock); + np->pci_dev = pdev; + np->flags = skel_netdrv_tbl[chip_id].flags; + pci_set_drvdata(pdev, dev); + np->mii.dev = dev; + np->mii.mdio_read = mdio_read; + np->mii.mdio_write = mdio_write; + np->mii.phy_id_mask = 0x1f; + np->mii.reg_num_mask = 0x1f; + + ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, &ring_dma, + GFP_KERNEL); + if (!ring_space) { + err = -ENOMEM; + goto err_out_free_dev; + } + np->rx_ring = ring_space; + np->rx_ring_dma = ring_dma; + + ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, &ring_dma, + GFP_KERNEL); + if (!ring_space) { + err = -ENOMEM; + goto err_out_free_rx; + } + np->tx_ring = ring_space; + np->tx_ring_dma = ring_dma; + + /* find the connected MII xcvrs */ + if (np->flags == HAS_MII_XCVR) { + int phy, phy_idx = 0; + + for (phy = 1; phy < 32 && phy_idx < ARRAY_SIZE(np->phys); + phy++) { + int mii_status = mdio_read(dev, phy, 1); + + if (mii_status != 0xffff && mii_status != 0x0000) { + np->phys[phy_idx++] = phy; + dev_info(&pdev->dev, + "MII PHY found at address %d, status " + "0x%4.4x.\n", phy, mii_status); + /* get phy type */ + { + unsigned int data; + + data = mdio_read(dev, np->phys[0], 2); + if (data == SeeqPHYID0) + np->PHYType = SeeqPHY; + else if (data == AhdocPHYID0) + np->PHYType = AhdocPHY; + else if (data == MarvellPHYID0) + np->PHYType = MarvellPHY; + else if (data == MysonPHYID0) + np->PHYType = Myson981; + else if (data == LevelOnePHYID0) + np->PHYType = LevelOnePHY; + else + np->PHYType = OtherPHY; + } + } + } + + np->mii_cnt = phy_idx; + if (phy_idx == 0) + dev_warn(&pdev->dev, + "MII PHY not found -- this device may " + "not operate correctly.\n"); + } else { + np->phys[0] = 32; +/* 89/6/23 add, (begin) */ + /* get phy type */ + if (ioread32(ioaddr + PHYIDENTIFIER) == MysonPHYID) + np->PHYType = MysonPHY; + else + np->PHYType = OtherPHY; + } + np->mii.phy_id = np->phys[0]; + + if (dev->mem_start) + option = dev->mem_start; + + /* The lower four bits are the media type. */ + if (option > 0) { + if (option & 0x200) + np->mii.full_duplex = 1; + np->default_port = option & 15; + } + + if (card_idx < MAX_UNITS && full_duplex[card_idx] > 0) + np->mii.full_duplex = full_duplex[card_idx]; + + if (np->mii.full_duplex) { + dev_info(&pdev->dev, "Media type forced to Full Duplex.\n"); +/* 89/6/13 add, (begin) */ +// if (np->PHYType==MarvellPHY) + if ((np->PHYType == MarvellPHY) || (np->PHYType == LevelOnePHY)) { + unsigned int data; + + data = mdio_read(dev, np->phys[0], 9); + data = (data & 0xfcff) | 0x0200; + mdio_write(dev, np->phys[0], 9, data); + } +/* 89/6/13 add, (end) */ + if (np->flags == HAS_MII_XCVR) + mdio_write(dev, np->phys[0], MII_ADVERTISE, ADVERTISE_FULL); + else + iowrite32(ADVERTISE_FULL, ioaddr + ANARANLPAR); + np->mii.force_media = 1; + } + + dev->netdev_ops = &netdev_ops; + dev->ethtool_ops = &netdev_ethtool_ops; + dev->watchdog_timeo = TX_TIMEOUT; + + err = register_netdev(dev); + if (err) + goto err_out_free_tx; + + printk(KERN_INFO "%s: %s at %p, %pM, IRQ %d.\n", + dev->name, skel_netdrv_tbl[chip_id].chip_name, ioaddr, + dev->dev_addr, irq); + + return 0; + +err_out_free_tx: + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring, + np->tx_ring_dma); +err_out_free_rx: + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring, + np->rx_ring_dma); +err_out_free_dev: + free_netdev(dev); +err_out_unmap: + pci_iounmap(pdev, ioaddr); +err_out_res: + pci_release_regions(pdev); + return err; +} + + +static void fealnx_remove_one(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev) { + struct netdev_private *np = netdev_priv(dev); + + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring, + np->tx_ring_dma); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring, + np->rx_ring_dma); + unregister_netdev(dev); + pci_iounmap(pdev, np->mem); + free_netdev(dev); + pci_release_regions(pdev); + } else + printk(KERN_ERR "fealnx: remove for unknown device\n"); +} + + +static ulong m80x_send_cmd_to_phy(void __iomem *miiport, int opcode, int phyad, int regad) +{ + ulong miir; + int i; + unsigned int mask, data; + + /* enable MII output */ + miir = (ulong) ioread32(miiport); + miir &= 0xfffffff0; + + miir |= MASK_MIIR_MII_WRITE + MASK_MIIR_MII_MDO; + + /* send 32 1's preamble */ + for (i = 0; i < 32; i++) { + /* low MDC; MDO is already high (miir) */ + miir &= ~MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); + + /* high MDC */ + miir |= MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); + } + + /* calculate ST+OP+PHYAD+REGAD+TA */ + data = opcode | (phyad << 7) | (regad << 2); + + /* sent out */ + mask = 0x8000; + while (mask) { + /* low MDC, prepare MDO */ + miir &= ~(MASK_MIIR_MII_MDC + MASK_MIIR_MII_MDO); + if (mask & data) + miir |= MASK_MIIR_MII_MDO; + + iowrite32(miir, miiport); + /* high MDC */ + miir |= MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); + udelay(30); + + /* next */ + mask >>= 1; + if (mask == 0x2 && opcode == OP_READ) + miir &= ~MASK_MIIR_MII_WRITE; + } + return miir; +} + + +static int mdio_read(struct net_device *dev, int phyad, int regad) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *miiport = np->mem + MANAGEMENT; + ulong miir; + unsigned int mask, data; + + miir = m80x_send_cmd_to_phy(miiport, OP_READ, phyad, regad); + + /* read data */ + mask = 0x8000; + data = 0; + while (mask) { + /* low MDC */ + miir &= ~MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); + + /* read MDI */ + miir = ioread32(miiport); + if (miir & MASK_MIIR_MII_MDI) + data |= mask; + + /* high MDC, and wait */ + miir |= MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); + udelay(30); + + /* next */ + mask >>= 1; + } + + /* low MDC */ + miir &= ~MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); + + return data & 0xffff; +} + + +static void mdio_write(struct net_device *dev, int phyad, int regad, int data) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *miiport = np->mem + MANAGEMENT; + ulong miir; + unsigned int mask; + + miir = m80x_send_cmd_to_phy(miiport, OP_WRITE, phyad, regad); + + /* write data */ + mask = 0x8000; + while (mask) { + /* low MDC, prepare MDO */ + miir &= ~(MASK_MIIR_MII_MDC + MASK_MIIR_MII_MDO); + if (mask & data) + miir |= MASK_MIIR_MII_MDO; + iowrite32(miir, miiport); + + /* high MDC */ + miir |= MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); + + /* next */ + mask >>= 1; + } + + /* low MDC */ + miir &= ~MASK_MIIR_MII_MDC; + iowrite32(miir, miiport); +} + + +static int netdev_open(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + const int irq = np->pci_dev->irq; + int rc, i; + + iowrite32(0x00000001, ioaddr + BCR); /* Reset */ + + rc = request_irq(irq, intr_handler, IRQF_SHARED, dev->name, dev); + if (rc) + return -EAGAIN; + + for (i = 0; i < 3; i++) + iowrite16(((const unsigned short *)dev->dev_addr)[i], + ioaddr + PAR0 + i*2); + + init_ring(dev); + + iowrite32(np->rx_ring_dma, ioaddr + RXLBA); + iowrite32(np->tx_ring_dma, ioaddr + TXLBA); + + /* Initialize other registers. */ + /* Configure the PCI bus bursts and FIFO thresholds. + 486: Set 8 longword burst. + 586: no burst limit. + Burst length 5:3 + 0 0 0 1 + 0 0 1 4 + 0 1 0 8 + 0 1 1 16 + 1 0 0 32 + 1 0 1 64 + 1 1 0 128 + 1 1 1 256 + Wait the specified 50 PCI cycles after a reset by initializing + Tx and Rx queues and the address filter list. + FIXME (Ueimor): optimistic for alpha + posted writes ? */ + + np->bcrvalue = 0x10; /* little-endian, 8 burst length */ +#ifdef __BIG_ENDIAN + np->bcrvalue |= 0x04; /* big-endian */ +#endif + +#if defined(__i386__) && !defined(MODULE) && !defined(CONFIG_UML) + if (boot_cpu_data.x86 <= 4) + np->crvalue = 0xa00; + else +#endif + np->crvalue = 0xe00; /* rx 128 burst length */ + + +// 89/12/29 add, +// 90/1/16 modify, +// np->imrvalue=FBE|TUNF|CNTOVF|RBU|TI|RI; + np->imrvalue = TUNF | CNTOVF | RBU | TI | RI; + if (np->pci_dev->device == 0x891) { + np->bcrvalue |= 0x200; /* set PROG bit */ + np->crvalue |= CR_W_ENH; /* set enhanced bit */ + np->imrvalue |= ETI; + } + iowrite32(np->bcrvalue, ioaddr + BCR); + + if (dev->if_port == 0) + dev->if_port = np->default_port; + + iowrite32(0, ioaddr + RXPDR); +// 89/9/1 modify, +// np->crvalue = 0x00e40001; /* tx store and forward, tx/rx enable */ + np->crvalue |= 0x00e40001; /* tx store and forward, tx/rx enable */ + np->mii.full_duplex = np->mii.force_media; + getlinkstatus(dev); + if (np->linkok) + getlinktype(dev); + __set_rx_mode(dev); + + netif_start_queue(dev); + + /* Clear and Enable interrupts by setting the interrupt mask. */ + iowrite32(FBE | TUNF | CNTOVF | RBU | TI | RI, ioaddr + ISR); + iowrite32(np->imrvalue, ioaddr + IMR); + + if (debug) + printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name); + + /* Set the timer to check for link beat. */ + timer_setup(&np->timer, netdev_timer, 0); + np->timer.expires = RUN_AT(3 * HZ); + + /* timer handler */ + add_timer(&np->timer); + + timer_setup(&np->reset_timer, reset_timer, 0); + np->reset_timer_armed = 0; + return rc; +} + + +static void getlinkstatus(struct net_device *dev) +/* function: Routine will read MII Status Register to get link status. */ +/* input : dev... pointer to the adapter block. */ +/* output : none. */ +{ + struct netdev_private *np = netdev_priv(dev); + unsigned int i, DelayTime = 0x1000; + + np->linkok = 0; + + if (np->PHYType == MysonPHY) { + for (i = 0; i < DelayTime; ++i) { + if (ioread32(np->mem + BMCRSR) & LinkIsUp2) { + np->linkok = 1; + return; + } + udelay(100); + } + } else { + for (i = 0; i < DelayTime; ++i) { + if (mdio_read(dev, np->phys[0], MII_BMSR) & BMSR_LSTATUS) { + np->linkok = 1; + return; + } + udelay(100); + } + } +} + + +static void getlinktype(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + + if (np->PHYType == MysonPHY) { /* 3-in-1 case */ + if (ioread32(np->mem + TCRRCR) & CR_R_FD) + np->duplexmode = 2; /* full duplex */ + else + np->duplexmode = 1; /* half duplex */ + if (ioread32(np->mem + TCRRCR) & CR_R_PS10) + np->line_speed = 1; /* 10M */ + else + np->line_speed = 2; /* 100M */ + } else { + if (np->PHYType == SeeqPHY) { /* this PHY is SEEQ 80225 */ + unsigned int data; + + data = mdio_read(dev, np->phys[0], MIIRegister18); + if (data & SPD_DET_100) + np->line_speed = 2; /* 100M */ + else + np->line_speed = 1; /* 10M */ + if (data & DPLX_DET_FULL) + np->duplexmode = 2; /* full duplex mode */ + else + np->duplexmode = 1; /* half duplex mode */ + } else if (np->PHYType == AhdocPHY) { + unsigned int data; + + data = mdio_read(dev, np->phys[0], DiagnosticReg); + if (data & Speed_100) + np->line_speed = 2; /* 100M */ + else + np->line_speed = 1; /* 10M */ + if (data & DPLX_FULL) + np->duplexmode = 2; /* full duplex mode */ + else + np->duplexmode = 1; /* half duplex mode */ + } +/* 89/6/13 add, (begin) */ + else if (np->PHYType == MarvellPHY) { + unsigned int data; + + data = mdio_read(dev, np->phys[0], SpecificReg); + if (data & Full_Duplex) + np->duplexmode = 2; /* full duplex mode */ + else + np->duplexmode = 1; /* half duplex mode */ + data &= SpeedMask; + if (data == Speed_1000M) + np->line_speed = 3; /* 1000M */ + else if (data == Speed_100M) + np->line_speed = 2; /* 100M */ + else + np->line_speed = 1; /* 10M */ + } +/* 89/6/13 add, (end) */ +/* 89/7/27 add, (begin) */ + else if (np->PHYType == Myson981) { + unsigned int data; + + data = mdio_read(dev, np->phys[0], StatusRegister); + + if (data & SPEED100) + np->line_speed = 2; + else + np->line_speed = 1; + + if (data & FULLMODE) + np->duplexmode = 2; + else + np->duplexmode = 1; + } +/* 89/7/27 add, (end) */ +/* 89/12/29 add */ + else if (np->PHYType == LevelOnePHY) { + unsigned int data; + + data = mdio_read(dev, np->phys[0], SpecificReg); + if (data & LXT1000_Full) + np->duplexmode = 2; /* full duplex mode */ + else + np->duplexmode = 1; /* half duplex mode */ + data &= SpeedMask; + if (data == LXT1000_1000M) + np->line_speed = 3; /* 1000M */ + else if (data == LXT1000_100M) + np->line_speed = 2; /* 100M */ + else + np->line_speed = 1; /* 10M */ + } + np->crvalue &= (~CR_W_PS10) & (~CR_W_FD) & (~CR_W_PS1000); + if (np->line_speed == 1) + np->crvalue |= CR_W_PS10; + else if (np->line_speed == 3) + np->crvalue |= CR_W_PS1000; + if (np->duplexmode == 2) + np->crvalue |= CR_W_FD; + } +} + + +/* Take lock before calling this */ +static void allocate_rx_buffers(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + + /* allocate skb for rx buffers */ + while (np->really_rx_count != RX_RING_SIZE) { + struct sk_buff *skb; + + skb = netdev_alloc_skb(dev, np->rx_buf_sz); + if (skb == NULL) + break; /* Better luck next round. */ + + while (np->lack_rxbuf->skbuff) + np->lack_rxbuf = np->lack_rxbuf->next_desc_logical; + + np->lack_rxbuf->skbuff = skb; + np->lack_rxbuf->buffer = dma_map_single(&np->pci_dev->dev, + skb->data, + np->rx_buf_sz, + DMA_FROM_DEVICE); + np->lack_rxbuf->status = RXOWN; + ++np->really_rx_count; + } +} + + +static void netdev_timer(struct timer_list *t) +{ + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii.dev; + void __iomem *ioaddr = np->mem; + int old_crvalue = np->crvalue; + unsigned int old_linkok = np->linkok; + unsigned long flags; + + if (debug) + printk(KERN_DEBUG "%s: Media selection timer tick, status %8.8x " + "config %8.8x.\n", dev->name, ioread32(ioaddr + ISR), + ioread32(ioaddr + TCRRCR)); + + spin_lock_irqsave(&np->lock, flags); + + if (np->flags == HAS_MII_XCVR) { + getlinkstatus(dev); + if ((old_linkok == 0) && (np->linkok == 1)) { /* we need to detect the media type again */ + getlinktype(dev); + if (np->crvalue != old_crvalue) { + stop_nic_rxtx(ioaddr, np->crvalue); + iowrite32(np->crvalue, ioaddr + TCRRCR); + } + } + } + + allocate_rx_buffers(dev); + + spin_unlock_irqrestore(&np->lock, flags); + + np->timer.expires = RUN_AT(10 * HZ); + add_timer(&np->timer); +} + + +/* Take lock before calling */ +/* Reset chip and disable rx, tx and interrupts */ +static void reset_and_disable_rxtx(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + int delay=51; + + /* Reset the chip's Tx and Rx processes. */ + stop_nic_rxtx(ioaddr, 0); + + /* Disable interrupts by clearing the interrupt mask. */ + iowrite32(0, ioaddr + IMR); + + /* Reset the chip to erase previous misconfiguration. */ + iowrite32(0x00000001, ioaddr + BCR); + + /* Ueimor: wait for 50 PCI cycles (and flush posted writes btw). + We surely wait too long (address+data phase). Who cares? */ + while (--delay) { + ioread32(ioaddr + BCR); + rmb(); + } +} + + +/* Take lock before calling */ +/* Restore chip after reset */ +static void enable_rxtx(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + + reset_rx_descriptors(dev); + + iowrite32(np->tx_ring_dma + ((char*)np->cur_tx - (char*)np->tx_ring), + ioaddr + TXLBA); + iowrite32(np->rx_ring_dma + ((char*)np->cur_rx - (char*)np->rx_ring), + ioaddr + RXLBA); + + iowrite32(np->bcrvalue, ioaddr + BCR); + + iowrite32(0, ioaddr + RXPDR); + __set_rx_mode(dev); /* changes np->crvalue, writes it into TCRRCR */ + + /* Clear and Enable interrupts by setting the interrupt mask. */ + iowrite32(FBE | TUNF | CNTOVF | RBU | TI | RI, ioaddr + ISR); + iowrite32(np->imrvalue, ioaddr + IMR); + + iowrite32(0, ioaddr + TXPDR); +} + + +static void reset_timer(struct timer_list *t) +{ + struct netdev_private *np = from_timer(np, t, reset_timer); + struct net_device *dev = np->mii.dev; + unsigned long flags; + + printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name); + + spin_lock_irqsave(&np->lock, flags); + np->crvalue = np->crvalue_sv; + np->imrvalue = np->imrvalue_sv; + + reset_and_disable_rxtx(dev); + /* works for me without this: + reset_tx_descriptors(dev); */ + enable_rxtx(dev); + netif_start_queue(dev); /* FIXME: or netif_wake_queue(dev); ? */ + + np->reset_timer_armed = 0; + + spin_unlock_irqrestore(&np->lock, flags); +} + + +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + unsigned long flags; + int i; + + printk(KERN_WARNING + "%s: Transmit timed out, status %8.8x, resetting...\n", + dev->name, ioread32(ioaddr + ISR)); + + { + printk(KERN_DEBUG " Rx ring %p: ", np->rx_ring); + for (i = 0; i < RX_RING_SIZE; i++) + printk(KERN_CONT " %8.8x", + (unsigned int) np->rx_ring[i].status); + printk(KERN_CONT "\n"); + printk(KERN_DEBUG " Tx ring %p: ", np->tx_ring); + for (i = 0; i < TX_RING_SIZE; i++) + printk(KERN_CONT " %4.4x", np->tx_ring[i].status); + printk(KERN_CONT "\n"); + } + + spin_lock_irqsave(&np->lock, flags); + + reset_and_disable_rxtx(dev); + reset_tx_descriptors(dev); + enable_rxtx(dev); + + spin_unlock_irqrestore(&np->lock, flags); + + netif_trans_update(dev); /* prevent tx timeout */ + dev->stats.tx_errors++; + netif_wake_queue(dev); /* or .._start_.. ?? */ +} + + +/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ +static void init_ring(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int i; + + /* initialize rx variables */ + np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); + np->cur_rx = &np->rx_ring[0]; + np->lack_rxbuf = np->rx_ring; + np->really_rx_count = 0; + + /* initial rx descriptors. */ + for (i = 0; i < RX_RING_SIZE; i++) { + np->rx_ring[i].status = 0; + np->rx_ring[i].control = np->rx_buf_sz << RBSShift; + np->rx_ring[i].next_desc = np->rx_ring_dma + + (i + 1)*sizeof(struct fealnx_desc); + np->rx_ring[i].next_desc_logical = &np->rx_ring[i + 1]; + np->rx_ring[i].skbuff = NULL; + } + + /* for the last rx descriptor */ + np->rx_ring[i - 1].next_desc = np->rx_ring_dma; + np->rx_ring[i - 1].next_desc_logical = np->rx_ring; + + /* allocate skb for rx buffers */ + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz); + + if (skb == NULL) { + np->lack_rxbuf = &np->rx_ring[i]; + break; + } + + ++np->really_rx_count; + np->rx_ring[i].skbuff = skb; + np->rx_ring[i].buffer = dma_map_single(&np->pci_dev->dev, + skb->data, + np->rx_buf_sz, + DMA_FROM_DEVICE); + np->rx_ring[i].status = RXOWN; + np->rx_ring[i].control |= RXIC; + } + + /* initialize tx variables */ + np->cur_tx = &np->tx_ring[0]; + np->cur_tx_copy = &np->tx_ring[0]; + np->really_tx_count = 0; + np->free_tx_count = TX_RING_SIZE; + + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_ring[i].status = 0; + /* do we need np->tx_ring[i].control = XXX; ?? */ + np->tx_ring[i].next_desc = np->tx_ring_dma + + (i + 1)*sizeof(struct fealnx_desc); + np->tx_ring[i].next_desc_logical = &np->tx_ring[i + 1]; + np->tx_ring[i].skbuff = NULL; + } + + /* for the last tx descriptor */ + np->tx_ring[i - 1].next_desc = np->tx_ring_dma; + np->tx_ring[i - 1].next_desc_logical = &np->tx_ring[0]; +} + + +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + unsigned long flags; + + spin_lock_irqsave(&np->lock, flags); + + np->cur_tx_copy->skbuff = skb; + +#define one_buffer +#define BPT 1022 +#if defined(one_buffer) + np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + np->cur_tx_copy->control = TXIC | TXLD | TXFD | CRCEnable | PADEnable; + np->cur_tx_copy->control |= (skb->len << PKTSShift); /* pkt size */ + np->cur_tx_copy->control |= (skb->len << TBSShift); /* buffer size */ +// 89/12/29 add, + if (np->pci_dev->device == 0x891) + np->cur_tx_copy->control |= ETIControl | RetryTxLC; + np->cur_tx_copy->status = TXOWN; + np->cur_tx_copy = np->cur_tx_copy->next_desc_logical; + --np->free_tx_count; +#elif defined(two_buffer) + if (skb->len > BPT) { + struct fealnx_desc *next; + + /* for the first descriptor */ + np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev, + skb->data, BPT, + DMA_TO_DEVICE); + np->cur_tx_copy->control = TXIC | TXFD | CRCEnable | PADEnable; + np->cur_tx_copy->control |= (skb->len << PKTSShift); /* pkt size */ + np->cur_tx_copy->control |= (BPT << TBSShift); /* buffer size */ + + /* for the last descriptor */ + next = np->cur_tx_copy->next_desc_logical; + next->skbuff = skb; + next->control = TXIC | TXLD | CRCEnable | PADEnable; + next->control |= (skb->len << PKTSShift); /* pkt size */ + next->control |= ((skb->len - BPT) << TBSShift); /* buf size */ +// 89/12/29 add, + if (np->pci_dev->device == 0x891) + np->cur_tx_copy->control |= ETIControl | RetryTxLC; + next->buffer = dma_map_single(&ep->pci_dev->dev, + skb->data + BPT, skb->len - BPT, + DMA_TO_DEVICE); + + next->status = TXOWN; + np->cur_tx_copy->status = TXOWN; + + np->cur_tx_copy = next->next_desc_logical; + np->free_tx_count -= 2; + } else { + np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev, + skb->data, skb->len, + DMA_TO_DEVICE); + np->cur_tx_copy->control = TXIC | TXLD | TXFD | CRCEnable | PADEnable; + np->cur_tx_copy->control |= (skb->len << PKTSShift); /* pkt size */ + np->cur_tx_copy->control |= (skb->len << TBSShift); /* buffer size */ +// 89/12/29 add, + if (np->pci_dev->device == 0x891) + np->cur_tx_copy->control |= ETIControl | RetryTxLC; + np->cur_tx_copy->status = TXOWN; + np->cur_tx_copy = np->cur_tx_copy->next_desc_logical; + --np->free_tx_count; + } +#endif + + if (np->free_tx_count < 2) + netif_stop_queue(dev); + ++np->really_tx_count; + iowrite32(0, np->mem + TXPDR); + + spin_unlock_irqrestore(&np->lock, flags); + return NETDEV_TX_OK; +} + + +/* Take lock before calling */ +/* Chip probably hosed tx ring. Clean up. */ +static void reset_tx_descriptors(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + struct fealnx_desc *cur; + int i; + + /* initialize tx variables */ + np->cur_tx = &np->tx_ring[0]; + np->cur_tx_copy = &np->tx_ring[0]; + np->really_tx_count = 0; + np->free_tx_count = TX_RING_SIZE; + + for (i = 0; i < TX_RING_SIZE; i++) { + cur = &np->tx_ring[i]; + if (cur->skbuff) { + dma_unmap_single(&np->pci_dev->dev, cur->buffer, + cur->skbuff->len, DMA_TO_DEVICE); + dev_kfree_skb_any(cur->skbuff); + cur->skbuff = NULL; + } + cur->status = 0; + cur->control = 0; /* needed? */ + /* probably not needed. We do it for purely paranoid reasons */ + cur->next_desc = np->tx_ring_dma + + (i + 1)*sizeof(struct fealnx_desc); + cur->next_desc_logical = &np->tx_ring[i + 1]; + } + /* for the last tx descriptor */ + np->tx_ring[TX_RING_SIZE - 1].next_desc = np->tx_ring_dma; + np->tx_ring[TX_RING_SIZE - 1].next_desc_logical = &np->tx_ring[0]; +} + + +/* Take lock and stop rx before calling this */ +static void reset_rx_descriptors(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + struct fealnx_desc *cur = np->cur_rx; + int i; + + allocate_rx_buffers(dev); + + for (i = 0; i < RX_RING_SIZE; i++) { + if (cur->skbuff) + cur->status = RXOWN; + cur = cur->next_desc_logical; + } + + iowrite32(np->rx_ring_dma + ((char*)np->cur_rx - (char*)np->rx_ring), + np->mem + RXLBA); +} + + +/* The interrupt handler does all of the Rx thread work and cleans up + after the Tx thread. */ +static irqreturn_t intr_handler(int irq, void *dev_instance) +{ + struct net_device *dev = (struct net_device *) dev_instance; + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + long boguscnt = max_interrupt_work; + unsigned int num_tx = 0; + int handled = 0; + + spin_lock(&np->lock); + + iowrite32(0, ioaddr + IMR); + + do { + u32 intr_status = ioread32(ioaddr + ISR); + + /* Acknowledge all of the current interrupt sources ASAP. */ + iowrite32(intr_status, ioaddr + ISR); + + if (debug) + printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", dev->name, + intr_status); + + if (!(intr_status & np->imrvalue)) + break; + + handled = 1; + +// 90/1/16 delete, +// +// if (intr_status & FBE) +// { /* fatal error */ +// stop_nic_tx(ioaddr, 0); +// stop_nic_rx(ioaddr, 0); +// break; +// }; + + if (intr_status & TUNF) + iowrite32(0, ioaddr + TXPDR); + + if (intr_status & CNTOVF) { + /* missed pkts */ + dev->stats.rx_missed_errors += + ioread32(ioaddr + TALLY) & 0x7fff; + + /* crc error */ + dev->stats.rx_crc_errors += + (ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16; + } + + if (intr_status & (RI | RBU)) { + if (intr_status & RI) + netdev_rx(dev); + else { + stop_nic_rx(ioaddr, np->crvalue); + reset_rx_descriptors(dev); + iowrite32(np->crvalue, ioaddr + TCRRCR); + } + } + + while (np->really_tx_count) { + long tx_status = np->cur_tx->status; + long tx_control = np->cur_tx->control; + + if (!(tx_control & TXLD)) { /* this pkt is combined by two tx descriptors */ + struct fealnx_desc *next; + + next = np->cur_tx->next_desc_logical; + tx_status = next->status; + tx_control = next->control; + } + + if (tx_status & TXOWN) + break; + + if (!(np->crvalue & CR_W_ENH)) { + if (tx_status & (CSL | LC | EC | UDF | HF)) { + dev->stats.tx_errors++; + if (tx_status & EC) + dev->stats.tx_aborted_errors++; + if (tx_status & CSL) + dev->stats.tx_carrier_errors++; + if (tx_status & LC) + dev->stats.tx_window_errors++; + if (tx_status & UDF) + dev->stats.tx_fifo_errors++; + if ((tx_status & HF) && np->mii.full_duplex == 0) + dev->stats.tx_heartbeat_errors++; + + } else { + dev->stats.tx_bytes += + ((tx_control & PKTSMask) >> PKTSShift); + + dev->stats.collisions += + ((tx_status & NCRMask) >> NCRShift); + dev->stats.tx_packets++; + } + } else { + dev->stats.tx_bytes += + ((tx_control & PKTSMask) >> PKTSShift); + dev->stats.tx_packets++; + } + + /* Free the original skb. */ + dma_unmap_single(&np->pci_dev->dev, + np->cur_tx->buffer, + np->cur_tx->skbuff->len, + DMA_TO_DEVICE); + dev_consume_skb_irq(np->cur_tx->skbuff); + np->cur_tx->skbuff = NULL; + --np->really_tx_count; + if (np->cur_tx->control & TXLD) { + np->cur_tx = np->cur_tx->next_desc_logical; + ++np->free_tx_count; + } else { + np->cur_tx = np->cur_tx->next_desc_logical; + np->cur_tx = np->cur_tx->next_desc_logical; + np->free_tx_count += 2; + } + num_tx++; + } /* end of for loop */ + + if (num_tx && np->free_tx_count >= 2) + netif_wake_queue(dev); + + /* read transmit status for enhanced mode only */ + if (np->crvalue & CR_W_ENH) { + long data; + + data = ioread32(ioaddr + TSR); + dev->stats.tx_errors += (data & 0xff000000) >> 24; + dev->stats.tx_aborted_errors += + (data & 0xff000000) >> 24; + dev->stats.tx_window_errors += + (data & 0x00ff0000) >> 16; + dev->stats.collisions += (data & 0x0000ffff); + } + + if (--boguscnt < 0) { + printk(KERN_WARNING "%s: Too much work at interrupt, " + "status=0x%4.4x.\n", dev->name, intr_status); + if (!np->reset_timer_armed) { + np->reset_timer_armed = 1; + np->reset_timer.expires = RUN_AT(HZ/2); + add_timer(&np->reset_timer); + stop_nic_rxtx(ioaddr, 0); + netif_stop_queue(dev); + /* or netif_tx_disable(dev); ?? */ + /* Prevent other paths from enabling tx,rx,intrs */ + np->crvalue_sv = np->crvalue; + np->imrvalue_sv = np->imrvalue; + np->crvalue &= ~(CR_W_TXEN | CR_W_RXEN); /* or simply = 0? */ + np->imrvalue = 0; + } + + break; + } + } while (1); + + /* read the tally counters */ + /* missed pkts */ + dev->stats.rx_missed_errors += ioread32(ioaddr + TALLY) & 0x7fff; + + /* crc error */ + dev->stats.rx_crc_errors += + (ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16; + + if (debug) + printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", + dev->name, ioread32(ioaddr + ISR)); + + iowrite32(np->imrvalue, ioaddr + IMR); + + spin_unlock(&np->lock); + + return IRQ_RETVAL(handled); +} + + +/* This routine is logically part of the interrupt handler, but separated + for clarity and better register allocation. */ +static int netdev_rx(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + + /* If EOP is set on the next entry, it's a new packet. Send it up. */ + while (!(np->cur_rx->status & RXOWN) && np->cur_rx->skbuff) { + s32 rx_status = np->cur_rx->status; + + if (np->really_rx_count == 0) + break; + + if (debug) + printk(KERN_DEBUG " netdev_rx() status was %8.8x.\n", rx_status); + + if ((!((rx_status & RXFSD) && (rx_status & RXLSD))) || + (rx_status & ErrorSummary)) { + if (rx_status & ErrorSummary) { /* there was a fatal error */ + if (debug) + printk(KERN_DEBUG + "%s: Receive error, Rx status %8.8x.\n", + dev->name, rx_status); + + dev->stats.rx_errors++; /* end of a packet. */ + if (rx_status & (LONGPKT | RUNTPKT)) + dev->stats.rx_length_errors++; + if (rx_status & RXER) + dev->stats.rx_frame_errors++; + if (rx_status & CRC) + dev->stats.rx_crc_errors++; + } else { + int need_to_reset = 0; + int desno = 0; + + if (rx_status & RXFSD) { /* this pkt is too long, over one rx buffer */ + struct fealnx_desc *cur; + + /* check this packet is received completely? */ + cur = np->cur_rx; + while (desno <= np->really_rx_count) { + ++desno; + if ((!(cur->status & RXOWN)) && + (cur->status & RXLSD)) + break; + /* goto next rx descriptor */ + cur = cur->next_desc_logical; + } + if (desno > np->really_rx_count) + need_to_reset = 1; + } else /* RXLSD did not find, something error */ + need_to_reset = 1; + + if (need_to_reset == 0) { + int i; + + dev->stats.rx_length_errors++; + + /* free all rx descriptors related this long pkt */ + for (i = 0; i < desno; ++i) { + if (!np->cur_rx->skbuff) { + printk(KERN_DEBUG + "%s: I'm scared\n", dev->name); + break; + } + np->cur_rx->status = RXOWN; + np->cur_rx = np->cur_rx->next_desc_logical; + } + continue; + } else { /* rx error, need to reset this chip */ + stop_nic_rx(ioaddr, np->crvalue); + reset_rx_descriptors(dev); + iowrite32(np->crvalue, ioaddr + TCRRCR); + } + break; /* exit the while loop */ + } + } else { /* this received pkt is ok */ + + struct sk_buff *skb; + /* Omit the four octet CRC from the length. */ + short pkt_len = ((rx_status & FLNGMASK) >> FLNGShift) - 4; + +#ifndef final_version + if (debug) + printk(KERN_DEBUG " netdev_rx() normal Rx pkt length %d" + " status %x.\n", pkt_len, rx_status); +#endif + + /* Check if the packet is long enough to accept without copying + to a minimally-sized skbuff. */ + if (pkt_len < rx_copybreak && + (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { + skb_reserve(skb, 2); /* 16 byte align the IP header */ + dma_sync_single_for_cpu(&np->pci_dev->dev, + np->cur_rx->buffer, + np->rx_buf_sz, + DMA_FROM_DEVICE); + /* Call copy + cksum if available. */ + +#if ! defined(__alpha__) + skb_copy_to_linear_data(skb, + np->cur_rx->skbuff->data, pkt_len); + skb_put(skb, pkt_len); +#else + skb_put_data(skb, np->cur_rx->skbuff->data, + pkt_len); +#endif + dma_sync_single_for_device(&np->pci_dev->dev, + np->cur_rx->buffer, + np->rx_buf_sz, + DMA_FROM_DEVICE); + } else { + dma_unmap_single(&np->pci_dev->dev, + np->cur_rx->buffer, + np->rx_buf_sz, + DMA_FROM_DEVICE); + skb_put(skb = np->cur_rx->skbuff, pkt_len); + np->cur_rx->skbuff = NULL; + --np->really_rx_count; + } + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); + dev->stats.rx_packets++; + dev->stats.rx_bytes += pkt_len; + } + + np->cur_rx = np->cur_rx->next_desc_logical; + } /* end of while loop */ + + /* allocate skb for rx buffers */ + allocate_rx_buffers(dev); + + return 0; +} + + +static struct net_device_stats *get_stats(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + + /* The chip only need report frame silently dropped. */ + if (netif_running(dev)) { + dev->stats.rx_missed_errors += + ioread32(ioaddr + TALLY) & 0x7fff; + dev->stats.rx_crc_errors += + (ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16; + } + + return &dev->stats; +} + + +/* for dev->set_multicast_list */ +static void set_rx_mode(struct net_device *dev) +{ + spinlock_t *lp = &((struct netdev_private *)netdev_priv(dev))->lock; + unsigned long flags; + spin_lock_irqsave(lp, flags); + __set_rx_mode(dev); + spin_unlock_irqrestore(lp, flags); +} + + +/* Take lock before calling */ +static void __set_rx_mode(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + u32 mc_filter[2]; /* Multicast hash filter */ + u32 rx_mode; + + if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = CR_W_PROM | CR_W_AB | CR_W_AM; + } else if ((netdev_mc_count(dev) > multicast_filter_limit) || + (dev->flags & IFF_ALLMULTI)) { + /* Too many to match, or accept all multicasts. */ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = CR_W_AB | CR_W_AM; + } else { + struct netdev_hw_addr *ha; + + memset(mc_filter, 0, sizeof(mc_filter)); + netdev_for_each_mc_addr(ha, dev) { + unsigned int bit; + bit = (ether_crc(ETH_ALEN, ha->addr) >> 26) ^ 0x3F; + mc_filter[bit >> 5] |= (1 << bit); + } + rx_mode = CR_W_AB | CR_W_AM; + } + + stop_nic_rxtx(ioaddr, np->crvalue); + + iowrite32(mc_filter[0], ioaddr + MAR0); + iowrite32(mc_filter[1], ioaddr + MAR1); + np->crvalue &= ~CR_W_RXMODEMASK; + np->crvalue |= rx_mode; + iowrite32(np->crvalue, ioaddr + TCRRCR); +} + +static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct netdev_private *np = netdev_priv(dev); + + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info)); +} + +static int netdev_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + + spin_lock_irq(&np->lock); + mii_ethtool_get_link_ksettings(&np->mii, cmd); + spin_unlock_irq(&np->lock); + + return 0; +} + +static int netdev_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int rc; + + spin_lock_irq(&np->lock); + rc = mii_ethtool_set_link_ksettings(&np->mii, cmd); + spin_unlock_irq(&np->lock); + + return rc; +} + +static int netdev_nway_reset(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_nway_restart(&np->mii); +} + +static u32 netdev_get_link(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_link_ok(&np->mii); +} + +static u32 netdev_get_msglevel(struct net_device *dev) +{ + return debug; +} + +static void netdev_set_msglevel(struct net_device *dev, u32 value) +{ + debug = value; +} + +static const struct ethtool_ops netdev_ethtool_ops = { + .get_drvinfo = netdev_get_drvinfo, + .nway_reset = netdev_nway_reset, + .get_link = netdev_get_link, + .get_msglevel = netdev_get_msglevel, + .set_msglevel = netdev_set_msglevel, + .get_link_ksettings = netdev_get_link_ksettings, + .set_link_ksettings = netdev_set_link_ksettings, +}; + +static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int rc; + + if (!netif_running(dev)) + return -EINVAL; + + spin_lock_irq(&np->lock); + rc = generic_mii_ioctl(&np->mii, if_mii(rq), cmd, NULL); + spin_unlock_irq(&np->lock); + + return rc; +} + + +static int netdev_close(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->mem; + int i; + + netif_stop_queue(dev); + + /* Disable interrupts by clearing the interrupt mask. */ + iowrite32(0x0000, ioaddr + IMR); + + /* Stop the chip's Tx and Rx processes. */ + stop_nic_rxtx(ioaddr, 0); + + del_timer_sync(&np->timer); + del_timer_sync(&np->reset_timer); + + free_irq(np->pci_dev->irq, dev); + + /* Free all the skbuffs in the Rx queue. */ + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb = np->rx_ring[i].skbuff; + + np->rx_ring[i].status = 0; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + np->rx_ring[i].buffer, np->rx_buf_sz, + DMA_FROM_DEVICE); + dev_kfree_skb(skb); + np->rx_ring[i].skbuff = NULL; + } + } + + for (i = 0; i < TX_RING_SIZE; i++) { + struct sk_buff *skb = np->tx_ring[i].skbuff; + + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + np->tx_ring[i].buffer, skb->len, + DMA_TO_DEVICE); + dev_kfree_skb(skb); + np->tx_ring[i].skbuff = NULL; + } + } + + return 0; +} + +static const struct pci_device_id fealnx_pci_tbl[] = { + {0x1516, 0x0800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {0x1516, 0x0803, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1}, + {0x1516, 0x0891, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2}, + {} /* terminate list */ +}; +MODULE_DEVICE_TABLE(pci, fealnx_pci_tbl); + + +static struct pci_driver fealnx_driver = { + .name = "fealnx", + .id_table = fealnx_pci_tbl, + .probe = fealnx_init_one, + .remove = fealnx_remove_one, +}; + +module_pci_driver(fealnx_driver); -- cgit From 2aab4b96900272885bc157f8b236abf1cdc02e08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Mar 2023 16:45:30 +0000 Subject: af_unix: fix struct pid leaks in OOB support syzbot reported struct pid leak [1]. Issue is that queue_oob() calls maybe_add_creds() which potentially holds a reference on a pid. But skb->destructor is not set (either directly or by calling unix_scm_to_skb()) This means that subsequent kfree_skb() or consume_skb() would leak this reference. In this fix, I chose to fully support scm even for the OOB message. [1] BUG: memory leak unreferenced object 0xffff8881053e7f80 (size 128): comm "syz-executor242", pid 5066, jiffies 4294946079 (age 13.220s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] alloc_pid+0x6a/0x560 kernel/pid.c:180 [] copy_process+0x169f/0x26c0 kernel/fork.c:2285 [] kernel_clone+0xf7/0x610 kernel/fork.c:2684 [] __do_sys_clone+0x7c/0xb0 kernel/fork.c:2825 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: syzbot+7699d9e5635c10253a27@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Rao Shoaib Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230307164530.771896-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 347122c3575e..0b0f18ecce44 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2105,7 +2105,8 @@ out: #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) -static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, + struct scm_cookie *scm, bool fds_sent) { struct unix_sock *ousk = unix_sk(other); struct sk_buff *skb; @@ -2116,6 +2117,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (!skb) return err; + err = unix_scm_to_skb(scm, skb, !fds_sent); + if (err < 0) { + kfree_skb(skb); + return err; + } skb_put(skb, 1); err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); @@ -2243,7 +2249,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { - err = queue_oob(sock, msg, other); + err = queue_oob(sock, msg, other, &scm, fds_sent); if (err) goto out_err; sent++; -- cgit From 649c15c7691e9b13cbe9bf6c65c365350e056067 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 7 Mar 2023 14:37:07 -0300 Subject: net: avoid double iput when sock_alloc_file fails When sock_alloc_file fails to allocate a file, it will call sock_release. __sys_socket_file should then not call sock_release again, otherwise there will be a double free. [ 89.319884] ------------[ cut here ]------------ [ 89.320286] kernel BUG at fs/inode.c:1764! [ 89.320656] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 89.321051] CPU: 7 PID: 125 Comm: iou-sqp-124 Not tainted 6.2.0+ #361 [ 89.321535] RIP: 0010:iput+0x1ff/0x240 [ 89.321808] Code: d1 83 e1 03 48 83 f9 02 75 09 48 81 fa 00 10 00 00 77 05 83 e2 01 75 1f 4c 89 ef e8 fb d2 ba 00 e9 80 fe ff ff c3 cc cc cc cc <0f> 0b 0f 0b e9 d0 fe ff ff 0f 0b eb 8d 49 8d b4 24 08 01 00 00 48 [ 89.322760] RSP: 0018:ffffbdd60068bd50 EFLAGS: 00010202 [ 89.323036] RAX: 0000000000000000 RBX: ffff9d7ad3cacac0 RCX: 0000000000001107 [ 89.323412] RDX: 000000000003af00 RSI: 0000000000000000 RDI: ffff9d7ad3cacb40 [ 89.323785] RBP: ffffbdd60068bd68 R08: ffffffffffffffff R09: ffffffffab606438 [ 89.324157] R10: ffffffffacb3dfa0 R11: 6465686361657256 R12: ffff9d7ad3cacb40 [ 89.324529] R13: 0000000080000001 R14: 0000000080000001 R15: 0000000000000002 [ 89.324904] FS: 00007f7b28516740(0000) GS:ffff9d7aeb1c0000(0000) knlGS:0000000000000000 [ 89.325328] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 89.325629] CR2: 00007f0af52e96c0 CR3: 0000000002a02006 CR4: 0000000000770ee0 [ 89.326004] PKRU: 55555554 [ 89.326161] Call Trace: [ 89.326298] [ 89.326419] __sock_release+0xb5/0xc0 [ 89.326632] __sys_socket_file+0xb2/0xd0 [ 89.326844] io_socket+0x88/0x100 [ 89.327039] ? io_issue_sqe+0x6a/0x430 [ 89.327258] io_issue_sqe+0x67/0x430 [ 89.327450] io_submit_sqes+0x1fe/0x670 [ 89.327661] io_sq_thread+0x2e6/0x530 [ 89.327859] ? __pfx_autoremove_wake_function+0x10/0x10 [ 89.328145] ? __pfx_io_sq_thread+0x10/0x10 [ 89.328367] ret_from_fork+0x29/0x50 [ 89.328576] RIP: 0033:0x0 [ 89.328732] Code: Unable to access opcode bytes at 0xffffffffffffffd6. [ 89.329073] RSP: 002b:0000000000000000 EFLAGS: 00000202 ORIG_RAX: 00000000000001a9 [ 89.329477] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f7b28637a3d [ 89.329845] RDX: 00007fff4e4318a8 RSI: 00007fff4e4318b0 RDI: 0000000000000400 [ 89.330216] RBP: 00007fff4e431830 R08: 00007fff4e431711 R09: 00007fff4e4318b0 [ 89.330584] R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff4e441b38 [ 89.330950] R13: 0000563835e3e725 R14: 0000563835e40d10 R15: 00007f7b28784040 [ 89.331318] [ 89.331441] Modules linked in: [ 89.331617] ---[ end trace 0000000000000000 ]--- Fixes: da214a475f8b ("net: add __sys_socket_file()") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Jens Axboe Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230307173707.468744-1-cascardo@canonical.com Signed-off-by: Jakub Kicinski --- net/socket.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/socket.c b/net/socket.c index 6bae8ce7059e..9c92c0e6c4da 100644 --- a/net/socket.c +++ b/net/socket.c @@ -450,7 +450,9 @@ static struct file_system_type sock_fs_type = { * * Returns the &file bound with @sock, implicitly storing it * in sock->file. If dname is %NULL, sets to "". - * On failure the return is a ERR pointer (see linux/err.h). + * + * On failure @sock is released, and an ERR pointer is returned. + * * This function uses GFP_KERNEL internally. */ @@ -1638,7 +1640,6 @@ static struct socket *__sys_socket_create(int family, int type, int protocol) struct file *__sys_socket_file(int family, int type, int protocol) { struct socket *sock; - struct file *file; int flags; sock = __sys_socket_create(family, type, protocol); @@ -1649,11 +1650,7 @@ struct file *__sys_socket_file(int family, int type, int protocol) if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - file = sock_alloc_file(sock, flags, NULL); - if (IS_ERR(file)) - sock_release(sock); - - return file; + return sock_alloc_file(sock, flags, NULL); } int __sys_socket(int family, int type, int protocol) -- cgit From 6517a60b0307abdcca80133c237551cc7cc8e6ae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Mar 2023 16:39:22 -0800 Subject: tools: ynl: move the enum classes to shared code Move bulk of the EnumSet and EnumEntry code to shared code for reuse by cli. Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/__init__.py | 7 +-- tools/net/ynl/lib/nlspec.py | 96 +++++++++++++++++++++++++++++++++++++ tools/net/ynl/ynl-gen-c.py | 107 +++++++++--------------------------------- 3 files changed, 121 insertions(+), 89 deletions(-) diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py index a2cb8b16d6f1..4b3797fe784b 100644 --- a/tools/net/ynl/lib/__init__.py +++ b/tools/net/ynl/lib/__init__.py @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause -from .nlspec import SpecAttr, SpecAttrSet, SpecFamily, SpecOperation +from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \ + SpecFamily, SpecOperation from .ynl import YnlFamily -__all__ = ["SpecAttr", "SpecAttrSet", "SpecFamily", "SpecOperation", - "YnlFamily"] +__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet", + "SpecFamily", "SpecOperation", "YnlFamily"] diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 0a2cfb5862aa..7c1cf6c1499e 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -57,6 +57,91 @@ class SpecElement: pass +class SpecEnumEntry(SpecElement): + """ Entry within an enum declared in the Netlink spec. + + Attributes: + doc documentation string + enum_set back reference to the enum + value numerical value of this enum (use accessors in most situations!) + + Methods: + raw_value raw value, i.e. the id in the enum, unlike user value which is a mask for flags + user_value user value, same as raw value for enums, for flags it's the mask + """ + def __init__(self, enum_set, yaml, prev, value_start): + if isinstance(yaml, str): + yaml = {'name': yaml} + super().__init__(enum_set.family, yaml) + + self.doc = yaml.get('doc', '') + self.enum_set = enum_set + + if 'value' in yaml: + self.value = yaml['value'] + elif prev: + self.value = prev.value + 1 + else: + self.value = value_start + + def has_doc(self): + return bool(self.doc) + + def raw_value(self): + return self.value + + def user_value(self): + if self.enum_set['type'] == 'flags': + return 1 << self.value + else: + return self.value + + +class SpecEnumSet(SpecElement): + """ Enum type + + Represents an enumeration (list of numerical constants) + as declared in the "definitions" section of the spec. + + Attributes: + type enum or flags + entries entries by name + Methods: + get_mask for flags compute the mask of all defined values + """ + def __init__(self, family, yaml): + super().__init__(family, yaml) + + self.type = yaml['type'] + + prev_entry = None + value_start = self.yaml.get('value-start', 0) + self.entries = dict() + for entry in self.yaml['entries']: + e = self.new_entry(entry, prev_entry, value_start) + self.entries[e.name] = e + prev_entry = e + + def new_entry(self, entry, prev_entry, value_start): + return SpecEnumEntry(self, entry, prev_entry, value_start) + + def has_doc(self): + if 'doc' in self.yaml: + return True + for entry in self.entries.values(): + if entry.has_doc(): + return True + return False + + def get_mask(self): + mask = 0 + idx = self.yaml.get('value-start', 0) + for _ in self.entries.values(): + mask |= 1 << idx + idx += 1 + return mask + + class SpecAttr(SpecElement): """ Single Netlink atttribute type @@ -193,6 +278,7 @@ class SpecFamily(SpecElement): msgs dict of all messages (index by name) msgs_by_value dict of all messages (indexed by name) ops dict of all valid requests / responses + consts dict of all constants/enums """ def __init__(self, spec_path, schema_path=None): with open(spec_path, "r") as stream: @@ -222,6 +308,7 @@ class SpecFamily(SpecElement): self.req_by_value = collections.OrderedDict() self.rsp_by_value = collections.OrderedDict() self.ops = collections.OrderedDict() + self.consts = collections.OrderedDict() last_exception = None while len(self._resolution_list) > 0: @@ -242,6 +329,9 @@ class SpecFamily(SpecElement): if len(resolved) == 0: raise last_exception + def new_enum(self, elem): + return SpecEnumSet(self, elem) + def new_attr_set(self, elem): return SpecAttrSet(self, elem) @@ -296,6 +386,12 @@ class SpecFamily(SpecElement): def resolve(self): self.resolve_up(super()) + for elem in self.yaml['definitions']: + if elem['type'] == 'enum' or elem['type'] == 'flags': + self.consts[elem['name']] = self.new_enum(elem) + else: + self.consts[elem['name']] = elem + for elem in self.yaml['attribute-sets']: attr_set = self.new_attr_set(elem) self.attr_sets[elem['name']] = attr_set diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index c940ca834d3f..1bcc5354d800 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -6,7 +6,7 @@ import collections import os import yaml -from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation +from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry def c_upper(name): @@ -567,97 +567,37 @@ class Struct: self.inherited = [c_lower(x) for x in sorted(self._inherited)] -class EnumEntry: +class EnumEntry(SpecEnumEntry): def __init__(self, enum_set, yaml, prev, value_start): - if isinstance(yaml, str): - self.name = yaml - yaml = {} - self.doc = '' - else: - self.name = yaml['name'] - self.doc = yaml.get('doc', '') - - self.yaml = yaml - self.enum_set = enum_set - self.c_name = c_upper(enum_set.value_pfx + self.name) - - if 'value' in yaml: - self.value = yaml['value'] - if prev: - self.value_change = (self.value != prev.value + 1) - elif prev: - self.value_change = False - self.value = prev.value + 1 + super().__init__(enum_set, yaml, prev, value_start) + + if prev: + self.value_change = (self.value != prev.value + 1) else: - self.value = value_start self.value_change = (self.value != 0) - self.value_change = self.value_change or self.enum_set['type'] == 'flags' - def __getitem__(self, key): - return self.yaml[key] - - def __contains__(self, key): - return key in self.yaml - - def has_doc(self): - return bool(self.doc) + # Added by resolve: + self.c_name = None + delattr(self, "c_name") - # raw value, i.e. the id in the enum, unlike user value which is a mask for flags - def raw_value(self): - return self.value + def resolve(self): + self.resolve_up(super()) - # user value, same as raw value for enums, for flags it's the mask - def user_value(self): - if self.enum_set['type'] == 'flags': - return 1 << self.value - else: - return self.value + self.c_name = c_upper(self.enum_set.value_pfx + self.name) -class EnumSet: +class EnumSet(SpecEnumSet): def __init__(self, family, yaml): - self.yaml = yaml - self.family = family - self.render_name = c_lower(family.name + '-' + yaml['name']) self.enum_name = 'enum ' + self.render_name self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-") - self.type = yaml['type'] - - prev_entry = None - value_start = self.yaml.get('value-start', 0) - self.entries = {} - self.entry_list = [] - for entry in self.yaml['entries']: - e = EnumEntry(self, entry, prev_entry, value_start) - self.entries[e.name] = e - self.entry_list.append(e) - prev_entry = e - - def __getitem__(self, key): - return self.yaml[key] - - def __contains__(self, key): - return key in self.yaml - - def has_doc(self): - if 'doc' in self.yaml: - return True - for entry in self.entry_list: - if entry.has_doc(): - return True - return False + super().__init__(family, yaml) - def get_mask(self): - mask = 0 - idx = self.yaml.get('value-start', 0) - for _ in self.entry_list: - mask |= 1 << idx - idx += 1 - return mask + def new_entry(self, entry, prev_entry, value_start): + return EnumEntry(self, entry, prev_entry, value_start) class AttrSet(SpecAttrSet): @@ -792,8 +732,6 @@ class Family(SpecFamily): self.mcgrps = self.yaml.get('mcast-groups', {'list': []}) - self.consts = dict() - self.hooks = dict() for when in ['pre', 'post']: self.hooks[when] = dict() @@ -820,6 +758,9 @@ class Family(SpecFamily): if self.kernel_policy == 'global': self._load_global_policy() + def new_enum(self, elem): + return EnumSet(self, elem) + def new_attr_set(self, elem): return AttrSet(self, elem) @@ -837,12 +778,6 @@ class Family(SpecFamily): } def _dictify(self): - for elem in self.yaml['definitions']: - if elem['type'] == 'enum' or elem['type'] == 'flags': - self.consts[elem['name']] = EnumSet(self, elem) - else: - self.consts[elem['name']] = elem - ntf = [] for msg in self.msgs.values(): if 'notify' in msg: @@ -1980,7 +1915,7 @@ def render_uapi(family, cw): if 'doc' in enum: doc = ' - ' + enum['doc'] cw.write_doc_line(enum.enum_name + doc) - for entry in enum.entry_list: + for entry in enum.entries.values(): if entry.has_doc(): doc = '@' + entry.c_name + ': ' + entry['doc'] cw.write_doc_line(doc) @@ -1988,7 +1923,7 @@ def render_uapi(family, cw): uapi_enum_start(family, cw, const, 'name') name_pfx = const.get('name-prefix', f"{family.name}-{const['name']}-") - for entry in enum.entry_list: + for entry in enum.entries.values(): suffix = ',' if entry.value_change: suffix = f" = {entry.user_value()}" + suffix -- cgit From c311aaa74ca18bd0781d1d3bebd08484799f840c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Mar 2023 16:39:23 -0800 Subject: tools: ynl: fix enum-as-flags in the generic CLI Lorenzo points out that the generic CLI is broken for the netdev family. When I added the support for documentation of enums (and sparse enums) the client script was not updated. It expects the values in enum to be a list of names, now it can also be a dict (YAML object). Reported-by: Lorenzo Bianconi Fixes: e4b48ed460d3 ("tools: ynl: add a completely generic client") Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/nlspec.py | 7 +++++-- tools/net/ynl/lib/ynl.py | 9 ++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 7c1cf6c1499e..a34d088f6743 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -104,8 +104,9 @@ class SpecEnumSet(SpecElement): as declared in the "definitions" section of the spec. Attributes: - type enum or flags - entries entries by name + type enum or flags + entries entries by name + entries_by_val entries by value Methods: get_mask for flags compute the mask of all defined values """ @@ -117,9 +118,11 @@ class SpecEnumSet(SpecElement): prev_entry = None value_start = self.yaml.get('value-start', 0) self.entries = dict() + self.entries_by_val = dict() for entry in self.yaml['entries']: e = self.new_entry(entry, prev_entry, value_start) self.entries[e.name] = e + self.entries_by_val[e.raw_value()] = e prev_entry = e def new_entry(self, entry, prev_entry, value_start): diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index a842adc8e87e..90764a83c646 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -303,11 +303,6 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1) self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1) - self._types = dict() - - for elem in self.yaml.get('definitions', []): - self._types[elem['name']] = elem - self.async_msg_ids = set() self.async_msg_queue = [] @@ -353,13 +348,13 @@ class YnlFamily(SpecFamily): def _decode_enum(self, rsp, attr_spec): raw = rsp[attr_spec['name']] - enum = self._types[attr_spec['enum']] + enum = self.consts[attr_spec['enum']] i = attr_spec.get('value-start', 0) if 'enum-as-flags' in attr_spec and attr_spec['enum-as-flags']: value = set() while raw: if raw & 1: - value.add(enum['entries'][i]) + value.add(enum.entries_by_val[i].name) raw >>= 1 i += 1 else: -- cgit