From 3f14b377d01d8357eba032b4cabc8c1149b458b6 Mon Sep 17 00:00:00 2001
From: Tao Liu <taoliu828@163.com>
Date: Thu, 28 Dec 2023 16:14:57 +0800
Subject: net/sched: act_ct: fix skb leak and crash on ooo frags

act_ct adds skb->users before defragmentation. If frags arrive in order,
the last frag's reference is reset in:

  inet_frag_reasm_prepare
    skb_morph

which is not straightforward.

However when frags arrive out of order, nobody unref the last frag, and
all frags are leaked. The situation is even worse, as initiating packet
capture can lead to a crash[0] when skb has been cloned and shared at the
same time.

Fix the issue by removing skb_get() before defragmentation. act_ct
returns TC_ACT_CONSUMED when defrag failed or in progress.

[0]:
[  843.804823] ------------[ cut here ]------------
[  843.809659] kernel BUG at net/core/skbuff.c:2091!
[  843.814516] invalid opcode: 0000 [#1] PREEMPT SMP
[  843.819296] CPU: 7 PID: 0 Comm: swapper/7 Kdump: loaded Tainted: G S 6.7.0-rc3 #2
[  843.824107] Hardware name: XFUSION 1288H V6/BC13MBSBD, BIOS 1.29 11/25/2022
[  843.828953] RIP: 0010:pskb_expand_head+0x2ac/0x300
[  843.833805] Code: 8b 70 28 48 85 f6 74 82 48 83 c6 08 bf 01 00 00 00 e8 38 bd ff ff 8b 83 c0 00 00 00 48 03 83 c8 00 00 00 e9 62 ff ff ff 0f 0b <0f> 0b e8 8d d0 ff ff e9 b3 fd ff ff 81 7c 24 14 40 01 00 00 4c 89
[  843.843698] RSP: 0018:ffffc9000cce07c0 EFLAGS: 00010202
[  843.848524] RAX: 0000000000000002 RBX: ffff88811a211d00 RCX: 0000000000000820
[  843.853299] RDX: 0000000000000640 RSI: 0000000000000000 RDI: ffff88811a211d00
[  843.857974] RBP: ffff888127d39518 R08: 00000000bee97314 R09: 0000000000000000
[  843.862584] R10: 0000000000000000 R11: ffff8881109f0000 R12: 0000000000000880
[  843.867147] R13: ffff888127d39580 R14: 0000000000000640 R15: ffff888170f7b900
[  843.871680] FS:  0000000000000000(0000) GS:ffff889ffffc0000(0000) knlGS:0000000000000000
[  843.876242] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  843.880778] CR2: 00007fa42affcfb8 CR3: 000000011433a002 CR4: 0000000000770ef0
[  843.885336] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  843.889809] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  843.894229] PKRU: 55555554
[  843.898539] Call Trace:
[  843.902772]  <IRQ>
[  843.906922]  ? __die_body+0x1e/0x60
[  843.911032]  ? die+0x3c/0x60
[  843.915037]  ? do_trap+0xe2/0x110
[  843.918911]  ? pskb_expand_head+0x2ac/0x300
[  843.922687]  ? do_error_trap+0x65/0x80
[  843.926342]  ? pskb_expand_head+0x2ac/0x300
[  843.929905]  ? exc_invalid_op+0x50/0x60
[  843.933398]  ? pskb_expand_head+0x2ac/0x300
[  843.936835]  ? asm_exc_invalid_op+0x1a/0x20
[  843.940226]  ? pskb_expand_head+0x2ac/0x300
[  843.943580]  inet_frag_reasm_prepare+0xd1/0x240
[  843.946904]  ip_defrag+0x5d4/0x870
[  843.950132]  nf_ct_handle_fragments+0xec/0x130 [nf_conntrack]
[  843.953334]  tcf_ct_act+0x252/0xd90 [act_ct]
[  843.956473]  ? tcf_mirred_act+0x516/0x5a0 [act_mirred]
[  843.959657]  tcf_action_exec+0xa1/0x160
[  843.962823]  fl_classify+0x1db/0x1f0 [cls_flower]
[  843.966010]  ? skb_clone+0x53/0xc0
[  843.969173]  tcf_classify+0x24d/0x420
[  843.972333]  tc_run+0x8f/0xf0
[  843.975465]  __netif_receive_skb_core+0x67a/0x1080
[  843.978634]  ? dev_gro_receive+0x249/0x730
[  843.981759]  __netif_receive_skb_list_core+0x12d/0x260
[  843.984869]  netif_receive_skb_list_internal+0x1cb/0x2f0
[  843.987957]  ? mlx5e_handle_rx_cqe_mpwrq_rep+0xfa/0x1a0 [mlx5_core]
[  843.991170]  napi_complete_done+0x72/0x1a0
[  843.994305]  mlx5e_napi_poll+0x28c/0x6d0 [mlx5_core]
[  843.997501]  __napi_poll+0x25/0x1b0
[  844.000627]  net_rx_action+0x256/0x330
[  844.003705]  __do_softirq+0xb3/0x29b
[  844.006718]  irq_exit_rcu+0x9e/0xc0
[  844.009672]  common_interrupt+0x86/0xa0
[  844.012537]  </IRQ>
[  844.015285]  <TASK>
[  844.017937]  asm_common_interrupt+0x26/0x40
[  844.020591] RIP: 0010:acpi_safe_halt+0x1b/0x20
[  844.023247] Code: ff 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 65 48 8b 04 25 00 18 03 00 48 8b 00 a8 08 75 0c 66 90 0f 00 2d 81 d0 44 00 fb f4 <fa> c3 0f 1f 00 89 fa ec 48 8b 05 ee 88 ed 00 a9 00 00 00 80 75 11
[  844.028900] RSP: 0018:ffffc90000533e70 EFLAGS: 00000246
[  844.031725] RAX: 0000000000004000 RBX: 0000000000000001 RCX: 0000000000000000
[  844.034553] RDX: ffff889ffffc0000 RSI: ffffffff828b7f20 RDI: ffff88a090f45c64
[  844.037368] RBP: ffff88a0901a2800 R08: ffff88a090f45c00 R09: 00000000000317c0
[  844.040155] R10: 00ec812281150475 R11: ffff889fffff0e04 R12: ffffffff828b7fa0
[  844.042962] R13: ffffffff828b7f20 R14: 0000000000000001 R15: 0000000000000000
[  844.045819]  acpi_idle_enter+0x7b/0xc0
[  844.048621]  cpuidle_enter_state+0x7f/0x430
[  844.051451]  cpuidle_enter+0x2d/0x40
[  844.054279]  do_idle+0x1d4/0x240
[  844.057096]  cpu_startup_entry+0x2a/0x30
[  844.059934]  start_secondary+0x104/0x130
[  844.062787]  secondary_startup_64_no_verify+0x16b/0x16b
[  844.065674]  </TASK>

Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct")
Signed-off-by: Tao Liu <taoliu828@163.com>
Link: https://lore.kernel.org/r/20231228081457.936732-1-taoliu828@163.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_ct.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f69c47945175..3d50215985d5 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -850,7 +850,6 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 	if (err || !frag)
 		return err;
 
-	skb_get(skb);
 	err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
 	if (err)
 		return err;
@@ -999,12 +998,8 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 	nh_ofs = skb_network_offset(skb);
 	skb_pull_rcsum(skb, nh_ofs);
 	err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
-	if (err == -EINPROGRESS) {
-		retval = TC_ACT_STOLEN;
-		goto out_clear;
-	}
 	if (err)
-		goto drop;
+		goto out_frag;
 
 	err = nf_ct_skb_network_trim(skb, family);
 	if (err)
@@ -1091,6 +1086,11 @@ out_clear:
 		qdisc_skb_cb(skb)->pkt_len = skb->len;
 	return retval;
 
+out_frag:
+	if (err != -EINPROGRESS)
+		tcf_action_inc_drop_qstats(&c->common);
+	return TC_ACT_CONSUMED;
+
 drop:
 	tcf_action_inc_drop_qstats(&c->common);
 	return TC_ACT_SHOT;
-- 
cgit 


From ef210ef85d5cb543ce34a57803ed856d0c8c08c2 Mon Sep 17 00:00:00 2001
From: Asmaa Mnebhi <asmaa@nvidia.com>
Date: Fri, 5 Jan 2024 10:59:46 -0500
Subject: mlxbf_gige: Fix intermittent no ip issue

Although the link is up, there is no ip assigned on setups with high background
traffic. Nothing is transmitted nor received. The RX error count keeps on
increasing. After several minutes, the RX error count stagnates and the
GigE interface finally gets an ip.

The issue is that mlxbf_gige_rx_init() is called before phy_start().
As soon as the RX DMA is enabled in mlxbf_gige_rx_init(), the RX CI reaches the max
of 128, and becomes equal to RX PI. RX CI doesn't decrease since the code hasn't
ran phy_start yet.
Bring the PHY up before starting the RX.

Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
Reviewed-by: David Thompson <davthompson@nvidia.com>
Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 14 +++++++-------
 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c   |  6 +++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 954ba0826c61..ac7f0128619c 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -147,14 +147,14 @@ static int mlxbf_gige_open(struct net_device *netdev)
 	 */
 	priv->valid_polarity = 0;
 
-	err = mlxbf_gige_rx_init(priv);
+	phy_start(phydev);
+
+	err = mlxbf_gige_tx_init(priv);
 	if (err)
 		goto free_irqs;
-	err = mlxbf_gige_tx_init(priv);
+	err = mlxbf_gige_rx_init(priv);
 	if (err)
-		goto rx_deinit;
-
-	phy_start(phydev);
+		goto tx_deinit;
 
 	netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll);
 	napi_enable(&priv->napi);
@@ -176,8 +176,8 @@ static int mlxbf_gige_open(struct net_device *netdev)
 
 	return 0;
 
-rx_deinit:
-	mlxbf_gige_rx_deinit(priv);
+tx_deinit:
+	mlxbf_gige_tx_deinit(priv);
 
 free_irqs:
 	mlxbf_gige_free_irqs(priv);
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
index 227d01cace3f..699984358493 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
@@ -142,6 +142,9 @@ int mlxbf_gige_rx_init(struct mlxbf_gige *priv)
 	writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS_EN,
 	       priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS);
 
+	writeq(ilog2(priv->rx_q_entries),
+	       priv->base + MLXBF_GIGE_RX_WQE_SIZE_LOG2);
+
 	/* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to
 	 * indicate readiness to receive interrupts
 	 */
@@ -154,9 +157,6 @@ int mlxbf_gige_rx_init(struct mlxbf_gige *priv)
 	data |= MLXBF_GIGE_RX_DMA_EN;
 	writeq(data, priv->base + MLXBF_GIGE_RX_DMA);
 
-	writeq(ilog2(priv->rx_q_entries),
-	       priv->base + MLXBF_GIGE_RX_WQE_SIZE_LOG2);
-
 	return 0;
 
 free_wqe_and_skb:
-- 
cgit 


From a460f4a684511e007bbf1700758a41f05d9981e6 Mon Sep 17 00:00:00 2001
From: Asmaa Mnebhi <asmaa@nvidia.com>
Date: Fri, 5 Jan 2024 11:00:14 -0500
Subject: mlxbf_gige: Enable the GigE port in mlxbf_gige_open

At the moment, the GigE port is enabled in the mlxbf_gige_probe
function. If the mlxbf_gige_open is not executed, this could cause
pause frames to increase in the case where there is high backgroud
traffic. This results in clogging the port.
So move enabling the OOB port to mlxbf_gige_open.

Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
Reviewed-by: David Thompson <davthompson@nvidia.com>
Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index ac7f0128619c..3d09fa54598f 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -130,9 +130,15 @@ static int mlxbf_gige_open(struct net_device *netdev)
 {
 	struct mlxbf_gige *priv = netdev_priv(netdev);
 	struct phy_device *phydev = netdev->phydev;
+	u64 control;
 	u64 int_en;
 	int err;
 
+	/* Perform general init of GigE block */
+	control = readq(priv->base + MLXBF_GIGE_CONTROL);
+	control |= MLXBF_GIGE_CONTROL_PORT_EN;
+	writeq(control, priv->base + MLXBF_GIGE_CONTROL);
+
 	err = mlxbf_gige_request_irqs(priv);
 	if (err)
 		return err;
@@ -365,7 +371,6 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
 	void __iomem *plu_base;
 	void __iomem *base;
 	int addr, phy_irq;
-	u64 control;
 	int err;
 
 	base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC);
@@ -380,11 +385,6 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
 	if (IS_ERR(plu_base))
 		return PTR_ERR(plu_base);
 
-	/* Perform general init of GigE block */
-	control = readq(base + MLXBF_GIGE_CONTROL);
-	control |= MLXBF_GIGE_CONTROL_PORT_EN;
-	writeq(control, base + MLXBF_GIGE_CONTROL);
-
 	netdev = devm_alloc_etherdev(&pdev->dev, sizeof(*priv));
 	if (!netdev)
 		return -ENOMEM;
-- 
cgit 


From 4fc68c4c1a114ba597b4f3b082f04622dfa0e0f6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 5 Jan 2024 17:05:41 +0000
Subject: rxrpc: Fix skbuff cleanup of call's recvmsg_queue and rx_oos_queue

Fix rxrpc_cleanup_ring() to use rxrpc_purge_queue() rather than
skb_queue_purge() so that the count of outstanding skbuffs is correctly
updated when a failed call is cleaned up.

Without this rmmod may hang waiting for rxrpc_n_rx_skbs to become zero.

Fixes: 5d7edbc9231e ("rxrpc: Get rid of the Rx ring")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Eric Dumazet <edumazet@google.com>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Paolo Abeni <pabeni@redhat.com>
cc: linux-afs@lists.infradead.org
cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/call_object.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 773eecd1e979..f10b37c14772 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -545,8 +545,8 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
  */
 static void rxrpc_cleanup_ring(struct rxrpc_call *call)
 {
-	skb_queue_purge(&call->recvmsg_queue);
-	skb_queue_purge(&call->rx_oos_queue);
+	rxrpc_purge_queue(&call->recvmsg_queue);
+	rxrpc_purge_queue(&call->rx_oos_queue);
 }
 
 /*
-- 
cgit 


From d375b98e0248980681e5e56b712026174d617198 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 5 Jan 2024 17:03:13 +0000
Subject: ip6_tunnel: fix NEXTHDR_FRAGMENT handling in
 ip6_tnl_parse_tlv_enc_lim()

syzbot pointed out [1] that NEXTHDR_FRAGMENT handling is broken.

Reading frag_off can only be done if we pulled enough bytes
to skb->head. Currently we might access garbage.

[1]
BUG: KMSAN: uninit-value in ip6_tnl_parse_tlv_enc_lim+0x94f/0xbb0
ip6_tnl_parse_tlv_enc_lim+0x94f/0xbb0
ipxip6_tnl_xmit net/ipv6/ip6_tunnel.c:1326 [inline]
ip6_tnl_start_xmit+0xab2/0x1a70 net/ipv6/ip6_tunnel.c:1432
__netdev_start_xmit include/linux/netdevice.h:4940 [inline]
netdev_start_xmit include/linux/netdevice.h:4954 [inline]
xmit_one net/core/dev.c:3548 [inline]
dev_hard_start_xmit+0x247/0xa10 net/core/dev.c:3564
__dev_queue_xmit+0x33b8/0x5130 net/core/dev.c:4349
dev_queue_xmit include/linux/netdevice.h:3134 [inline]
neigh_connected_output+0x569/0x660 net/core/neighbour.c:1592
neigh_output include/net/neighbour.h:542 [inline]
ip6_finish_output2+0x23a9/0x2b30 net/ipv6/ip6_output.c:137
ip6_finish_output+0x855/0x12b0 net/ipv6/ip6_output.c:222
NF_HOOK_COND include/linux/netfilter.h:303 [inline]
ip6_output+0x323/0x610 net/ipv6/ip6_output.c:243
dst_output include/net/dst.h:451 [inline]
ip6_local_out+0xe9/0x140 net/ipv6/output_core.c:155
ip6_send_skb net/ipv6/ip6_output.c:1952 [inline]
ip6_push_pending_frames+0x1f9/0x560 net/ipv6/ip6_output.c:1972
rawv6_push_pending_frames+0xbe8/0xdf0 net/ipv6/raw.c:582
rawv6_sendmsg+0x2b66/0x2e70 net/ipv6/raw.c:920
inet_sendmsg+0x105/0x190 net/ipv4/af_inet.c:847
sock_sendmsg_nosec net/socket.c:730 [inline]
__sock_sendmsg net/socket.c:745 [inline]
____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584
___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638
__sys_sendmsg net/socket.c:2667 [inline]
__do_sys_sendmsg net/socket.c:2676 [inline]
__se_sys_sendmsg net/socket.c:2674 [inline]
__x64_sys_sendmsg+0x307/0x490 net/socket.c:2674
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x63/0x6b

Uninit was created at:
slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768
slab_alloc_node mm/slub.c:3478 [inline]
__kmem_cache_alloc_node+0x5c9/0x970 mm/slub.c:3517
__do_kmalloc_node mm/slab_common.c:1006 [inline]
__kmalloc_node_track_caller+0x118/0x3c0 mm/slab_common.c:1027
kmalloc_reserve+0x249/0x4a0 net/core/skbuff.c:582
pskb_expand_head+0x226/0x1a00 net/core/skbuff.c:2098
__pskb_pull_tail+0x13b/0x2310 net/core/skbuff.c:2655
pskb_may_pull_reason include/linux/skbuff.h:2673 [inline]
pskb_may_pull include/linux/skbuff.h:2681 [inline]
ip6_tnl_parse_tlv_enc_lim+0x901/0xbb0 net/ipv6/ip6_tunnel.c:408
ipxip6_tnl_xmit net/ipv6/ip6_tunnel.c:1326 [inline]
ip6_tnl_start_xmit+0xab2/0x1a70 net/ipv6/ip6_tunnel.c:1432
__netdev_start_xmit include/linux/netdevice.h:4940 [inline]
netdev_start_xmit include/linux/netdevice.h:4954 [inline]
xmit_one net/core/dev.c:3548 [inline]
dev_hard_start_xmit+0x247/0xa10 net/core/dev.c:3564
__dev_queue_xmit+0x33b8/0x5130 net/core/dev.c:4349
dev_queue_xmit include/linux/netdevice.h:3134 [inline]
neigh_connected_output+0x569/0x660 net/core/neighbour.c:1592
neigh_output include/net/neighbour.h:542 [inline]
ip6_finish_output2+0x23a9/0x2b30 net/ipv6/ip6_output.c:137
ip6_finish_output+0x855/0x12b0 net/ipv6/ip6_output.c:222
NF_HOOK_COND include/linux/netfilter.h:303 [inline]
ip6_output+0x323/0x610 net/ipv6/ip6_output.c:243
dst_output include/net/dst.h:451 [inline]
ip6_local_out+0xe9/0x140 net/ipv6/output_core.c:155
ip6_send_skb net/ipv6/ip6_output.c:1952 [inline]
ip6_push_pending_frames+0x1f9/0x560 net/ipv6/ip6_output.c:1972
rawv6_push_pending_frames+0xbe8/0xdf0 net/ipv6/raw.c:582
rawv6_sendmsg+0x2b66/0x2e70 net/ipv6/raw.c:920
inet_sendmsg+0x105/0x190 net/ipv4/af_inet.c:847
sock_sendmsg_nosec net/socket.c:730 [inline]
__sock_sendmsg net/socket.c:745 [inline]
____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584
___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638
__sys_sendmsg net/socket.c:2667 [inline]
__do_sys_sendmsg net/socket.c:2676 [inline]
__se_sys_sendmsg net/socket.c:2674 [inline]
__x64_sys_sendmsg+0x307/0x490 net/socket.c:2674
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x63/0x6b

CPU: 0 PID: 7345 Comm: syz-executor.3 Not tainted 6.7.0-rc8-syzkaller-00024-gac865f00af29 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023

Fixes: fbfa743a9d2a ("ipv6: fix ip6_tnl_parse_tlv_enc_lim()")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5e80e517f071..46c19bd48990 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -399,7 +399,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
 	unsigned int nhoff = raw - skb->data;
 	unsigned int off = nhoff + sizeof(*ipv6h);
-	u8 next, nexthdr = ipv6h->nexthdr;
+	u8 nexthdr = ipv6h->nexthdr;
 
 	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
 		struct ipv6_opt_hdr *hdr;
@@ -410,25 +410,25 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 
 		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
 		if (nexthdr == NEXTHDR_FRAGMENT) {
-			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
-			if (frag_hdr->frag_off)
-				break;
 			optlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH) {
 			optlen = ipv6_authlen(hdr);
 		} else {
 			optlen = ipv6_optlen(hdr);
 		}
-		/* cache hdr->nexthdr, since pskb_may_pull() might
-		 * invalidate hdr
-		 */
-		next = hdr->nexthdr;
-		if (nexthdr == NEXTHDR_DEST) {
-			u16 i = 2;
 
-			/* Remember : hdr is no longer valid at this point. */
-			if (!pskb_may_pull(skb, off + optlen))
+		if (!pskb_may_pull(skb, off + optlen))
+			break;
+
+		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr;
+
+			if (frag_hdr->frag_off)
 				break;
+		}
+		if (nexthdr == NEXTHDR_DEST) {
+			u16 i = 2;
 
 			while (1) {
 				struct ipv6_tlv_tnl_enc_lim *tel;
@@ -449,7 +449,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 					i++;
 			}
 		}
-		nexthdr = next;
+		nexthdr = hdr->nexthdr;
 		off += optlen;
 	}
 	return 0;
-- 
cgit 


From 61921bdaa132b580b6db6858e6d7dcdb870df5fe Mon Sep 17 00:00:00 2001
From: Petr Tesarik <petr@tesarici.cz>
Date: Fri, 5 Jan 2024 21:16:42 +0100
Subject: net: stmmac: fix ethtool per-queue statistics

Fix per-queue statistics for devices with more than one queue.

The output data pointer is currently reset in each loop iteration,
effectively summing all queue statistics in the first four u64 values.

The summary values are not even labeled correctly. For example, if eth0 has
2 queues, ethtool -S eth0 shows:

     q0_tx_pkt_n: 374 (actually tx_pkt_n over all queues)
     q0_tx_irq_n: 23  (actually tx_normal_irq_n over all queues)
     q1_tx_pkt_n: 462 (actually rx_pkt_n over all queues)
     q1_tx_irq_n: 446 (actually rx_normal_irq_n over all queues)
     q0_rx_pkt_n: 0
     q0_rx_irq_n: 0
     q1_rx_pkt_n: 0
     q1_rx_irq_n: 0

Fixes: 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary")
Cc: stable@vger.kernel.org
Signed-off-by: Petr Tesarik <petr@tesarici.cz>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index f628411ae4ae..112a36a698f1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -543,15 +543,12 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
 	u32 rx_cnt = priv->plat->rx_queues_to_use;
 	unsigned int start;
 	int q, stat;
-	u64 *pos;
 	char *p;
 
-	pos = data;
 	for (q = 0; q < tx_cnt; q++) {
 		struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
 		struct stmmac_txq_stats snapshot;
 
-		data = pos;
 		do {
 			start = u64_stats_fetch_begin(&txq_stats->syncp);
 			snapshot = *txq_stats;
@@ -559,17 +556,15 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
 
 		p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
 		for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
-			*data++ += (*(u64 *)p);
+			*data++ = (*(u64 *)p);
 			p += sizeof(u64);
 		}
 	}
 
-	pos = data;
 	for (q = 0; q < rx_cnt; q++) {
 		struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q];
 		struct stmmac_rxq_stats snapshot;
 
-		data = pos;
 		do {
 			start = u64_stats_fetch_begin(&rxq_stats->syncp);
 			snapshot = *rxq_stats;
@@ -577,7 +572,7 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
 
 		p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
 		for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
-			*data++ += (*(u64 *)p);
+			*data++ = (*(u64 *)p);
 			p += sizeof(u64);
 		}
 	}
-- 
cgit 


From ac631873c9e7a50d2a8de457cfc4b9f86666403e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 6 Jan 2024 01:12:22 +0100
Subject: net: ethernet: cortina: Drop TSO support

The recent change to allow large frames without hardware checksumming
slotted in software checksumming in the driver if hardware could not
do it.

This will however upset TSO (TCP Segment Offloading). Typical
error dumps includes this:

skb len=2961 headroom=222 headlen=66 tailroom=0
(...)
WARNING: CPU: 0 PID: 956 at net/core/dev.c:3259 skb_warn_bad_offload+0x7c/0x108
gemini-ethernet-port: caps=(0x0000010000154813, 0x00002007ffdd7889)

And the packets do not go through.

The TSO implementation is bogus: a TSO enabled driver must propagate
the skb_shinfo(skb)->gso_size value to the TSO engine on the NIC.

Drop the size check and TSO offloading features for now: this
needs to be fixed up properly.

After this ethernet works fine on Gemini devices with a direct connected
PHY such as D-Link DNS-313.

Also tested to still be working with a DSA switch using the Gemini
ethernet as conduit interface.

Link: https://lore.kernel.org/netdev/CANn89iJLfxng1sYL5Zk0mknXpyYQPCp83m3KgD2KJ2_hKCpEUg@mail.gmail.com/
Suggested-by: Eric Dumazet <edumazet@google.com>
Fixes: d4d0c5b4d279 ("net: ethernet: cortina: Handle large frames")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cortina/gemini.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 78287cfcbf63..705c3eb19cd3 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -79,8 +79,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 #define GMAC0_IRQ4_8 (GMAC0_MIB_INT_BIT | GMAC0_RX_OVERRUN_INT_BIT)
 
 #define GMAC_OFFLOAD_FEATURES (NETIF_F_SG | NETIF_F_IP_CSUM | \
-		NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | \
-		NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
+			       NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM)
 
 /**
  * struct gmac_queue_page - page buffer per-page info
@@ -1143,23 +1142,13 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
 	struct gmac_txdesc *txd;
 	skb_frag_t *skb_frag;
 	dma_addr_t mapping;
-	unsigned short mtu;
 	void *buffer;
 	int ret;
 
-	mtu  = ETH_HLEN;
-	mtu += netdev->mtu;
-	if (skb->protocol == htons(ETH_P_8021Q))
-		mtu += VLAN_HLEN;
-
+	/* TODO: implement proper TSO using MTU in word3 */
 	word1 = skb->len;
 	word3 = SOF_BIT;
 
-	if (word1 > mtu) {
-		word1 |= TSS_MTU_ENABLE_BIT;
-		word3 |= mtu;
-	}
-
 	if (skb->len >= ETH_FRAME_LEN) {
 		/* Hardware offloaded checksumming isn't working on frames
 		 * bigger than 1514 bytes. A hypothesis about this is that the
-- 
cgit