From 28be7ca4fcfd69a2d52aaa331adbf9dbe91f9e6e Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Mon, 10 Oct 2022 15:46:13 +1300 Subject: tipc: Fix recognition of trial period The trial period exists until jiffies is after addr_trial_end. But as jiffies will eventually overflow, just using time_after will eventually give incorrect results. As the node address is set once the trial period ends, this can be used to know that we are not in the trial period. Fixes: e415577f57f4 ("tipc: correct discovery message handling during address trial period") Signed-off-by: Mark Tomlinson Signed-off-by: David S. Miller --- net/tipc/discover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index da69e1abf68f..e8630707901e 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -148,8 +148,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, { struct net *net = d->net; struct tipc_net *tn = tipc_net(net); - bool trial = time_before(jiffies, tn->addr_trial_end); u32 self = tipc_own_addr(net); + bool trial = time_before(jiffies, tn->addr_trial_end) && !self; if (mtyp == DSC_TRIAL_FAIL_MSG) { if (!trial) -- cgit From 777ecaabd614d47c482a5c9031579e66da13989a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 12 Oct 2022 17:25:14 +0200 Subject: tipc: fix an information leak in tipc_topsrv_kern_subscr Use a 8-byte write to initialize sub.usr_handle in tipc_topsrv_kern_subscr(), otherwise four bytes remain uninitialized when issuing setsockopt(..., SOL_TIPC, ...). This resulted in an infoleak reported by KMSAN when the packet was received: ===================================================== BUG: KMSAN: kernel-infoleak in copyout+0xbc/0x100 lib/iov_iter.c:169 instrument_copy_to_user ./include/linux/instrumented.h:121 copyout+0xbc/0x100 lib/iov_iter.c:169 _copy_to_iter+0x5c0/0x20a0 lib/iov_iter.c:527 copy_to_iter ./include/linux/uio.h:176 simple_copy_to_iter+0x64/0xa0 net/core/datagram.c:513 __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:419 skb_copy_datagram_iter+0x58/0x200 net/core/datagram.c:527 skb_copy_datagram_msg ./include/linux/skbuff.h:3903 packet_recvmsg+0x521/0x1e70 net/packet/af_packet.c:3469 ____sys_recvmsg+0x2c4/0x810 net/socket.c:? ___sys_recvmsg+0x217/0x840 net/socket.c:2743 __sys_recvmsg net/socket.c:2773 __do_sys_recvmsg net/socket.c:2783 __se_sys_recvmsg net/socket.c:2780 __x64_sys_recvmsg+0x364/0x540 net/socket.c:2780 do_syscall_x64 arch/x86/entry/common.c:50 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120 ... Uninit was stored to memory at: tipc_sub_subscribe+0x42d/0xb50 net/tipc/subscr.c:156 tipc_conn_rcv_sub+0x246/0x620 net/tipc/topsrv.c:375 tipc_topsrv_kern_subscr+0x2e8/0x400 net/tipc/topsrv.c:579 tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190 tipc_sk_join+0x2a8/0x770 net/tipc/socket.c:3084 tipc_setsockopt+0xae5/0xe40 net/tipc/socket.c:3201 __sys_setsockopt+0x87f/0xdc0 net/socket.c:2252 __do_sys_setsockopt net/socket.c:2263 __se_sys_setsockopt net/socket.c:2260 __x64_sys_setsockopt+0xe0/0x160 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:50 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd arch/x86/entry/entry_64.S:120 Local variable sub created at: tipc_topsrv_kern_subscr+0x57/0x400 net/tipc/topsrv.c:562 tipc_group_create+0x4e7/0x7d0 net/tipc/group.c:190 Bytes 84-87 of 88 are uninitialized Memory access of size 88 starts at ffff88801ed57cd0 Data copied to user address 0000000020000400 ... ===================================================== Signed-off-by: Alexander Potapenko Fixes: 026321c6d056a5 ("tipc: rename tipc_server to tipc_topsrv") Signed-off-by: David S. Miller --- net/tipc/topsrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 5522865deae9..14fd05fd6107 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -568,7 +568,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; sub.filter = filter; - *(u32 *)&sub.usr_handle = port; + *(u64 *)&sub.usr_handle = (u64)port; con = tipc_conn_alloc(tipc_topsrv(net)); if (IS_ERR(con)) -- cgit From a2550d3ce53c68f54042bc5e468c4d07491ffe0e Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 12 Oct 2022 19:18:36 +0200 Subject: net: dsa: qca8k: fix inband mgmt for big-endian systems The header and the data of the skb for the inband mgmt requires to be in little-endian. This is problematic for big-endian system as the mgmt header is written in the cpu byte order. Fix this by converting each value for the mgmt header and data to little-endian, and convert to cpu byte order the mgmt header and data sent by the switch. Fixes: 5950c7c0a68c ("net: dsa: qca8k: add support for mgmt read/write in Ethernet packet") Tested-by: Pawel Dembicki Tested-by: Lech Perczak Signed-off-by: Christian Marangi Reviewed-by: Lech Perczak Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 63 ++++++++++++++++++++++++++++++---------- include/linux/dsa/tag_qca.h | 6 ++-- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 5669c92c93f7..644338ca0510 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -137,27 +137,42 @@ static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb) struct qca8k_mgmt_eth_data *mgmt_eth_data; struct qca8k_priv *priv = ds->priv; struct qca_mgmt_ethhdr *mgmt_ethhdr; + u32 command; u8 len, cmd; + int i; mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb_mac_header(skb); mgmt_eth_data = &priv->mgmt_eth_data; - cmd = FIELD_GET(QCA_HDR_MGMT_CMD, mgmt_ethhdr->command); - len = FIELD_GET(QCA_HDR_MGMT_LENGTH, mgmt_ethhdr->command); + command = get_unaligned_le32(&mgmt_ethhdr->command); + cmd = FIELD_GET(QCA_HDR_MGMT_CMD, command); + len = FIELD_GET(QCA_HDR_MGMT_LENGTH, command); /* Make sure the seq match the requested packet */ - if (mgmt_ethhdr->seq == mgmt_eth_data->seq) + if (get_unaligned_le32(&mgmt_ethhdr->seq) == mgmt_eth_data->seq) mgmt_eth_data->ack = true; if (cmd == MDIO_READ) { - mgmt_eth_data->data[0] = mgmt_ethhdr->mdio_data; + u32 *val = mgmt_eth_data->data; + + *val = get_unaligned_le32(&mgmt_ethhdr->mdio_data); /* Get the rest of the 12 byte of data. * The read/write function will extract the requested data. */ - if (len > QCA_HDR_MGMT_DATA1_LEN) - memcpy(mgmt_eth_data->data + 1, skb->data, - QCA_HDR_MGMT_DATA2_LEN); + if (len > QCA_HDR_MGMT_DATA1_LEN) { + __le32 *data2 = (__le32 *)skb->data; + int data_len = min_t(int, QCA_HDR_MGMT_DATA2_LEN, + len - QCA_HDR_MGMT_DATA1_LEN); + + val++; + + for (i = sizeof(u32); i <= data_len; i += sizeof(u32)) { + *val = get_unaligned_le32(data2); + val++; + data2++; + } + } } complete(&mgmt_eth_data->rw_done); @@ -169,8 +184,10 @@ static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 * struct qca_mgmt_ethhdr *mgmt_ethhdr; unsigned int real_len; struct sk_buff *skb; - u32 *data2; + __le32 *data2; + u32 command; u16 hdr; + int i; skb = dev_alloc_skb(QCA_HDR_MGMT_PKT_LEN); if (!skb) @@ -199,20 +216,32 @@ static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 * hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(0)); hdr |= FIELD_PREP(QCA_HDR_XMIT_CONTROL, QCA_HDR_XMIT_TYPE_RW_REG); - mgmt_ethhdr->command = FIELD_PREP(QCA_HDR_MGMT_ADDR, reg); - mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_LENGTH, real_len); - mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CMD, cmd); - mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CHECK_CODE, + command = FIELD_PREP(QCA_HDR_MGMT_ADDR, reg); + command |= FIELD_PREP(QCA_HDR_MGMT_LENGTH, real_len); + command |= FIELD_PREP(QCA_HDR_MGMT_CMD, cmd); + command |= FIELD_PREP(QCA_HDR_MGMT_CHECK_CODE, QCA_HDR_MGMT_CHECK_CODE_VAL); + put_unaligned_le32(command, &mgmt_ethhdr->command); + if (cmd == MDIO_WRITE) - mgmt_ethhdr->mdio_data = *val; + put_unaligned_le32(*val, &mgmt_ethhdr->mdio_data); mgmt_ethhdr->hdr = htons(hdr); data2 = skb_put_zero(skb, QCA_HDR_MGMT_DATA2_LEN + QCA_HDR_MGMT_PADDING_LEN); - if (cmd == MDIO_WRITE && len > QCA_HDR_MGMT_DATA1_LEN) - memcpy(data2, val + 1, len - QCA_HDR_MGMT_DATA1_LEN); + if (cmd == MDIO_WRITE && len > QCA_HDR_MGMT_DATA1_LEN) { + int data_len = min_t(int, QCA_HDR_MGMT_DATA2_LEN, + len - QCA_HDR_MGMT_DATA1_LEN); + + val++; + + for (i = sizeof(u32); i <= data_len; i += sizeof(u32)) { + put_unaligned_le32(*val, data2); + data2++; + val++; + } + } return skb; } @@ -220,9 +249,11 @@ static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 * static void qca8k_mdio_header_fill_seq_num(struct sk_buff *skb, u32 seq_num) { struct qca_mgmt_ethhdr *mgmt_ethhdr; + u32 seq; + seq = FIELD_PREP(QCA_HDR_MGMT_SEQ_NUM, seq_num); mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb->data; - mgmt_ethhdr->seq = FIELD_PREP(QCA_HDR_MGMT_SEQ_NUM, seq_num); + put_unaligned_le32(seq, &mgmt_ethhdr->seq); } static int qca8k_read_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len) diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index 50be7cbd93a5..0e176da1e43f 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -61,9 +61,9 @@ struct sk_buff; /* Special struct emulating a Ethernet header */ struct qca_mgmt_ethhdr { - u32 command; /* command bit 31:0 */ - u32 seq; /* seq 63:32 */ - u32 mdio_data; /* first 4byte mdio */ + __le32 command; /* command bit 31:0 */ + __le32 seq; /* seq 63:32 */ + __le32 mdio_data; /* first 4byte mdio */ __be16 hdr; /* qca hdr */ } __packed; -- cgit From 0d4636f7d72df3179b20a2d32b647881917a5e2a Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 12 Oct 2022 19:18:37 +0200 Subject: net: dsa: qca8k: fix ethtool autocast mib for big-endian systems The switch sends autocast mib in little-endian. This is problematic for big-endian system as the values needs to be converted. Fix this by converting each mib value to cpu byte order. Fixes: 5c957c7ca78c ("net: dsa: qca8k: add support for mib autocast in Ethernet packet") Tested-by: Pawel Dembicki Tested-by: Lech Perczak Signed-off-by: Christian Marangi Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 20 ++++++++------------ include/linux/dsa/tag_qca.h | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 644338ca0510..c5c3b4e92f28 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -1518,9 +1518,9 @@ static void qca8k_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *sk struct qca8k_priv *priv = ds->priv; const struct qca8k_mib_desc *mib; struct mib_ethhdr *mib_ethhdr; - int i, mib_len, offset = 0; - u64 *data; + __le32 *data2; u8 port; + int i; mib_ethhdr = (struct mib_ethhdr *)skb_mac_header(skb); mib_eth_data = &priv->mib_eth_data; @@ -1532,28 +1532,24 @@ static void qca8k_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *sk if (port != mib_eth_data->req_port) goto exit; - data = mib_eth_data->data; + data2 = (__le32 *)skb->data; for (i = 0; i < priv->info->mib_count; i++) { mib = &ar8327_mib[i]; /* First 3 mib are present in the skb head */ if (i < 3) { - data[i] = mib_ethhdr->data[i]; + mib_eth_data->data[i] = get_unaligned_le32(mib_ethhdr->data + i); continue; } - mib_len = sizeof(uint32_t); - /* Some mib are 64 bit wide */ if (mib->size == 2) - mib_len = sizeof(uint64_t); - - /* Copy the mib value from packet to the */ - memcpy(data + i, skb->data + offset, mib_len); + mib_eth_data->data[i] = get_unaligned_le64((__le64 *)data2); + else + mib_eth_data->data[i] = get_unaligned_le32(data2); - /* Set the offset for the next mib */ - offset += mib_len; + data2 += mib->size; } exit: diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index 0e176da1e43f..b1b5720d89a5 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -73,7 +73,7 @@ enum mdio_cmd { }; struct mib_ethhdr { - u32 data[3]; /* first 3 mib counter */ + __le32 data[3]; /* first 3 mib counter */ __be16 hdr; /* qca hdr */ } __packed; -- cgit From aae425efdfd1b1d8452260a3cb49344ebf20b1f5 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Wed, 12 Oct 2022 13:54:40 -0700 Subject: i40e: Fix DMA mappings leak During reallocation of RX buffers, new DMA mappings are created for those buffers. steps for reproduction: while : do for ((i=0; i<=8160; i=i+32)) do ethtool -G enp130s0f0 rx $i tx $i sleep 0.5 ethtool -g enp130s0f0 done done This resulted in crash: i40e 0000:01:00.1: Unable to allocate memory for the Rx descriptor ring, size=65536 Driver BUG WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:141 xdp_rxq_info_unreg+0x43/0x50 Call Trace: i40e_free_rx_resources+0x70/0x80 [i40e] i40e_set_ringparam+0x27c/0x800 [i40e] ethnl_set_rings+0x1b2/0x290 genl_family_rcv_msg_doit.isra.15+0x10f/0x150 genl_family_rcv_msg+0xb3/0x160 ? rings_fill_reply+0x1a0/0x1a0 genl_rcv_msg+0x47/0x90 ? genl_family_rcv_msg+0x160/0x160 netlink_rcv_skb+0x4c/0x120 genl_rcv+0x24/0x40 netlink_unicast+0x196/0x230 netlink_sendmsg+0x204/0x3d0 sock_sendmsg+0x4c/0x50 __sys_sendto+0xee/0x160 ? handle_mm_fault+0xbe/0x1e0 ? syscall_trace_enter+0x1d3/0x2c0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f5eac8b035b Missing register, driver bug WARNING: CPU: 0 PID: 4300 at net/core/xdp.c:119 xdp_rxq_info_unreg_mem_model+0x69/0x140 Call Trace: xdp_rxq_info_unreg+0x1e/0x50 i40e_free_rx_resources+0x70/0x80 [i40e] i40e_set_ringparam+0x27c/0x800 [i40e] ethnl_set_rings+0x1b2/0x290 genl_family_rcv_msg_doit.isra.15+0x10f/0x150 genl_family_rcv_msg+0xb3/0x160 ? rings_fill_reply+0x1a0/0x1a0 genl_rcv_msg+0x47/0x90 ? genl_family_rcv_msg+0x160/0x160 netlink_rcv_skb+0x4c/0x120 genl_rcv+0x24/0x40 netlink_unicast+0x196/0x230 netlink_sendmsg+0x204/0x3d0 sock_sendmsg+0x4c/0x50 __sys_sendto+0xee/0x160 ? handle_mm_fault+0xbe/0x1e0 ? syscall_trace_enter+0x1d3/0x2c0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f5eac8b035b This was caused because of new buffers with different RX ring count should substitute older ones, but those buffers were freed in i40e_configure_rx_ring and reallocated again with i40e_alloc_rx_bi, thus kfree on rx_bi caused leak of already mapped DMA. Fix this by reallocating ZC with rx_bi_zc struct when BPF program loads. Additionally reallocate back to rx_bi when BPF program unloads. If BPF program is loaded/unloaded and XSK pools are created, reallocate RX queues accordingly in XSP_SETUP_XSK_POOL handler. Fixes: be1222b585fd ("i40e: Separate kernel allocated rx_bi rings from AF_XDP rings") Signed-off-by: Jan Sokolowski Signed-off-by: Mateusz Palczewski Signed-off-by: Jacob Keller Tested-by: Chandan (A Contingent Worker at Intel) Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 -- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++--- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 13 ++--- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 - drivers/net/ethernet/intel/i40e/i40e_xsk.c | 67 +++++++++++++++++++++++--- drivers/net/ethernet/intel/i40e/i40e_xsk.h | 2 +- 6 files changed, 74 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 7e75706f76db..87f36d1ce800 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2181,9 +2181,6 @@ static int i40e_set_ringparam(struct net_device *netdev, */ rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS; err = i40e_setup_rx_descriptors(&rx_rings[i]); - if (err) - goto rx_unwind; - err = i40e_alloc_rx_bi(&rx_rings[i]); if (err) goto rx_unwind; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2c07fa8ecfc8..b5dcd15ced36 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3566,12 +3566,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (ring->vsi->type == I40E_VSI_MAIN) xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); - kfree(ring->rx_bi); ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { - ret = i40e_alloc_rx_bi_zc(ring); - if (ret) - return ret; ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); /* For AF_XDP ZC, we disallow packets to span on @@ -3589,9 +3585,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ring->queue_index); } else { - ret = i40e_alloc_rx_bi(ring); - if (ret) - return ret; ring->rx_buf_len = vsi->rx_buf_len; if (ring->vsi->type == I40E_VSI_MAIN) { ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -13296,6 +13289,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, i40e_reset_and_rebuild(pf, true, true); } + if (!i40e_enabled_xdp_vsi(vsi) && prog) { + if (i40e_realloc_rx_bi_zc(vsi, true)) + return -ENOMEM; + } else if (i40e_enabled_xdp_vsi(vsi) && !prog) { + if (i40e_realloc_rx_bi_zc(vsi, false)) + return -ENOMEM; + } + for (i = 0; i < vsi->num_queue_pairs; i++) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); @@ -13528,6 +13529,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) i40e_queue_pair_disable_irq(vsi, queue_pair); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); + i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 69e67eb6aea7..b97c95f89fa0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1457,14 +1457,6 @@ err: return -ENOMEM; } -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring) -{ - unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count; - - rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL); - return rx_ring->rx_bi ? 0 : -ENOMEM; -} - static void i40e_clear_rx_bi(struct i40e_ring *rx_ring) { memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count); @@ -1593,6 +1585,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; + rx_ring->rx_bi = + kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL); + if (!rx_ring->rx_bi) + return -ENOMEM; + return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 41f86e9535a0..768290dc6f48 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -469,7 +469,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); -int i40e_alloc_rx_bi(struct i40e_ring *rx_ring); /** * i40e_get_head - Retrieve head from head writeback diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 6d4009e0cbd6..cd7b52fb6b46 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -10,14 +10,6 @@ #include "i40e_txrx_common.h" #include "i40e_xsk.h" -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring) -{ - unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count; - - rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL); - return rx_ring->rx_bi_zc ? 0 : -ENOMEM; -} - void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring) { memset(rx_ring->rx_bi_zc, 0, @@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx) return &rx_ring->rx_bi_zc[idx]; } +/** + * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer + * @rx_ring: Current rx ring + * @pool_present: is pool for XSK present + * + * Try allocating memory and return ENOMEM, if failed to allocate. + * If allocation was successful, substitute buffer with allocated one. + * Returns 0 on success, negative on failure + */ +static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present) +{ + size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) : + sizeof(*rx_ring->rx_bi); + void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL); + + if (!sw_ring) + return -ENOMEM; + + if (pool_present) { + kfree(rx_ring->rx_bi); + rx_ring->rx_bi = NULL; + rx_ring->rx_bi_zc = sw_ring; + } else { + kfree(rx_ring->rx_bi_zc); + rx_ring->rx_bi_zc = NULL; + rx_ring->rx_bi = sw_ring; + } + return 0; +} + +/** + * i40e_realloc_rx_bi_zc - reallocate rx SW rings + * @vsi: Current VSI + * @zc: is zero copy set + * + * Reallocate buffer for rx_rings that might be used by XSK. + * XDP requires more memory, than rx_buf provides. + * Returns 0 on success, negative on failure + */ +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc) +{ + struct i40e_ring *rx_ring; + unsigned long q; + + for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) { + rx_ring = vsi->rx_rings[q]; + if (i40e_realloc_rx_xdp_bi(rx_ring, zc)) + return -ENOMEM; + } + return 0; +} + /** * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a * certain ring/qid @@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi, if (err) return err; + err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true); + if (err) + return err; + err = i40e_queue_pair_enable(vsi, qid); if (err) return err; @@ -113,6 +161,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid) xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR); if (if_running) { + err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false); + if (err) + return err; err = i40e_queue_pair_enable(vsi, qid); if (err) return err; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index bb962987f300..821df248f8be 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -32,7 +32,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring); int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); -int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring); +int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc); void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring); #endif /* _I40E_XSK_H_ */ -- cgit From 0d87bbd39d7fd1135ab9eca672d760470f6508e8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Oct 2022 15:55:20 -0700 Subject: tls: strp: make sure the TCP skbs do not have overlapping data TLS tries to get away with using the TCP input queue directly. This does not work if there is duplicated data (multiple skbs holding bytes for the same seq number range due to retransmits). Check for this condition and fall back to copy mode, it should be rare. Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_strp.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 9b79e334dbd9..955ac3e0bf4d 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -273,7 +273,7 @@ static int tls_strp_read_copyin(struct tls_strparser *strp) return desc.error; } -static int tls_strp_read_short(struct tls_strparser *strp) +static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) { struct skb_shared_info *shinfo; struct page *page; @@ -283,7 +283,7 @@ static int tls_strp_read_short(struct tls_strparser *strp) * to read the data out. Otherwise the connection will stall. * Without pressure threshold of INT_MAX will never be ready. */ - if (likely(!tcp_epollin_ready(strp->sk, INT_MAX))) + if (likely(qshort && !tcp_epollin_ready(strp->sk, INT_MAX))) return 0; shinfo = skb_shinfo(strp->anchor); @@ -315,6 +315,27 @@ static int tls_strp_read_short(struct tls_strparser *strp) return 0; } +static bool tls_strp_check_no_dup(struct tls_strparser *strp) +{ + unsigned int len = strp->stm.offset + strp->stm.full_len; + struct sk_buff *skb; + u32 seq; + + skb = skb_shinfo(strp->anchor)->frag_list; + seq = TCP_SKB_CB(skb)->seq; + + while (skb->len < len) { + seq += skb->len; + len -= skb->len; + skb = skb->next; + + if (TCP_SKB_CB(skb)->seq != seq) + return false; + } + + return true; +} + static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) { struct tcp_sock *tp = tcp_sk(strp->sk); @@ -373,7 +394,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp) return tls_strp_read_copyin(strp); if (inq < strp->stm.full_len) - return tls_strp_read_short(strp); + return tls_strp_read_copy(strp, true); if (!strp->stm.full_len) { tls_strp_load_anchor_with_queue(strp, inq); @@ -387,9 +408,12 @@ static int tls_strp_read_sock(struct tls_strparser *strp) strp->stm.full_len = sz; if (!strp->stm.full_len || inq < strp->stm.full_len) - return tls_strp_read_short(strp); + return tls_strp_read_copy(strp, true); } + if (!tls_strp_check_no_dup(strp)) + return tls_strp_read_copy(strp, false); + strp->msg_ready = 1; tls_rx_msg_ready(strp); -- cgit From 3d6642eac74d9442fde232181aa52d26d47991df Mon Sep 17 00:00:00 2001 From: zhangxiangqian Date: Thu, 13 Oct 2022 15:41:12 +0800 Subject: net: macvlan: change schedule system_wq to system_unbound_wq For FT2000+/64 devices, when four virtual machines share the same physical network interface, DROP will occur due to the single core CPU performance problem. ip_check_defrag and macvlan_process_broadcast is on the same CPU. When the MACVLAN PORT increases, the CPU usage reaches more than 90%. bc_queue > bc_queue_len_used (default 1000), causing DROP. Signed-off-by: zhangxiangqian Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 8f8f73099de8..c5cfe8555199 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -361,7 +361,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, } spin_unlock(&port->bc_queue.lock); - schedule_work(&port->bc_work); + queue_work(system_unbound_wq, &port->bc_work); if (err) goto free_nskb; -- cgit From 9a9a5d80ec9887814042c69768c2fee7961db7f4 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:36 -0700 Subject: MAINTAINERS: git://github -> https://github.com for petkan Github deprecated the git:// links about a year ago, so let's move to the https:// URLs instead. Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a96c60c787af..fb514facd169 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21300,7 +21300,7 @@ L: linux-usb@vger.kernel.org L: netdev@vger.kernel.org S: Maintained W: https://github.com/petkan/pegasus -T: git git://github.com/petkan/pegasus.git +T: git https://github.com/petkan/pegasus.git F: drivers/net/usb/pegasus.* USB PHY LAYER @@ -21337,7 +21337,7 @@ L: linux-usb@vger.kernel.org L: netdev@vger.kernel.org S: Maintained W: https://github.com/petkan/rtl8150 -T: git git://github.com/petkan/rtl8150.git +T: git https://github.com/petkan/rtl8150.git F: drivers/net/usb/rtl8150.c USB SERIAL SUBSYSTEM -- cgit From 0c93411795513a0e8dfcb5bcc7bab756b98bfc73 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 13 Oct 2022 19:42:05 -0400 Subject: MAINTAINERS: nfc: s3fwrn5: Drop Krzysztof Opasiak Emails to Krzysztof Opasiak bounce ("Recipient address rejected: User unknown") so drop his email from maintainers of s3fwrn5 NFC bindings and driver. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml | 1 - MAINTAINERS | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml index 64995cbb0f97..41c9760227cd 100644 --- a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml +++ b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml @@ -8,7 +8,6 @@ title: Samsung S3FWRN5 NCI NFC Controller maintainers: - Krzysztof Kozlowski - - Krzysztof Opasiak properties: compatible: diff --git a/MAINTAINERS b/MAINTAINERS index fb514facd169..abbe88e1c50b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18217,7 +18217,6 @@ F: include/media/drv-intf/s3c_camif.h SAMSUNG S3FWRN5 NFC DRIVER M: Krzysztof Kozlowski -M: Krzysztof Opasiak L: linux-nfc@lists.01.org (subscribers-only) S: Maintained F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml -- cgit From a8aed7b35becfd21f22a77c7014029ea837b018f Mon Sep 17 00:00:00 2001 From: Jonathan Cooper Date: Thu, 13 Oct 2022 10:55:53 +0100 Subject: sfc: Change VF mac via PF as first preference if available. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changing a VF's mac address through the VF (rather than via the PF) fails with EPERM because the latter part of efx_ef10_set_mac_address attempts to change the vport mac address list as the VF. Even with this fixed it still fails with EBUSY because the vadaptor is still assigned on the VF - the vadaptor reassignment must be within a section where the VF has torn down its state. A major reason this has broken is because we have two functions that ostensibly do the same thing - have a PF and VF cooperate to change a VF mac address. Rather than do this, if we are changing the mac of a VF that has a link to the PF in the same VM then simply call sriov_set_vf_mac instead, which is a proven working function that does that. If there is no PF available, or that fails non-fatally, then attempt to change the VF's mac address as we would a PF, without updating the PF's data. Test case: Create a VF: echo 1 > /sys/class/net//device/sriov_numvfs Set the mac address of the VF directly: ip link set addr 00:11:22:33:44:55 Set the MAC address of the VF via the PF: ip link set vf 0 mac 00:11:22:33:44:66 Without this patch the last command will fail with ENOENT. Signed-off-by: Jonathan Cooper Reported-by: Íñigo Huguet Fixes: 910c8789a777 ("set the MAC address using MC_CMD_VADAPTOR_SET_MAC") Acked-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 58 +++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index d1e1aa19a68e..7022fb2005a2 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3277,6 +3277,30 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) bool was_enabled = efx->port_enabled; int rc; +#ifdef CONFIG_SFC_SRIOV + /* If this function is a VF and we have access to the parent PF, + * then use the PF control path to attempt to change the VF MAC address. + */ + if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { + struct efx_nic *efx_pf = pci_get_drvdata(efx->pci_dev->physfn); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u8 mac[ETH_ALEN]; + + /* net_dev->dev_addr can be zeroed by efx_net_stop in + * efx_ef10_sriov_set_vf_mac, so pass in a copy. + */ + ether_addr_copy(mac, efx->net_dev->dev_addr); + + rc = efx_ef10_sriov_set_vf_mac(efx_pf, nic_data->vf_index, mac); + if (!rc) + return 0; + + netif_dbg(efx, drv, efx->net_dev, + "Updating VF mac via PF failed (%d), setting directly\n", + rc); + } +#endif + efx_device_detach_sync(efx); efx_net_stop(efx->net_dev); @@ -3297,40 +3321,6 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx_net_open(efx->net_dev); efx_device_attach_if_not_resetting(efx); -#ifdef CONFIG_SFC_SRIOV - if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { - struct efx_ef10_nic_data *nic_data = efx->nic_data; - struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; - - if (rc == -EPERM) { - struct efx_nic *efx_pf; - - /* Switch to PF and change MAC address on vport */ - efx_pf = pci_get_drvdata(pci_dev_pf); - - rc = efx_ef10_sriov_set_vf_mac(efx_pf, - nic_data->vf_index, - efx->net_dev->dev_addr); - } else if (!rc) { - struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); - struct efx_ef10_nic_data *nic_data = efx_pf->nic_data; - unsigned int i; - - /* MAC address successfully changed by VF (with MAC - * spoofing) so update the parent PF if possible. - */ - for (i = 0; i < efx_pf->vf_count; ++i) { - struct ef10_vf *vf = nic_data->vf + i; - - if (vf->efx == efx) { - ether_addr_copy(vf->mac, - efx->net_dev->dev_addr); - return 0; - } - } - } - } else -#endif if (rc == -EPERM) { netif_err(efx, drv, efx->net_dev, "Cannot change MAC address; use sfboot to enable" -- cgit From d8bde3bf7f82dac5fc68a62c2816793a12cafa2a Mon Sep 17 00:00:00 2001 From: Xiaobo Liu Date: Fri, 14 Oct 2022 10:05:40 +0800 Subject: net/atm: fix proc_mpc_write incorrect return value Then the input contains '\0' or '\n', proc_mpc_write has read them, so the return value needs +1. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xiaobo Liu Signed-off-by: David S. Miller --- net/atm/mpoa_proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 829db9eba0cb..aaf64b953915 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -219,11 +219,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff, if (!page) return -ENOMEM; - for (p = page, len = 0; len < nbytes; p++, len++) { + for (p = page, len = 0; len < nbytes; p++) { if (get_user(*p, buff++)) { free_page((unsigned long)page); return -EFAULT; } + len += 1; if (*p == '\0' || *p == '\n') break; } -- cgit From 017e42540639a46fdf7c7f5ee647e0b7806c9013 Mon Sep 17 00:00:00 2001 From: Cezar Bulinaru Date: Thu, 13 Oct 2022 22:45:03 -0400 Subject: net: hv_netvsc: Fix a warning triggered by memcpy in rndis_filter memcpy: detected field-spanning write (size 168) of single field "(void *)&request->response_msg + (sizeof(struct rndis_message) - sizeof(union rndis_message_container)) + sizeof(*req_id)" at drivers/net/hyperv/rndis_filter.c:338 (size 40) RSP: 0018:ffffc90000144de0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8881766b4000 RCX: 0000000000000000 RDX: 0000000000000102 RSI: 0000000000009ffb RDI: 00000000ffffffff RBP: ffffc90000144e38 R08: 0000000000000000 R09: 00000000ffffdfff R10: ffffc90000144c48 R11: ffffffff82f56ac8 R12: ffff8881766b403c R13: 00000000000000a8 R14: ffff888100b75000 R15: ffff888179301d00 FS: 0000000000000000(0000) GS:ffff8884d6280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f8b024c418 CR3: 0000000176548001 CR4: 00000000003706e0 Call Trace: ? _raw_spin_unlock_irqrestore+0x27/0x50 netvsc_poll+0x556/0x940 [hv_netvsc] __napi_poll+0x2e/0x170 net_rx_action+0x299/0x2f0 __do_softirq+0xed/0x2ef __irq_exit_rcu+0x9f/0x110 irq_exit_rcu+0xe/0x20 sysvec_hyperv_callback+0xb0/0xd0 asm_sysvec_hyperv_callback+0x1b/0x20 RIP: 0010:native_safe_halt+0xb/0x10 Fixes: A warning triggered when the response message len exceeds the size of rndis_message. Inside the rndis_request structure these fields are however followed by a RNDIS_EXT_LEN padding so it is safe to use unsafe_memcpy. Reviewed-by: Michael Kelley Signed-off-by: Cezar Bulinaru Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 11f767a20444..eea777ec2541 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "hyperv_net.h" #include "netvsc_trace.h" @@ -335,9 +336,10 @@ static void rndis_filter_receive_response(struct net_device *ndev, if (resp->msg_len <= sizeof(struct rndis_message) + RNDIS_EXT_LEN) { memcpy(&request->response_msg, resp, RNDIS_HEADER_SIZE + sizeof(*req_id)); - memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id), + unsafe_memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id), data + RNDIS_HEADER_SIZE + sizeof(*req_id), - resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id)); + resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id), + "request->response_msg is followed by a padding of RNDIS_EXT_LEN inside rndis_request"); if (request->request_msg.ndis_msg_type == RNDIS_MSG_QUERY && request->request_msg.msg. query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) -- cgit From 0c9efbd5c50c64ead434960a404c9c9a097b0403 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Fri, 14 Oct 2022 12:17:35 +0530 Subject: net: phy: dp83867: Extend RX strap quirk for SGMII mode When RX strap in HW is not set to MODE 3 or 4, bit 7 and 8 in CF4 register should be set. The former is already handled in dp83867_config_init; add the latter in SGMII specific initialization. Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy") Signed-off-by: Harini Katakam Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/dp83867.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 6939563d3b7c..417527f8bbf5 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -853,6 +853,14 @@ static int dp83867_config_init(struct phy_device *phydev) else val &= ~DP83867_SGMII_TYPE; phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_SGMIICTL, val); + + /* This is a SW workaround for link instability if RX_CTRL is + * not strapped to mode 3 or 4 in HW. This is required for SGMII + * in addition to clearing bit 7, handled above. + */ + if (dp83867->rxctrl_strap_quirk) + phy_set_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, + BIT(8)); } val = phy_read(phydev, DP83867_CFG3); -- cgit From bdee15e8c58b450ad736a2b62ef8c7a12548b704 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:34:36 +0300 Subject: net/smc: Fix an error code in smc_lgr_create() If smc_wr_alloc_lgr_mem() fails then return an error code. Don't return success. Fixes: 8799e310fb3f ("net/smc: add v2 support to the work request layer") Signed-off-by: Dan Carpenter Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/smc_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e6ee797640b4..c305d8dd23f8 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -896,7 +896,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], SMC_MAX_PNETID_LEN); - if (smc_wr_alloc_lgr_mem(lgr)) + rc = smc_wr_alloc_lgr_mem(lgr); + if (rc) goto free_wq; smc_llc_lgr_init(lgr, smc); -- cgit From 9408f3d321ed2286b9722bceff08ca28b741c026 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 17:33:02 +0300 Subject: sunhme: Uninitialized variable in happy_meal_init() The "burst" string is only initialized for CONFIG_SPARC. It should be set to "64" because that's what is used by PCI. Fixes: 24cddbc3ef11 ("sunhme: Combine continued messages") Signed-off-by: Dan Carpenter Reviewed-by: Sean Anderson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunhme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 91f10f746dff..1c16548415cd 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1328,7 +1328,7 @@ static int happy_meal_init(struct happy_meal *hp) void __iomem *erxregs = hp->erxregs; void __iomem *bregs = hp->bigmacregs; void __iomem *tregs = hp->tcvregs; - const char *bursts; + const char *bursts = "64"; u32 regtmp, rxcfg; /* If auto-negotiation timer is running, kill it. */ -- cgit From 0a6d58a70a39d9a74882af6d00ec6df0737503ff Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 18:08:39 +0300 Subject: net: dsa: uninitialized variable in dsa_slave_netdevice_event() Return zero if both dsa_slave_dev_check() and netdev_uses_dsa() are false. Fixes: acc43b7bf52a ("net: dsa: allow masters to join a LAG") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/dsa/slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1a59918d3b30..a9fde48cffd4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -3145,7 +3145,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, case NETDEV_CHANGELOWERSTATE: { struct netdev_notifier_changelowerstate_info *info = ptr; struct dsa_port *dp; - int err; + int err = 0; if (dsa_slave_dev_check(dev)) { dp = dsa_slave_to_port(dev); -- cgit From fc8695eb11f07d936a4a9dbd15d7797986bc8b89 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 14 Oct 2022 09:07:46 -0700 Subject: Revert "net: fix cpu_max_bits_warn() usage in netif_attrmask_next{,_and}" This reverts commit 854701ba4c39afae2362ba19a580c461cb183e4f. We have more violations around, which leads to: WARNING: CPU: 2 PID: 1 at include/linux/cpumask.h:110 __netif_set_xps_queue+0x14e/0x770 Let's back this out and retry with a larger clean up in -next. Fixes: 854701ba4c39 ("net: fix cpu_max_bits_warn() usage in netif_attrmask_next{,_and}") Link: https://lore.kernel.org/all/20221014030459.3272206-2-guoren@kernel.org/ Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a36edb0ec199..eddf8ee270e7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3663,8 +3663,9 @@ static inline bool netif_attr_test_online(unsigned long j, static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) { - /* n is a prior cpu */ - cpu_max_bits_warn(n + 1, nr_bits); + /* -1 is a legal arg here. */ + if (n != -1) + cpu_max_bits_warn(n, nr_bits); if (srcp) return find_next_bit(srcp, nr_bits, n + 1); @@ -3685,8 +3686,9 @@ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, unsigned int nr_bits) { - /* n is a prior cpu */ - cpu_max_bits_warn(n + 1, nr_bits); + /* -1 is a legal arg here. */ + if (n != -1) + cpu_max_bits_warn(n, nr_bits); if (src1p && src2p) return find_next_and_bit(src1p, src2p, nr_bits, n + 1); -- cgit From 96de900ae78e7dbedc937fd91bafe2934579c65a Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Fri, 14 Oct 2022 09:47:28 -0500 Subject: net: phylink: add mac_managed_pm in phylink_config structure The recent commit 'commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")' requires the MAC driver explicitly tell the phy driver who is managing the PM, otherwise you will see warning during resume stage. Add a boolean property in the phylink_config structure so that the MAC driver can use it to tell the PHY driver if it wants to manage the PM. Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Signed-off-by: Shenwei Wang Acked-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 3 +++ include/linux/phylink.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 75464df191ef..6547b6cc6cbe 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1661,6 +1661,9 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, if (phy_interrupt_is_valid(phy)) phy_request_interrupt(phy); + if (pl->config->mac_managed_pm) + phy->mac_managed_pm = true; + return 0; } diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 664dd409feb9..3f01ac8017e0 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -122,6 +122,7 @@ enum phylink_op_type { * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. + * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM. * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. @@ -134,6 +135,7 @@ struct phylink_config { enum phylink_op_type type; bool legacy_pre_march2020; bool poll_fixed_state; + bool mac_managed_pm; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); -- cgit From f151c147b3afcf92dedff53f5f0e965414e4fd2c Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Fri, 14 Oct 2022 09:47:29 -0500 Subject: net: stmmac: Enable mac_managed_pm phylink config Enable the mac_managed_pm configuration in the phylink_config structure to avoid the kernel warning during system resume. Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Signed-off-by: Shenwei Wang Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 65c96773c6d2..8273e6a175c8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1214,6 +1214,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) if (priv->plat->tx_queues_to_use > 1) priv->phylink_config.mac_capabilities &= ~(MAC_10HD | MAC_100HD | MAC_1000HD); + priv->phylink_config.mac_managed_pm = true; phylink = phylink_create(&priv->phylink_config, fwnode, mode, &stmmac_phylink_mac_ops); -- cgit From 2d1f274b95c6e4ba6a813b3b8e7a1a38d54a0a08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 Oct 2022 21:24:41 +0000 Subject: skmsg: pass gfp argument to alloc_sk_msg() syzbot found that alloc_sk_msg() could be called from a non sleepable context. sk_psock_verdict_recv() uses rcu_read_lock() protection. We need the callers to pass a gfp_t argument to avoid issues. syzbot report was: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 3613, name: syz-executor414 preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 INFO: lockdep is turned off. CPU: 0 PID: 3613 Comm: syz-executor414 Not tainted 6.0.0-syzkaller-09589-g55be6084c8e0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106 __might_resched+0x538/0x6a0 kernel/sched/core.c:9877 might_alloc include/linux/sched/mm.h:274 [inline] slab_pre_alloc_hook mm/slab.h:700 [inline] slab_alloc_node mm/slub.c:3162 [inline] slab_alloc mm/slub.c:3256 [inline] kmem_cache_alloc_trace+0x59/0x310 mm/slub.c:3287 kmalloc include/linux/slab.h:600 [inline] kzalloc include/linux/slab.h:733 [inline] alloc_sk_msg net/core/skmsg.c:507 [inline] sk_psock_skb_ingress_self+0x5c/0x330 net/core/skmsg.c:600 sk_psock_verdict_apply+0x395/0x440 net/core/skmsg.c:1014 sk_psock_verdict_recv+0x34d/0x560 net/core/skmsg.c:1201 tcp_read_skb+0x4a1/0x790 net/ipv4/tcp.c:1770 tcp_rcv_established+0x129d/0x1a10 net/ipv4/tcp_input.c:5971 tcp_v4_do_rcv+0x479/0xac0 net/ipv4/tcp_ipv4.c:1681 sk_backlog_rcv include/net/sock.h:1109 [inline] __release_sock+0x1d8/0x4c0 net/core/sock.c:2906 release_sock+0x5d/0x1c0 net/core/sock.c:3462 tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1483 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] __sys_sendto+0x46d/0x5f0 net/socket.c:2117 __do_sys_sendto net/socket.c:2129 [inline] __se_sys_sendto net/socket.c:2125 [inline] __x64_sys_sendto+0xda/0xf0 net/socket.c:2125 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 43312915b5ba ("skmsg: Get rid of unncessary memset()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Daniel Borkmann Cc: John Fastabend Signed-off-by: David S. Miller --- net/core/skmsg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ca70525621c7..1efdc47a999b 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -500,11 +500,11 @@ bool sk_msg_is_readable(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_msg_is_readable); -static struct sk_msg *alloc_sk_msg(void) +static struct sk_msg *alloc_sk_msg(gfp_t gfp) { struct sk_msg *msg; - msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); + msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN); if (unlikely(!msg)) return NULL; sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); @@ -520,7 +520,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL; - return alloc_sk_msg(); + return alloc_sk_msg(GFP_KERNEL); } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, @@ -597,7 +597,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) { - struct sk_msg *msg = alloc_sk_msg(); + struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC); struct sock *sk = psock->sk; int err; -- cgit From b3d0d98179d62f9d55635a600679c4fa362baf8d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 17 Oct 2022 11:51:54 +0800 Subject: net: ethernet: mtk_eth_soc: fix possible memory leak in mtk_probe() If mtk_wed_add_hw() has been called, mtk_wed_exit() needs be called in error path or removing module to free the memory allocated in mtk_wed_add_hw(). Fixes: 804775dfc288 ("net: ethernet: mtk_eth_soc: add support for Wireless Ethernet Dispatch (WED)") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4fba7cb0144b..7cd381530aa4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4060,19 +4060,23 @@ static int mtk_probe(struct platform_device *pdev) eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { dev_err(&pdev->dev, "no IRQ%d resource found\n", i); - return -ENXIO; + err = -ENXIO; + goto err_wed_exit; } } for (i = 0; i < ARRAY_SIZE(eth->clks); i++) { eth->clks[i] = devm_clk_get(eth->dev, mtk_clks_source_name[i]); if (IS_ERR(eth->clks[i])) { - if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto err_wed_exit; + } if (eth->soc->required_clks & BIT(i)) { dev_err(&pdev->dev, "clock %s not found\n", mtk_clks_source_name[i]); - return -EINVAL; + err = -EINVAL; + goto err_wed_exit; } eth->clks[i] = NULL; } @@ -4083,7 +4087,7 @@ static int mtk_probe(struct platform_device *pdev) err = mtk_hw_init(eth); if (err) - return err; + goto err_wed_exit; eth->hwlro = MTK_HAS_CAPS(eth->soc->caps, MTK_HWLRO); @@ -4179,6 +4183,8 @@ err_free_dev: mtk_free_dev(eth); err_deinit_hw: mtk_hw_deinit(eth); +err_wed_exit: + mtk_wed_exit(); return err; } @@ -4198,6 +4204,7 @@ static int mtk_remove(struct platform_device *pdev) phylink_disconnect_phy(mac->phylink); } + mtk_wed_exit(); mtk_hw_deinit(eth); netif_napi_del(ð->tx_napi); -- cgit From 9d4f20a476ca57e4c9246eb1fa2a61bea2354720 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 17 Oct 2022 11:51:55 +0800 Subject: net: ethernet: mtk_eth_wed: add missing put_device() in mtk_wed_add_hw() After calling get_device() in mtk_wed_add_hw(), in error path, put_device() needs be called. Fixes: 804775dfc288 ("net: ethernet: mtk_eth_soc: add support for Wireless Ethernet Dispatch (WED)") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_wed.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 099b6e0df619..09bbd05bd83c 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -1077,11 +1077,11 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, get_device(&pdev->dev); irq = platform_get_irq(pdev, 0); if (irq < 0) - return; + goto err_put_device; regs = syscon_regmap_lookup_by_phandle(np, NULL); if (IS_ERR(regs)) - return; + goto err_put_device; rcu_assign_pointer(mtk_soc_wed_ops, &wed_ops); @@ -1124,8 +1124,14 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, hw_list[index] = hw; + mutex_unlock(&hw_lock); + + return; + unlock: mutex_unlock(&hw_lock); +err_put_device: + put_device(&pdev->dev); } void mtk_wed_exit(void) -- cgit From e0bb4659e235770e6f53b3692e958591f49448f5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 17 Oct 2022 11:51:56 +0800 Subject: net: ethernet: mtk_eth_wed: add missing of_node_put() The device_node pointer returned by of_parse_phandle() with refcount incremented, when finish using it, the refcount need be decreased. Fixes: 804775dfc288 ("net: ethernet: mtk_eth_soc: add support for Wireless Ethernet Dispatch (WED)") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_wed.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 09bbd05bd83c..65e01bf4b4d2 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -1072,7 +1072,7 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, pdev = of_find_device_by_node(np); if (!pdev) - return; + goto err_of_node_put; get_device(&pdev->dev); irq = platform_get_irq(pdev, 0); @@ -1132,6 +1132,8 @@ unlock: mutex_unlock(&hw_lock); err_put_device: put_device(&pdev->dev); +err_of_node_put: + of_node_put(np); } void mtk_wed_exit(void) @@ -1152,6 +1154,7 @@ void mtk_wed_exit(void) hw_list[i] = NULL; debugfs_remove(hw->debugfs_dir); put_device(hw->dev); + of_node_put(hw->node); kfree(hw); } } -- cgit From 402fe7a5728789f3a3998d2823b7a110f4cd924e Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Mon, 17 Oct 2022 14:49:20 +0800 Subject: net: ethernet: mediatek: ppe: Remove the unused function mtk_foe_entry_usable() The function mtk_foe_entry_usable() is defined in the mtk_ppe.c file, but not called elsewhere, so delete this unused function. drivers/net/ethernet/mediatek/mtk_ppe.c:400:20: warning: unused function 'mtk_foe_entry_usable'. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2409 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_ppe.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index ae00e572390d..2d8ca99f2467 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -397,12 +397,6 @@ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, return 0; } -static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry) -{ - return !(entry->ib1 & MTK_FOE_IB1_STATIC) && - FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND; -} - static bool mtk_flow_entry_match(struct mtk_eth *eth, struct mtk_flow_entry *entry, struct mtk_foe_entry *data) -- cgit From 69421bf98482d089e50799f45e48b25ce4a8d154 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 14 Oct 2022 11:26:25 -0700 Subject: udp: Update reuse->has_conns under reuseport_lock. When we call connect() for a UDP socket in a reuseport group, we have to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel could select a unconnected socket wrongly for packets sent to the connected socket. However, the current way to set has_conns is illegal and possible to trigger that problem. reuseport_has_conns() changes has_conns under rcu_read_lock(), which upgrades the RCU reader to the updater. Then, it must do the update under the updater's lock, reuseport_lock, but it doesn't for now. For this reason, there is a race below where we fail to set has_conns resulting in the wrong socket selection. To avoid the race, let's split the reader and updater with proper locking. cpu1 cpu2 +----+ +----+ __ip[46]_datagram_connect() reuseport_grow() . . |- reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size) | . | | |- rcu_read_lock() | |- reuse = rcu_dereference(sk->sk_reuseport_cb) | | | | | /* reuse->has_conns == 0 here */ | | |- more_reuse->has_conns = reuse->has_conns | |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */ | | | | | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, | | | more_reuse) | `- rcu_read_unlock() `- kfree_rcu(reuse, rcu) | |- sk->sk_state = TCP_ESTABLISHED Note the likely(reuse) in reuseport_has_conns_set() is always true, but we put the test there for ease of review. [0] For the record, usually, sk_reuseport_cb is changed under lock_sock(). The only exception is reuseport_grow() & TCP reqsk migration case. 1) shutdown() TCP listener, which is moved into the latter part of reuse->socks[] to migrate reqsk. 2) New listen() overflows reuse->socks[] and call reuseport_grow(). 3) reuse->max_socks overflows u16 with the new listener. 4) reuseport_grow() pops the old shutdown()ed listener from the array and update its sk->sk_reuseport_cb as NULL without lock_sock(). shutdown()ed TCP sk->sk_reuseport_cb can be changed without lock_sock(), but, reuseport_has_conns_set() is called only for UDP under lock_sock(), so likely(reuse) never be false in reuseport_has_conns_set(). [0]: https://lore.kernel.org/netdev/CANn89iLja=eQHbsM_Ta2sQF0tOGU8vAGrh_izRuuHjuO1ouUag@mail.gmail.com/ Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20221014182625.89913-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/sock_reuseport.h | 11 +++++------ net/core/sock_reuseport.c | 16 ++++++++++++++++ net/ipv4/datagram.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 25 insertions(+), 10 deletions(-) diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 473b0b0fa4ab..efc9085c6892 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk, extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); extern int reuseport_detach_prog(struct sock *sk); -static inline bool reuseport_has_conns(struct sock *sk, bool set) +static inline bool reuseport_has_conns(struct sock *sk) { struct sock_reuseport *reuse; bool ret = false; rcu_read_lock(); reuse = rcu_dereference(sk->sk_reuseport_cb); - if (reuse) { - if (set) - reuse->has_conns = 1; - ret = reuse->has_conns; - } + if (reuse && reuse->has_conns) + ret = true; rcu_read_unlock(); return ret; } +void reuseport_has_conns_set(struct sock *sk); + #endif /* _SOCK_REUSEPORT_H */ diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 5daa1fa54249..fb90e1e00773 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -21,6 +21,22 @@ static DEFINE_IDA(reuseport_ida); static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse, struct sock_reuseport *reuse, bool bind_inany); +void reuseport_has_conns_set(struct sock *sk) +{ + struct sock_reuseport *reuse; + + if (!rcu_access_pointer(sk->sk_reuseport_cb)) + return; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + if (likely(reuse)) + reuse->has_conns = 1; + spin_unlock_bh(&reuseport_lock); +} +EXPORT_SYMBOL(reuseport_has_conns_set); + static int reuseport_sock_index(struct sock *sk, const struct sock_reuseport *reuse, bool closed) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 405a8c2aea64..5e66add7befa 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len } inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; - reuseport_has_conns(sk, true); + reuseport_has_conns_set(sk); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); inet->inet_id = prandom_u32(); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8126f67d18b3..752b72892a44 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); /* Fall back to scoring if group has connections */ - if (result && !reuseport_has_conns(sk, false)) + if (result && !reuseport_has_conns(sk)) return result; result = result ? : sk; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index df665d4e8f0f..5ecb56522f9d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -256,7 +256,7 @@ ipv4_connected: goto out; } - reuseport_has_conns(sk, true); + reuseport_has_conns_set(sk); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8d09f0ea5b8c..129ec5a9b0eb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -195,7 +195,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); /* Fall back to scoring if group has connections */ - if (result && !reuseport_has_conns(sk, false)) + if (result && !reuseport_has_conns(sk)) return result; result = result ? : sk; -- cgit From 1ca695207ed2271ecbf8ee6c641970f621c157cc Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 17 Oct 2022 16:03:31 +0800 Subject: ip6mr: fix UAF issue in ip6mr_sk_done() when addrconf_init_net() failed If the initialization fails in calling addrconf_init_net(), devconf_all is the pointer that has been released. Then ip6mr_sk_done() is called to release the net, accessing devconf->mc_forwarding directly causes invalid pointer access. The process is as follows: setup_net() ops_init() addrconf_init_net() all = kmemdup(...) ---> alloc "all" ... net->ipv6.devconf_all = all; __addrconf_sysctl_register() ---> failed ... kfree(all); ---> ipv6.devconf_all invalid ... ops_exit_list() ... ip6mr_sk_done() devconf = net->ipv6.devconf_all; //devconf is invalid pointer if (!devconf || !atomic_read(&devconf->mc_forwarding)) The following is the Call Trace information: BUG: KASAN: use-after-free in ip6mr_sk_done+0x112/0x3a0 Read of size 4 at addr ffff888075508e88 by task ip/14554 Call Trace: dump_stack_lvl+0x8e/0xd1 print_report+0x155/0x454 kasan_report+0xba/0x1f0 kasan_check_range+0x35/0x1b0 ip6mr_sk_done+0x112/0x3a0 rawv6_close+0x48/0x70 inet_release+0x109/0x230 inet6_release+0x4c/0x70 sock_release+0x87/0x1b0 igmp6_net_exit+0x6b/0x170 ops_exit_list+0xb0/0x170 setup_net+0x7ac/0xbd0 copy_net_ns+0x2e6/0x6b0 create_new_namespaces+0x382/0xa50 unshare_nsproxy_namespaces+0xa6/0x1c0 ksys_unshare+0x3a4/0x7e0 __x64_sys_unshare+0x2d/0x40 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f7963322547 Allocated by task 14554: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0xa1/0xb0 __kmalloc_node_track_caller+0x4a/0xb0 kmemdup+0x28/0x60 addrconf_init_net+0x1be/0x840 ops_init+0xa5/0x410 setup_net+0x5aa/0xbd0 copy_net_ns+0x2e6/0x6b0 create_new_namespaces+0x382/0xa50 unshare_nsproxy_namespaces+0xa6/0x1c0 ksys_unshare+0x3a4/0x7e0 __x64_sys_unshare+0x2d/0x40 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Freed by task 14554: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 ____kasan_slab_free+0x155/0x1b0 slab_free_freelist_hook+0x11b/0x220 __kmem_cache_free+0xa4/0x360 addrconf_init_net+0x623/0x840 ops_init+0xa5/0x410 setup_net+0x5aa/0xbd0 copy_net_ns+0x2e6/0x6b0 create_new_namespaces+0x382/0xa50 unshare_nsproxy_namespaces+0xa6/0x1c0 ksys_unshare+0x3a4/0x7e0 __x64_sys_unshare+0x2d/0x40 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 7d9b1b578d67 ("ip6mr: fix use-after-free in ip6mr_sk_done()") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221017080331.16878-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 10ce86bf228e..d5967cba5b56 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7214,9 +7214,11 @@ err_reg_dflt: __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(dflt); + net->ipv6.devconf_dflt = NULL; #endif err_alloc_dflt: kfree(all); + net->ipv6.devconf_all = NULL; err_alloc_all: kfree(net->ipv6.inet6_addr_lst); err_alloc_addr: -- cgit From ba077d683d45190afc993c1ce45bcdbfda741a40 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 17 Oct 2022 11:32:22 -0400 Subject: bnxt_en: fix memory leak in bnxt_nvm_test() Free the kzalloc'ed buffer before returning in the success path. Fixes: 5b6ff128fdf6 ("bnxt_en: implement callbacks for devlink selftests") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/1666020742-25834-1-git-send-email-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index a36803e79e92..8a6f788f6294 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -613,6 +613,7 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti static bool bnxt_nvm_test(struct bnxt *bp, struct netlink_ext_ack *extack) { + bool rc = false; u32 datalen; u16 index; u8 *buf; @@ -632,20 +633,20 @@ static bool bnxt_nvm_test(struct bnxt *bp, struct netlink_ext_ack *extack) if (bnxt_get_nvram_item(bp->dev, index, 0, datalen, buf)) { NL_SET_ERR_MSG_MOD(extack, "nvm test vpd read error"); - goto err; + goto done; } if (bnxt_flash_nvram(bp->dev, BNX_DIR_TYPE_VPD, BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, 0, 0, buf, datalen)) { NL_SET_ERR_MSG_MOD(extack, "nvm test vpd write error"); - goto err; + goto done; } - return true; + rc = true; -err: +done: kfree(buf); - return false; + return rc; } static bool bnxt_dl_selftest_check(struct devlink *dl, unsigned int id, -- cgit From d8b57135fd9ffe9a5b445350a686442a531c5339 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2022 16:59:28 +0000 Subject: net: hsr: avoid possible NULL deref in skb_clone() syzbot got a crash [1] in skb_clone(), caused by a bug in hsr_get_untagged_frame(). When/if create_stripped_skb_hsr() returns NULL, we must not attempt to call skb_clone(). While we are at it, replace a WARN_ONCE() by netdev_warn_once(). [1] general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f] CPU: 1 PID: 754 Comm: syz-executor.0 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 RIP: 0010:skb_clone+0x108/0x3c0 net/core/skbuff.c:1641 Code: 93 02 00 00 49 83 7c 24 28 00 0f 85 e9 00 00 00 e8 5d 4a 29 fa 4c 8d 75 7e 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 84 c0 0f 85 9e 01 00 00 RSP: 0018:ffffc90003ccf4e0 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffffc90003ccf5f8 RCX: ffffc9000c24b000 RDX: 000000000000000f RSI: ffffffff8751cb13 RDI: 0000000000000000 RBP: 0000000000000000 R08: 00000000000000f0 R09: 0000000000000140 R10: fffffbfff181d972 R11: 0000000000000000 R12: ffff888161fc3640 R13: 0000000000000a20 R14: 000000000000007e R15: ffffffff8dc5f620 FS: 00007feb621e4700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007feb621e3ff8 CR3: 00000001643a9000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_get_untagged_frame+0x4e/0x610 net/hsr/hsr_forward.c:164 hsr_forward_do net/hsr/hsr_forward.c:461 [inline] hsr_forward_skb+0xcca/0x1d50 net/hsr/hsr_forward.c:623 hsr_handle_frame+0x588/0x7c0 net/hsr/hsr_slave.c:69 __netif_receive_skb_core+0x9fe/0x38f0 net/core/dev.c:5379 __netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483 __netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 netif_receive_skb_internal net/core/dev.c:5685 [inline] netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744 tun_rx_batched+0x4ab/0x7a0 drivers/net/tun.c:1544 tun_get_user+0x2686/0x3a00 drivers/net/tun.c:1995 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2025 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:584 ksys_write+0x127/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20221017165928.2150130-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_forward.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 5bf357734b11..a50429a62f74 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -150,15 +150,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { if (!frame->skb_std) { - if (frame->skb_hsr) { + if (frame->skb_hsr) frame->skb_std = create_stripped_skb_hsr(frame->skb_hsr, frame); - } else { - /* Unexpected */ - WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", - __FILE__, __LINE__, port->dev->name); + else + netdev_warn_once(port->dev, + "Unexpected frame received in hsr_get_untagged_frame()\n"); + + if (!frame->skb_std) return NULL; - } } return skb_clone(frame->skb_std, GFP_ATOMIC); -- cgit From aa1d7e1267c12e07d979aa34c613716a89029db2 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 17 Oct 2022 16:31:23 -0700 Subject: ionic: catch NULL pointer issue on reconfig It's possible that the driver will dereference a qcq that doesn't exist when calling ionic_reconfigure_queues(), which causes a page fault BUG. If a reduction in the number of queues is followed by a different reconfig such as changing the ring size, the driver can hit a NULL pointer when trying to clean up non-existent queues. Fix this by checking to make sure both the qcqs array and qcq entry exists bofore trying to use and free the entry. Fixes: 101b40a0171f ("ionic: change queue count with no reset") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20221017233123.15869-1-snelson@pensando.io Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 5d58fd99be3c..19d4848df17d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2817,11 +2817,15 @@ err_out: * than the full array, but leave the qcq shells in place */ for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) { - lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; - ionic_qcq_free(lif, lif->txqcqs[i]); + if (lif->txqcqs && lif->txqcqs[i]) { + lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, lif->txqcqs[i]); + } - lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; - ionic_qcq_free(lif, lif->rxqcqs[i]); + if (lif->rxqcqs && lif->rxqcqs[i]) { + lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR; + ionic_qcq_free(lif, lif->rxqcqs[i]); + } } if (err) -- cgit From 1fcc064b305a1aadeff0d4bff961094d27660acd Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 13 Oct 2022 16:37:47 +0200 Subject: netfilter: rpfilter/fib: Set ->flowic_uid correctly for user namespaces. Currently netfilter's rpfilter and fib modules implicitely initialise ->flowic_uid with 0. This is normally the root UID. However, this isn't the case in user namespaces, where user ID 0 is mapped to a different kernel UID. By initialising ->flowic_uid with sock_net_uid(), we get the root UID of the user namespace, thus keeping the same behaviour whether or not we're running in a user namepspace. Note, this is similar to commit 8bcfd0925ef1 ("ipv4: add missing initialization for flowi4_uid"), which fixed the rp_filter sysctl. Fixes: 622ec2c9d524 ("net: core: add UID to flows, rules, and routes") Signed-off-by: Guillaume Nault Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 1 + net/ipv4/netfilter/nft_fib_ipv4.c | 1 + net/ipv6/netfilter/ip6t_rpfilter.c | 1 + net/ipv6/netfilter/nft_fib_ipv6.c | 2 ++ 4 files changed, 5 insertions(+) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index ff85db52b2e5..ded5bef02f77 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -78,6 +78,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); + flow.flowi4_uid = sock_net_uid(xt_net(par), NULL); return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index e886147eed11..fc65d69f23e1 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -65,6 +65,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, struct flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), }; const struct net_device *oif; const struct net_device *found; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 69d86b040a6a..a01d9b842bd0 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -40,6 +40,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev), .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, + .flowi6_uid = sock_net_uid(net, NULL), .daddr = iph->saddr, }; int lookup_flags; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 91faac610e03..36dc14b34388 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -66,6 +66,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, + .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; u32 ret = 0; @@ -163,6 +164,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, + .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; struct rt6_info *rt; int lookup_flags; -- cgit From 96df8360dbb435cc69f7c3c8db44bf8b1c24cd7b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Oct 2022 14:12:58 +0200 Subject: netfilter: nf_tables: relax NFTA_SET_ELEM_KEY_END set flags requirements Otherwise EINVAL is bogusly reported to userspace when deleting a set element. NFTA_SET_ELEM_KEY_END does not need to be set in case of: - insertion: if not present, start key is used as end key. - deletion: only start key needs to be specified, end key is ignored. Hence, relax the sanity check. Fixes: 88cccd908d51 ("netfilter: nf_tables: NFTA_SET_ELEM_KEY_END requires concat and interval flags") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a0653a8dfa82..58d9cbc9ccdc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5865,8 +5865,9 @@ static bool nft_setelem_valid_key_end(const struct nft_set *set, (NFT_SET_CONCAT | NFT_SET_INTERVAL)) { if (flags & NFT_SET_ELEM_INTERVAL_END) return false; - if (!nla[NFTA_SET_ELEM_KEY_END] && - !(flags & NFT_SET_ELEM_CATCHALL)) + + if (nla[NFTA_SET_ELEM_KEY_END] && + flags & NFT_SET_ELEM_CATCHALL) return false; } else { if (nla[NFTA_SET_ELEM_KEY_END]) -- cgit From 7b55c2ed2ba061b65fc51d7a18d37e017085997f Mon Sep 17 00:00:00 2001 From: Manank Patel Date: Tue, 18 Oct 2022 11:03:18 +0530 Subject: ethernet: marvell: octeontx2 Fix resource not freed after malloc fix rxsc and txsc not getting freed before going out of scope Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Manank Patel Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 9809f551fc2e..9ec5f38d38a8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -815,6 +815,7 @@ free_flowid: cn10k_mcs_free_rsrc(pfvf, MCS_TX, MCS_RSRC_TYPE_FLOWID, txsc->hw_flow_id, false); fail: + kfree(txsc); return ERR_PTR(ret); } @@ -870,6 +871,7 @@ free_flowid: cn10k_mcs_free_rsrc(pfvf, MCS_RX, MCS_RSRC_TYPE_FLOWID, rxsc->hw_flow_id, false); fail: + kfree(rxsc); return ERR_PTR(ret); } -- cgit From 51f9a8921ceacd7bf0d3f47fa867a64988ba1dcb Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 18 Oct 2022 14:31:59 +0800 Subject: net: sched: cake: fix null pointer access issue when cake_init() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the default qdisc is cake, if the qdisc of dev_queue fails to be inited during mqprio_init(), cake_reset() is invoked to clear resources. In this case, the tins is NULL, and it will cause gpf issue. The process is as follows: qdisc_create_dflt() cake_init() q->tins = kvcalloc(...) --->failed, q->tins is NULL ... qdisc_put() ... cake_reset() ... cake_dequeue_one() b = &q->tins[...] --->q->tins is NULL The following is the Call Trace information: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:cake_dequeue_one+0xc9/0x3c0 Call Trace: cake_reset+0xb1/0x140 qdisc_reset+0xed/0x6f0 qdisc_destroy+0x82/0x4c0 qdisc_put+0x9e/0xb0 qdisc_create_dflt+0x2c3/0x4a0 mqprio_init+0xa71/0x1760 qdisc_create+0x3eb/0x1000 tc_modify_qdisc+0x408/0x1720 rtnetlink_rcv_msg+0x38e/0xac0 netlink_rcv_skb+0x12d/0x3a0 netlink_unicast+0x4a2/0x740 netlink_sendmsg+0x826/0xcc0 sock_sendmsg+0xc5/0x100 ____sys_sendmsg+0x583/0x690 ___sys_sendmsg+0xe8/0x160 __sys_sendmsg+0xbf/0x160 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f89e5122d04 Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Zhengchao Shao Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 55c6879d2c7e..87f8ce2c65ee 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2224,8 +2224,12 @@ retry: static void cake_reset(struct Qdisc *sch) { + struct cake_sched_data *q = qdisc_priv(sch); u32 c; + if (!q->tins) + return; + for (c = 0; c < CAKE_MAX_TINS; c++) cake_clear_tin(sch, c); } -- cgit From f5ffa3b1197395501b72c10b35518bf58ef24475 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 18 Oct 2022 14:32:00 +0800 Subject: Revert "net: sched: fq_codel: remove redundant resource cleanup in fq_codel_init()" This reverts commit 494f5063b86cd6e972cb41a27e083c9a3664319d. When the default qdisc is fq_codel, if the qdisc of dev_queue fails to be inited during mqprio_init(), fq_codel_reset() is invoked to clear resources. In this case, the flow is NULL, and it will cause gpf issue. The process is as follows: qdisc_create_dflt() fq_codel_init() ... q->flows_cnt = 1024; ... q->flows = kvcalloc(...) --->failed, q->flows is NULL ... qdisc_put() ... fq_codel_reset() ... flow = q->flows + i --->q->flows is NULL The following is the Call Trace information: general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] RIP: 0010:fq_codel_reset+0x14d/0x350 Call Trace: qdisc_reset+0xed/0x6f0 qdisc_destroy+0x82/0x4c0 qdisc_put+0x9e/0xb0 qdisc_create_dflt+0x2c3/0x4a0 mqprio_init+0xa71/0x1760 qdisc_create+0x3eb/0x1000 tc_modify_qdisc+0x408/0x1720 rtnetlink_rcv_msg+0x38e/0xac0 netlink_rcv_skb+0x12d/0x3a0 netlink_unicast+0x4a2/0x740 netlink_sendmsg+0x826/0xcc0 sock_sendmsg+0xc5/0x100 ____sys_sendmsg+0x583/0x690 ___sys_sendmsg+0xe8/0x160 __sys_sendmsg+0xbf/0x160 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fd272b22d04 Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 99d318b60568..8c4fee063436 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -478,24 +478,26 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, if (opt) { err = fq_codel_change(sch, opt, extack); if (err) - return err; + goto init_failure; } err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) - return err; + goto init_failure; if (!q->flows) { q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_codel_flow), GFP_KERNEL); - if (!q->flows) - return -ENOMEM; - + if (!q->flows) { + err = -ENOMEM; + goto init_failure; + } q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL); - if (!q->backlogs) - return -ENOMEM; - + if (!q->backlogs) { + err = -ENOMEM; + goto alloc_failure; + } for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; @@ -508,6 +510,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, else sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; + +alloc_failure: + kvfree(q->flows); + q->flows = NULL; +init_failure: + q->flows_cnt = 0; + return err; } static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) -- cgit From 2a3fc78210b9f0e85372a2435368962009f480fc Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 18 Oct 2022 14:32:01 +0800 Subject: net: sched: sfb: fix null pointer access issue when sfb_init() fails When the default qdisc is sfb, if the qdisc of dev_queue fails to be inited during mqprio_init(), sfb_reset() is invoked to clear resources. In this case, the q->qdisc is NULL, and it will cause gpf issue. The process is as follows: qdisc_create_dflt() sfb_init() tcf_block_get() --->failed, q->qdisc is NULL ... qdisc_put() ... sfb_reset() qdisc_reset(q->qdisc) --->q->qdisc is NULL ops = qdisc->ops The following is the Call Trace information: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] RIP: 0010:qdisc_reset+0x2b/0x6f0 Call Trace: sfb_reset+0x37/0xd0 qdisc_reset+0xed/0x6f0 qdisc_destroy+0x82/0x4c0 qdisc_put+0x9e/0xb0 qdisc_create_dflt+0x2c3/0x4a0 mqprio_init+0xa71/0x1760 qdisc_create+0x3eb/0x1000 tc_modify_qdisc+0x408/0x1720 rtnetlink_rcv_msg+0x38e/0xac0 netlink_rcv_skb+0x12d/0x3a0 netlink_unicast+0x4a2/0x740 netlink_sendmsg+0x826/0xcc0 sock_sendmsg+0xc5/0x100 ____sys_sendmsg+0x583/0x690 ___sys_sendmsg+0xe8/0x160 __sys_sendmsg+0xbf/0x160 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f2164122d04 Fixes: e13e02a3c68d ("net_sched: SFB flow scheduler") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- net/sched/sch_sfb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index e2389fa3cff8..73ae2e726512 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -455,7 +455,8 @@ static void sfb_reset(struct Qdisc *sch) { struct sfb_sched_data *q = qdisc_priv(sch); - qdisc_reset(q->qdisc); + if (likely(q->qdisc)) + qdisc_reset(q->qdisc); q->slot = 0; q->double_buffering = false; sfb_zero_all_buckets(q); -- cgit From 672e97ef689a38cb20c2cc6a1814298fea34461e Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 18 Oct 2022 10:34:38 +0300 Subject: net: Fix return value of qdisc ingress handling on success Currently qdisc ingress handling (sch_handle_ingress()) doesn't set a return value and it is left to the old return value of the caller (__netif_receive_skb_core()) which is RX drop, so if the packet is consumed, caller will stop and return this value as if the packet was dropped. This causes a problem in the kernel tcp stack when having a egress tc rule forwarding to a ingress tc rule. The tcp stack sending packets on the device having the egress rule will see the packets as not successfully transmitted (although they actually were), will not advance it's internal state of sent data, and packets returning on such tcp stream will be dropped by the tcp stack with reason ack-of-unsent-data. See reproduction in [0] below. Fix that by setting the return value to RX success if the packet was handled successfully. [0] Reproduction steps: $ ip link add veth1 type veth peer name peer1 $ ip link add veth2 type veth peer name peer2 $ ifconfig peer1 5.5.5.6/24 up $ ip netns add ns0 $ ip link set dev peer2 netns ns0 $ ip netns exec ns0 ifconfig peer2 5.5.5.5/24 up $ ifconfig veth2 0 up $ ifconfig veth1 0 up #ingress forwarding veth1 <-> veth2 $ tc qdisc add dev veth2 ingress $ tc qdisc add dev veth1 ingress $ tc filter add dev veth2 ingress prio 1 proto all flower \ action mirred egress redirect dev veth1 $ tc filter add dev veth1 ingress prio 1 proto all flower \ action mirred egress redirect dev veth2 #steal packet from peer1 egress to veth2 ingress, bypassing the veth pipe $ tc qdisc add dev peer1 clsact $ tc filter add dev peer1 egress prio 20 proto ip flower \ action mirred ingress redirect dev veth1 #run iperf and see connection not running $ iperf3 -s& $ ip netns exec ns0 iperf3 -c 5.5.5.6 -i 1 #delete egress rule, and run again, now should work $ tc filter del dev peer1 egress $ ip netns exec ns0 iperf3 -c 5.5.5.6 -i 1 Fixes: f697c3e8b35c ("[NET]: Avoid unnecessary cloning for ingress filtering") Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index fa53830d0683..3be256051e99 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5136,11 +5136,13 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, case TC_ACT_SHOT: mini_qdisc_qstats_cpu_drop(miniq); kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); + *ret = NET_RX_DROP; return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: consume_skb(skb); + *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_REDIRECT: /* skb_mac_header check was done by cls/act_bpf, so @@ -5153,8 +5155,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, *another = true; break; } + *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_CONSUMED: + *ret = NET_RX_SUCCESS; return NULL; default: break; -- cgit From fd602f5cb52e336d8c06f8da2d80c76ce2905030 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 18 Oct 2022 10:34:39 +0300 Subject: selftests: add selftest for chaining of tc ingress handling to egress This test runs a simple ingress tc setup between two veth pairs, then adds a egress->ingress rule to test the chaining of tc ingress pipeline to tc egress piepline. Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/test_ingress_egress_chaining.sh | 79 ++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 tools/testing/selftests/net/test_ingress_egress_chaining.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 2a6b0bc648c4..69c58362c0ed 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -70,6 +70,7 @@ TEST_PROGS += io_uring_zerocopy_tx.sh TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr +TEST_PROGS += test_ingress_egress_chaining.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/test_ingress_egress_chaining.sh b/tools/testing/selftests/net/test_ingress_egress_chaining.sh new file mode 100644 index 000000000000..08adff6bb3b6 --- /dev/null +++ b/tools/testing/selftests/net/test_ingress_egress_chaining.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test runs a simple ingress tc setup between two veth pairs, +# and chains a single egress rule to test ingress chaining to egress. +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +needed_mods="act_mirred cls_flower sch_ingress" +for mod in $needed_mods; do + modinfo $mod &>/dev/null || { echo "SKIP: Need act_mirred module"; exit $ksft_skip; } +done + +ns="ns$((RANDOM%899+100))" +veth1="veth1$((RANDOM%899+100))" +veth2="veth2$((RANDOM%899+100))" +peer1="peer1$((RANDOM%899+100))" +peer2="peer2$((RANDOM%899+100))" +ip_peer1=198.51.100.5 +ip_peer2=198.51.100.6 + +function fail() { + echo "FAIL: $@" >> /dev/stderr + exit 1 +} + +function cleanup() { + killall -q -9 udpgso_bench_rx + ip link del $veth1 &> /dev/null + ip link del $veth2 &> /dev/null + ip netns del $ns &> /dev/null +} +trap cleanup EXIT + +function config() { + echo "Setup veth pairs [$veth1, $peer1], and veth pair [$veth2, $peer2]" + ip link add $veth1 type veth peer name $peer1 + ip link add $veth2 type veth peer name $peer2 + ip addr add $ip_peer1/24 dev $peer1 + ip link set $peer1 up + ip netns add $ns + ip link set dev $peer2 netns $ns + ip netns exec $ns ip addr add $ip_peer2/24 dev $peer2 + ip netns exec $ns ip link set $peer2 up + ip link set $veth1 up + ip link set $veth2 up + + echo "Add tc filter ingress->egress forwarding $veth1 <-> $veth2" + tc qdisc add dev $veth2 ingress + tc qdisc add dev $veth1 ingress + tc filter add dev $veth2 ingress prio 1 proto all flower \ + action mirred egress redirect dev $veth1 + tc filter add dev $veth1 ingress prio 1 proto all flower \ + action mirred egress redirect dev $veth2 + + echo "Add tc filter egress->ingress forwarding $peer1 -> $veth1, bypassing the veth pipe" + tc qdisc add dev $peer1 clsact + tc filter add dev $peer1 egress prio 20 proto ip flower \ + action mirred ingress redirect dev $veth1 +} + +function test_run() { + echo "Run tcp traffic" + ./udpgso_bench_rx -t & + sleep 1 + ip netns exec $ns timeout -k 2 10 ./udpgso_bench_tx -t -l 2 -4 -D $ip_peer1 || fail "traffic failed" + echo "Test passed" +} + +config +test_run +trap - EXIT +cleanup -- cgit From a1a824f448ba96c610b85288d844adc9f781828e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 18 Oct 2022 16:13:10 -0700 Subject: genetlink: fix kdoc warnings Address a bunch of kdoc warnings: include/net/genetlink.h:81: warning: Function parameter or member 'module' not described in 'genl_family' include/net/genetlink.h:243: warning: expecting prototype for struct genl_info. Prototype was for struct genl_dumpit_info instead include/net/genetlink.h:419: warning: Function parameter or member 'net' not described in 'genlmsg_unicast' include/net/genetlink.h:438: warning: expecting prototype for gennlmsg_data(). Prototype was for genlmsg_data() instead include/net/genetlink.h:244: warning: Function parameter or member 'op' not described in 'genl_dumpit_info' Link: https://lore.kernel.org/r/20221018231310.1040482-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8f780170e2f8..3d08e67b3cfc 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -37,6 +37,7 @@ struct genl_info; * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks + * @module: pointer to the owning module (set to THIS_MODULE) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups * @resv_start_op: first operation for which reserved fields of the header @@ -173,9 +174,9 @@ struct genl_ops { }; /** - * struct genl_info - info that is available during dumpit op call + * struct genl_dumpit_info - info that is available during dumpit op call * @family: generic netlink family - for internal genl code usage - * @ops: generic netlink ops - for internal genl code usage + * @op: generic netlink ops - for internal genl code usage * @attrs: netlink attributes */ struct genl_dumpit_info { @@ -354,6 +355,7 @@ int genlmsg_multicast_allns(const struct genl_family *family, /** * genlmsg_unicast - unicast a netlink message + * @net: network namespace to look up @portid in * @skb: netlink message as socket buffer * @portid: netlink portid of the destination socket */ @@ -373,7 +375,7 @@ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) } /** - * gennlmsg_data - head of message payload + * genlmsg_data - head of message payload * @gnlh: genetlink message header */ static inline void *genlmsg_data(const struct genlmsghdr *gnlh) -- cgit From c2bf23e4a5af37a4d77901d9ff14c50a269f143d Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Tue, 18 Oct 2022 10:28:41 +0100 Subject: sfc: include vport_id in filter spec hash and equal() Filters on different vports are qualified by different implicit MACs and/or VLANs, so shouldn't be considered equal even if their other match fields are identical. Fixes: 7c460d9be610 ("sfc: Extend and abstract efx_filter_spec to cover Huntington/EF10") Co-developed-by: Edward Cree Signed-off-by: Edward Cree Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Martin Habets Link: https://lore.kernel.org/r/20221018092841.32206-1-pieter.jansen-van-vuuren@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/filter.h | 4 ++-- drivers/net/ethernet/sfc/rx_common.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h index be72e71da027..5f201a547e5b 100644 --- a/drivers/net/ethernet/sfc/filter.h +++ b/drivers/net/ethernet/sfc/filter.h @@ -162,9 +162,9 @@ struct efx_filter_spec { u32 priority:2; u32 flags:6; u32 dmaq_id:12; - u32 vport_id; u32 rss_context; - __be16 outer_vid __aligned(4); /* allow jhash2() of match values */ + u32 vport_id; + __be16 outer_vid; __be16 inner_vid; u8 loc_mac[ETH_ALEN]; u8 rem_mac[ETH_ALEN]; diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 4826e6a7e4ce..9220afeddee8 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -660,17 +660,17 @@ bool efx_filter_spec_equal(const struct efx_filter_spec *left, (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) return false; - return memcmp(&left->outer_vid, &right->outer_vid, + return memcmp(&left->vport_id, &right->vport_id, sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) == 0; + offsetof(struct efx_filter_spec, vport_id)) == 0; } u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) { - BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); - return jhash2((const u32 *)&spec->outer_vid, + BUILD_BUG_ON(offsetof(struct efx_filter_spec, vport_id) & 3); + return jhash2((const u32 *)&spec->vport_id, (sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) / 4, + offsetof(struct efx_filter_spec, vport_id)) / 4, 0); } -- cgit From 258ad2fe5ede773625adfda88b173f4123e59f45 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 18 Oct 2022 21:16:07 +0800 Subject: wwan_hwsim: fix possible memory leak in wwan_hwsim_dev_new() Inject fault while probing module, if device_register() fails, but the refcount of kobject is not decreased to 0, the name allocated in dev_set_name() is leaked. Fix this by calling put_device(), so that name can be freed in callback function kobject_cleanup(). unreferenced object 0xffff88810152ad20 (size 8): comm "modprobe", pid 252, jiffies 4294849206 (age 22.713s) hex dump (first 8 bytes): 68 77 73 69 6d 30 00 ff hwsim0.. backtrace: [<000000009c3504ed>] __kmalloc_node_track_caller+0x44/0x1b0 [<00000000c0228a5e>] kvasprintf+0xb5/0x140 [<00000000cff8c21f>] kvasprintf_const+0x55/0x180 [<0000000055a1e073>] kobject_set_name_vargs+0x56/0x150 [<000000000a80b139>] dev_set_name+0xab/0xe0 Fixes: f36a111a74e7 ("wwan_hwsim: WWAN device simulator") Signed-off-by: Yang Yingliang Reviewed-by: Loic Poulain Acked-by: Sergey Ryazanov Link: https://lore.kernel.org/r/20221018131607.1901641-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/wwan_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c index ff09a8cedf93..2397a903d8f5 100644 --- a/drivers/net/wwan/wwan_hwsim.c +++ b/drivers/net/wwan/wwan_hwsim.c @@ -311,7 +311,7 @@ err_unreg_dev: return ERR_PTR(err); err_free_dev: - kfree(dev); + put_device(&dev->dev); return ERR_PTR(err); } -- cgit From ff2f5ec5d009844ec28f171123f9e58750cef4bf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 18 Oct 2022 20:24:51 +0800 Subject: net: hns: fix possible memory leak in hnae_ae_register() Inject fault while probing module, if device_register() fails, but the refcount of kobject is not decreased to 0, the name allocated in dev_set_name() is leaked. Fix this by calling put_device(), so that name can be freed in callback function kobject_cleanup(). unreferenced object 0xffff00c01aba2100 (size 128): comm "systemd-udevd", pid 1259, jiffies 4294903284 (age 294.152s) hex dump (first 32 bytes): 68 6e 61 65 30 00 00 00 18 21 ba 1a c0 00 ff ff hnae0....!...... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000034783f26>] slab_post_alloc_hook+0xa0/0x3e0 [<00000000748188f2>] __kmem_cache_alloc_node+0x164/0x2b0 [<00000000ab0743e8>] __kmalloc_node_track_caller+0x6c/0x390 [<000000006c0ffb13>] kvasprintf+0x8c/0x118 [<00000000fa27bfe1>] kvasprintf_const+0x60/0xc8 [<0000000083e10ed7>] kobject_set_name_vargs+0x3c/0xc0 [<000000000b87affc>] dev_set_name+0x7c/0xa0 [<000000003fd8fe26>] hnae_ae_register+0xcc/0x190 [hnae] [<00000000fe97edc9>] hns_dsaf_ae_init+0x9c/0x108 [hns_dsaf] [<00000000c36ff1eb>] hns_dsaf_probe+0x548/0x748 [hns_dsaf] Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support") Signed-off-by: Yang Yingliang Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221018122451.1749171-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 00fafc0f8512..430eccea8e5e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -419,8 +419,10 @@ int hnae_ae_register(struct hnae_ae_dev *hdev, struct module *owner) hdev->cls_dev.release = hnae_release; (void)dev_set_name(&hdev->cls_dev, "hnae%d", hdev->id); ret = device_register(&hdev->cls_dev); - if (ret) + if (ret) { + put_device(&hdev->cls_dev); return ret; + } __module_get(THIS_MODULE); -- cgit From ebda44da44f6f309d302522b049f43d6f829f7aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Oct 2022 20:32:58 +0000 Subject: net: sched: fix race condition in qdisc_graft() We had one syzbot report [1] in syzbot queue for a while. I was waiting for more occurrences and/or a repro but Dmitry Vyukov spotted the issue right away. qdisc_graft() drops reference to qdisc in notify_and_destroy while it's still assigned to dev->qdisc Indeed, RCU rules are clear when replacing a data structure. The visible pointer (dev->qdisc in this case) must be updated to the new object _before_ RCU grace period is started (qdisc_put(old) in this case). [1] BUG: KASAN: use-after-free in __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 Read of size 4 at addr ffff88802065e038 by task syz-executor.4/21027 CPU: 0 PID: 21027 Comm: syz-executor.4 Not tainted 6.0.0-rc3-syzkaller-00363-g7726d4c3e60b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 __tcf_qdisc_find net/sched/cls_api.c:1051 [inline] tc_new_tfilter+0x34f/0x2200 net/sched/cls_api.c:2018 rtnetlink_rcv_msg+0x955/0xca0 net/core/rtnetlink.c:6081 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5efaa89279 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f5efbc31168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f5efab9bf80 RCX: 00007f5efaa89279 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 RBP: 00007f5efaae32e9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007f5efb0cfb1f R14: 00007f5efbc31300 R15: 0000000000022000 Allocated by task 21027: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:437 [inline] ____kasan_kmalloc mm/kasan/common.c:516 [inline] ____kasan_kmalloc mm/kasan/common.c:475 [inline] __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:525 kmalloc_node include/linux/slab.h:623 [inline] kzalloc_node include/linux/slab.h:744 [inline] qdisc_alloc+0xb0/0xc50 net/sched/sch_generic.c:938 qdisc_create_dflt+0x71/0x4a0 net/sched/sch_generic.c:997 attach_one_default_qdisc net/sched/sch_generic.c:1152 [inline] netdev_for_each_tx_queue include/linux/netdevice.h:2437 [inline] attach_default_qdiscs net/sched/sch_generic.c:1170 [inline] dev_activate+0x760/0xcd0 net/sched/sch_generic.c:1229 __dev_open+0x393/0x4d0 net/core/dev.c:1441 __dev_change_flags+0x583/0x750 net/core/dev.c:8556 rtnl_configure_link+0xee/0x240 net/core/rtnetlink.c:3189 rtnl_newlink_create net/core/rtnetlink.c:3371 [inline] __rtnl_newlink+0x10b8/0x17e0 net/core/rtnetlink.c:3580 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593 rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 21020: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:367 [inline] ____kasan_slab_free+0x166/0x1c0 mm/kasan/common.c:329 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1754 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1780 slab_free mm/slub.c:3534 [inline] kfree+0xe2/0x580 mm/slub.c:4562 rcu_do_batch kernel/rcu/tree.c:2245 [inline] rcu_core+0x7b5/0x1890 kernel/rcu/tree.c:2505 __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571 Last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 call_rcu+0x99/0x790 kernel/rcu/tree.c:2793 qdisc_put+0xcd/0xe0 net/sched/sch_generic.c:1083 notify_and_destroy net/sched/sch_api.c:1012 [inline] qdisc_graft+0xeb1/0x1270 net/sched/sch_api.c:1084 tc_modify_qdisc+0xbb7/0x1a00 net/sched/sch_api.c:1671 rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 kvfree_call_rcu+0x74/0x940 kernel/rcu/tree.c:3322 neigh_destroy+0x431/0x630 net/core/neighbour.c:912 neigh_release include/net/neighbour.h:454 [inline] neigh_cleanup_and_release+0x1f8/0x330 net/core/neighbour.c:103 neigh_del net/core/neighbour.c:225 [inline] neigh_remove_one+0x37d/0x460 net/core/neighbour.c:246 neigh_forced_gc net/core/neighbour.c:276 [inline] neigh_alloc net/core/neighbour.c:447 [inline] ___neigh_create+0x18b5/0x29a0 net/core/neighbour.c:642 ip6_finish_output2+0xfb8/0x1520 net/ipv6/ip6_output.c:125 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x690/0x1160 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x1ed/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:451 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x991/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The buggy address belongs to the object at ffff88802065e000 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 56 bytes inside of 1024-byte region [ffff88802065e000, ffff88802065e400) The buggy address belongs to the physical page: page:ffffea0000819600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x20658 head:ffffea0000819600 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000001 ffff888011841dc0 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 3523, tgid 3523 (sshd), ts 41495190986, free_ts 41417713212 prep_new_page mm/page_alloc.c:2532 [inline] get_page_from_freelist+0x109b/0x2ce0 mm/page_alloc.c:4283 __alloc_pages+0x1c7/0x510 mm/page_alloc.c:5515 alloc_pages+0x1a6/0x270 mm/mempolicy.c:2270 alloc_slab_page mm/slub.c:1824 [inline] allocate_slab+0x27e/0x3d0 mm/slub.c:1969 new_slab mm/slub.c:2029 [inline] ___slab_alloc+0x7f1/0xe10 mm/slub.c:3031 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3118 slab_alloc_node mm/slub.c:3209 [inline] __kmalloc_node_track_caller+0x2f2/0x380 mm/slub.c:4955 kmalloc_reserve net/core/skbuff.c:358 [inline] __alloc_skb+0xd9/0x2f0 net/core/skbuff.c:430 alloc_skb_fclone include/linux/skbuff.h:1307 [inline] tcp_stream_alloc_skb+0x38/0x580 net/ipv4/tcp.c:861 tcp_sendmsg_locked+0xc36/0x2f80 net/ipv4/tcp.c:1325 tcp_sendmsg+0x2b/0x40 net/ipv4/tcp.c:1483 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 sock_write_iter+0x291/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:578 ksys_write+0x1e8/0x250 fs/read_write.c:631 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1449 [inline] free_pcp_prepare+0x5e4/0xd20 mm/page_alloc.c:1499 free_unref_page_prepare mm/page_alloc.c:3380 [inline] free_unref_page+0x19/0x4d0 mm/page_alloc.c:3476 __unfreeze_partials+0x17c/0x1a0 mm/slub.c:2548 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:447 kasan_slab_alloc include/linux/kasan.h:224 [inline] slab_post_alloc_hook mm/slab.h:727 [inline] slab_alloc_node mm/slub.c:3243 [inline] slab_alloc mm/slub.c:3251 [inline] __kmem_cache_alloc_lru mm/slub.c:3258 [inline] kmem_cache_alloc+0x267/0x3b0 mm/slub.c:3268 kmem_cache_zalloc include/linux/slab.h:723 [inline] alloc_buffer_head+0x20/0x140 fs/buffer.c:2974 alloc_page_buffers+0x280/0x790 fs/buffer.c:829 create_empty_buffers+0x2c/0xee0 fs/buffer.c:1558 ext4_block_write_begin+0x1004/0x1530 fs/ext4/inode.c:1074 ext4_da_write_begin+0x422/0xae0 fs/ext4/inode.c:2996 generic_perform_write+0x246/0x560 mm/filemap.c:3738 ext4_buffered_write_iter+0x15b/0x460 fs/ext4/file.c:270 ext4_file_write_iter+0x44a/0x1660 fs/ext4/file.c:679 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:578 Fixes: af356afa010f ("net_sched: reintroduce dev->qdisc for use by sch_api") Reported-by: syzbot Diagnosed-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20221018203258.2793282-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c98af0ada706..4a27dfb1ba0f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1099,12 +1099,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { - notify_and_destroy(net, skb, n, classid, - rtnl_dereference(dev->qdisc), new); + old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc); + notify_and_destroy(net, skb, n, classid, old, new); + if (new && new->ops->attach) new->ops->attach(new); } else { -- cgit From 7f378c03aa4952507521174fb0da7b24a9ad0be6 Mon Sep 17 00:00:00 2001 From: Felix Riemann Date: Tue, 18 Oct 2022 12:47:54 +0200 Subject: net: phy: dp83822: disable MDI crossover status change interrupt If the cable is disconnected the PHY seems to toggle between MDI and MDI-X modes. With the MDI crossover status interrupt active this causes roughly 10 interrupts per second. As the crossover status isn't checked by the driver, the interrupt can be disabled to reduce the interrupt load. Fixes: 87461f7a58ab ("net: phy: DP83822 initial driver submission") Signed-off-by: Felix Riemann Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221018104755.30025-1-svc.sw.rte.linux@sma.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 8549e0e356c9..b60db8b6f477 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -254,8 +254,7 @@ static int dp83822_config_intr(struct phy_device *phydev) DP83822_EEE_ERROR_CHANGE_INT_EN); if (!dp83822->fx_enabled) - misr_status |= DP83822_MDI_XOVER_INT_EN | - DP83822_ANEG_ERR_INT_EN | + misr_status |= DP83822_ANEG_ERR_INT_EN | DP83822_WOL_PKT_INT_EN; err = phy_write(phydev, MII_DP83822_MISR2, misr_status); -- cgit