From 7a934b5cc3f452df6f9a4903450fc103dee98ee8 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 30 Oct 2023 15:17:34 +0800 Subject: wifi: mt76: mt7921: fix 6GHz disabled by the missing default CLC config No matter CLC is enabled or disabled, the driver should initialize the default value 0xff for channel configuration of CLC. Otherwise, the zero value would disable channels. Reported-and-tested-by: Ben Greear Closes: https://lore.kernel.org/all/2fb78387-d226-3193-8ca7-90040561b9ad@candelatech.com/ Fixes: 09382d8f8641 ("wifi: mt76: mt7921: update the channel usage when the regd domain changed") Signed-off-by: Ming Yen Hsieh Signed-off-by: Deren Wu Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/5a976ddf1f636b5cb809373501d3cfdc6d8de3e4.1698648737.git.deren.wu@mediatek.com --- drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index 63f3d4a5c9aa..2cc2d2788f83 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -375,6 +375,7 @@ static int mt7921_load_clc(struct mt792x_dev *dev, const char *fw_name) int ret, i, len, offset = 0; u8 *clc_base = NULL, hw_encap = 0; + dev->phy.clc_chan_conf = 0xff; if (mt7921_disable_clc || mt76_is_usb(&dev->mt76)) return 0; -- cgit From 695bfba7ca781dd41b5225148cc8cebd74c553c2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 13 Nov 2023 11:06:33 +0100 Subject: wifi: mt76: mt7925: fix typo in mt7925_init_he_caps Use iftype for interface type switch in mt7925_init_he_caps routine. This found during code review but later Coverity reported this with id 1549845. Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7de6e939dc75ee08f05bf1ee73253aa7eeccf28e.1699869649.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 15c2fb0bcb1b..aa918b9b0469 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -14,7 +14,7 @@ static void mt7925_init_he_caps(struct mt792x_phy *phy, enum nl80211_band band, struct ieee80211_sband_iftype_data *data, - enum nl80211_iftype iftype) + enum nl80211_iftype iftype) { struct ieee80211_sta_he_cap *he_cap = &data->he_cap; struct ieee80211_he_cap_elem *he_cap_elem = &he_cap->he_cap_elem; @@ -53,7 +53,7 @@ mt7925_init_he_caps(struct mt792x_phy *phy, enum nl80211_band band, IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO | IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO; - switch (i) { + switch (iftype) { case NL80211_IFTYPE_AP: he_cap_elem->mac_cap_info[2] |= IEEE80211_HE_MAC_CAP2_BSR; -- cgit From c0c6bde586c7dce82719b4ff32a2db6af9ee3d65 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 13 Nov 2023 20:54:52 -0800 Subject: netdevsim: Don't accept device bound programs Commit 2b3486bc2d23 ("bpf: Introduce device-bound XDP programs") introduced device-bound programs by largely reusing existing offloading infrastructure. This changed the semantics of 'prog->aux->offload' a bit. Now, it's non-NULL for both offloaded and device-bound programs. Instead of looking at 'prog->aux->offload' let's call bpf_prog_is_offloaded which should be true iff the program is offloaded and not merely device-bound. Fixes: 2b3486bc2d23 ("bpf: Introduce device-bound XDP programs") Reported-by: syzbot+44c2416196b7c607f226@syzkaller.appspotmail.com Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Cc: Dipendra Khadka Link: https://lore.kernel.org/bpf/20231114045453.1816995-2-sdf@google.com Signed-off-by: Alexei Starovoitov --- drivers/net/netdevsim/bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index f60eb97e3a62..608953d4f98d 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -93,7 +93,7 @@ static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded) { struct nsim_bpf_bound_prog *state; - if (!prog || !prog->aux->offload) + if (!prog || !bpf_prog_is_offloaded(prog->aux)) return; state = prog->aux->offload->dev_priv; @@ -311,7 +311,7 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) if (!bpf->prog) return 0; - if (!bpf->prog->aux->offload) { + if (!bpf_prog_is_offloaded(bpf->prog->aux)) { NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); return -EINVAL; } -- cgit From 109b25d13e0054337860d44841b990d11b32d262 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Nov 2023 14:11:41 +0900 Subject: net: rswitch: Fix type of ret in rswitch_start_xmit() The type of ret in rswitch_start_xmit() should be netdev_tx_t. So, fix it. Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 43a7795d6591..d53d90020904 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1504,8 +1504,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd { struct rswitch_device *rdev = netdev_priv(ndev); struct rswitch_gwca_queue *gq = rdev->tx_queue; + netdev_tx_t ret = NETDEV_TX_OK; struct rswitch_ext_desc *desc; - int ret = NETDEV_TX_OK; dma_addr_t dma_addr; if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) { -- cgit From 1aaef8634a20b322c82e84f12a9b6aec1e2fd4fa Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Nov 2023 14:11:42 +0900 Subject: net: rswitch: Fix return value in rswitch_start_xmit() This .ndo_start_xmit() function should return netdev_tx_t value, not -ENOMEM. So, fix it. Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index d53d90020904..45bf9808c143 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1535,7 +1535,7 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC); if (!ts_info) { dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE); - return -ENOMEM; + return ret; } skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; -- cgit From 782486af9b5b76493012711413c141509ac45dec Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Nov 2023 14:11:43 +0900 Subject: net: rswitch: Fix missing dev_kfree_skb_any() in error path Before returning the rswitch_start_xmit() in the error path, dev_kfree_skb_any() should be called. So, fix it. Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 45bf9808c143..e77c6ff93d81 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1517,10 +1517,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd return ret; dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(ndev->dev.parent, dma_addr)) { - dev_kfree_skb_any(skb); - return ret; - } + if (dma_mapping_error(ndev->dev.parent, dma_addr)) + goto err_kfree; gq->skbs[gq->cur] = skb; desc = &gq->tx_ring[gq->cur]; @@ -1533,10 +1531,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd struct rswitch_gwca_ts_info *ts_info; ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC); - if (!ts_info) { - dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE); - return ret; - } + if (!ts_info) + goto err_unmap; skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; rdev->ts_tag++; @@ -1558,6 +1554,14 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd gq->cur = rswitch_next_queue_index(gq, true, 1); rswitch_modify(rdev->addr, GWTRC(gq->index), 0, BIT(gq->index % 32)); + return ret; + +err_unmap: + dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE); + +err_kfree: + dev_kfree_skb_any(skb); + return ret; } -- cgit From 237ff253f2d4f6307b7b20434d7cbcc67693298b Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Thu, 23 Nov 2023 09:23:39 +0800 Subject: mptcp: fix uninit-value in mptcp_incoming_options Added initialization use_ack to mptcp_parse_option(). Reported-by: syzbot+b834a6b2decad004cfa1@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index cd15ec73073e..c53914012d01 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -108,6 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; + mp_opt->use_ack = 0; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } -- cgit From e2b706c691905fe78468c361aaabc719d0a496f1 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 23 Nov 2023 15:13:14 +0800 Subject: ipv4: igmp: fix refcnt uaf issue when receiving igmp query packet When I perform the following test operations: 1.ip link add br0 type bridge 2.brctl addif br0 eth0 3.ip addr add 239.0.0.1/32 dev eth0 4.ip addr add 239.0.0.1/32 dev br0 5.ip addr add 224.0.0.1/32 dev br0 6.while ((1)) do ifconfig br0 up ifconfig br0 down done 7.send IGMPv2 query packets to port eth0 continuously. For example, ./mausezahn ethX -c 0 "01 00 5e 00 00 01 00 72 19 88 aa 02 08 00 45 00 00 1c 00 01 00 00 01 02 0e 7f c0 a8 0a b7 e0 00 00 01 11 64 ee 9b 00 00 00 00" The preceding tests may trigger the refcnt uaf issue of the mc list. The stack is as follows: refcount_t: addition on 0; use-after-free. WARNING: CPU: 21 PID: 144 at lib/refcount.c:25 refcount_warn_saturate (lib/refcount.c:25) CPU: 21 PID: 144 Comm: ksoftirqd/21 Kdump: loaded Not tainted 6.7.0-rc1-next-20231117-dirty #80 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:refcount_warn_saturate (lib/refcount.c:25) RSP: 0018:ffffb68f00657910 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8a00c3bf96c0 RCX: ffff8a07b6160908 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff8a07b6160900 RBP: ffff8a00cba36862 R08: 0000000000000000 R09: 00000000ffff7fff R10: ffffb68f006577c0 R11: ffffffffb0fdcdc8 R12: ffff8a00c3bf9680 R13: ffff8a00c3bf96f0 R14: 0000000000000000 R15: ffff8a00d8766e00 FS: 0000000000000000(0000) GS:ffff8a07b6140000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f10b520b28 CR3: 000000039741a000 CR4: 00000000000006f0 Call Trace: igmp_heard_query (net/ipv4/igmp.c:1068) igmp_rcv (net/ipv4/igmp.c:1132) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205) ip_local_deliver_finish (net/ipv4/ip_input.c:234) __netif_receive_skb_one_core (net/core/dev.c:5529) netif_receive_skb_internal (net/core/dev.c:5729) netif_receive_skb (net/core/dev.c:5788) br_handle_frame_finish (net/bridge/br_input.c:216) nf_hook_bridge_pre (net/bridge/br_input.c:294) __netif_receive_skb_core (net/core/dev.c:5423) __netif_receive_skb_list_core (net/core/dev.c:5606) __netif_receive_skb_list (net/core/dev.c:5674) netif_receive_skb_list_internal (net/core/dev.c:5764) napi_gro_receive (net/core/gro.c:609) e1000_clean_rx_irq (drivers/net/ethernet/intel/e1000/e1000_main.c:4467) e1000_clean (drivers/net/ethernet/intel/e1000/e1000_main.c:3805) __napi_poll (net/core/dev.c:6533) net_rx_action (net/core/dev.c:6735) __do_softirq (kernel/softirq.c:554) run_ksoftirqd (kernel/softirq.c:913) smpboot_thread_fn (kernel/smpboot.c:164) kthread (kernel/kthread.c:388) ret_from_fork (arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:250) The root causes are as follows: Thread A Thread B ... netif_receive_skb br_dev_stop ... br_multicast_leave_snoopers ... __ip_mc_dec_group ... __igmp_group_dropped igmp_rcv igmp_stop_timer igmp_heard_query //ref = 1 ip_ma_put igmp_mod_timer refcount_dec_and_test igmp_start_timer //ref = 0 ... refcount_inc //ref increases from 0 When the device receives an IGMPv2 Query message, it starts the timer immediately, regardless of whether the device is running. If the device is down and has left the multicast group, it will cause the mc list refcount uaf issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c3ea75b8dd..efeeca2b1328 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv = get_random_u32_below(max_delay); im->tm_running = 1; - if (!mod_timer(&im->timer, jiffies+tv+2)) - refcount_inc(&im->refcnt); + if (refcount_inc_not_zero(&im->refcnt)) { + if (mod_timer(&im->timer, jiffies + tv + 2)) + ip_ma_put(im); + } } static void igmp_gq_start_timer(struct in_device *in_dev) -- cgit From 71b5e40651d89a8685bea1592dfcd2aa61559628 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Sep 2023 15:40:41 +0300 Subject: wifi: iwlwifi: mvm: fix an error code in iwl_mvm_mld_add_sta() This error path should return -EINVAL instead of success. Fixes: 57974a55d995 ("wifi: iwlwifi: mvm: refactor iwl_mvm_mac_sta_state_common()") Signed-off-by: Dan Carpenter Acked-by: Gregory Greenman Link: https://lore.kernel.org/r/75e4ea09-db58-462f-bd4e-5ad4e5e5dcb5@moroto.mountain Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c index ca5e4fbcf8ce..6af606e5da65 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -707,8 +707,10 @@ int iwl_mvm_mld_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, rcu_dereference_protected(mvm_sta->link[link_id], lockdep_is_held(&mvm->mutex)); - if (WARN_ON(!link_conf || !mvm_link_sta)) + if (WARN_ON(!link_conf || !mvm_link_sta)) { + ret = -EINVAL; goto err; + } ret = iwl_mvm_mld_cfg_sta(mvm, sta, vif, link_sta, link_conf, mvm_link_sta); -- cgit From 3e3a2b645c043f7e3e488d5011478cefb69bbe8b Mon Sep 17 00:00:00 2001 From: Oldřich Jedlička Date: Sat, 4 Nov 2023 15:13:33 +0100 Subject: wifi: mac80211: do not pass AP_VLAN vif pointer to drivers during flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes WARN_ONs when using AP_VLANs after station removal. The flush call passed AP_VLAN vif to driver, but because these vifs are virtual and not registered with drivers, we need to translate to the correct AP vif first. Closes: https://github.com/openwrt/openwrt/issues/12420 Fixes: 0b75a1b1e42e ("wifi: mac80211: flush queues on STA removal") Fixes: d00800a289c9 ("wifi: mac80211: add flush_sta method") Tested-by: Konstantin Demin Tested-by: Koen Vandeputte Signed-off-by: Oldřich Jedlička Link: https://lore.kernel.org/r/20231104141333.3710-1-oldium.pro@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 568633b38c47..f690c385a345 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -23,7 +23,7 @@ static inline struct ieee80211_sub_if_data * get_bss_sdata(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata && sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -695,11 +695,14 @@ static inline void drv_flush(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u32 queues, bool drop) { - struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL; + struct ieee80211_vif *vif; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + sdata = get_bss_sdata(sdata); + vif = sdata ? &sdata->vif : NULL; + if (sdata && !check_sdata_in_driver(sdata)) return; @@ -716,6 +719,8 @@ static inline void drv_flush_sta(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + sdata = get_bss_sdata(sdata); + if (sdata && !check_sdata_in_driver(sdata)) return; -- cgit From 7e7efdda6adb385fbdfd6f819d76bc68c923c394 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 Nov 2023 23:17:16 +0100 Subject: wifi: cfg80211: fix CQM for non-range use My prior race fix here broke CQM when ranges aren't used, as the reporting worker now requires the cqm_config to be set in the wdev, but isn't set when there's no range configured. Rather than continuing to special-case the range version, set the cqm_config always and configure accordingly, also tracking if range was used or not to be able to clear the configuration appropriately with the same API, which was actually not right if both were implemented by a driver for some reason, as is the case with mac80211 (though there the implementations are equivalent so it doesn't matter.) Also, the original multiple-RSSI commit lost checking for the callback, so might have potentially crashed if a driver had neither implementation, and userspace tried to use it despite not being advertised as supported. Cc: stable@vger.kernel.org Fixes: 4a4b8169501b ("cfg80211: Accept multiple RSSI thresholds for CQM") Fixes: 37c20b2effe9 ("wifi: cfg80211: fix cqm_config access race") Signed-off-by: Johannes Berg --- net/wireless/core.h | 1 + net/wireless/nl80211.c | 50 +++++++++++++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index 4c692c7faf30..cb61d33d4f1e 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -293,6 +293,7 @@ struct cfg80211_cqm_config { u32 rssi_hyst; s32 last_rssi_event_value; enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; + bool use_range_api; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 569234bc2be6..dbfed5a2d7b6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12787,10 +12787,6 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, int i, n, low_index; int err; - /* RSSI reporting disabled? */ - if (!cqm_config) - return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); - /* * Obtain current RSSI value if possible, if not and no RSSI threshold * event has been received yet, we should receive an event after a @@ -12865,23 +12861,25 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { - if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ - return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); - - return rdev_set_cqm_rssi_config(rdev, dev, - thresholds[0], hysteresis); - } - - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_CQM_RSSI_LIST)) - return -EOPNOTSUPP; - if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ n_thresholds = 0; old = wiphy_dereference(wdev->wiphy, wdev->cqm_config); + /* if already disabled just succeed */ + if (!n_thresholds && !old) + return 0; + + if (n_thresholds > 1) { + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CQM_RSSI_LIST) || + !rdev->ops->set_cqm_rssi_range_config) + return -EOPNOTSUPP; + } else { + if (!rdev->ops->set_cqm_rssi_config) + return -EOPNOTSUPP; + } + if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), @@ -12894,13 +12892,26 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, memcpy(cqm_config->rssi_thresholds, thresholds, flex_array_size(cqm_config, rssi_thresholds, n_thresholds)); + cqm_config->use_range_api = n_thresholds > 1 || + !rdev->ops->set_cqm_rssi_config; rcu_assign_pointer(wdev->cqm_config, cqm_config); + + if (cqm_config->use_range_api) + err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); + else + err = rdev_set_cqm_rssi_config(rdev, dev, + thresholds[0], + hysteresis); } else { RCU_INIT_POINTER(wdev->cqm_config, NULL); + /* if enabled as range also disable via range */ + if (old->use_range_api) + err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); + else + err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0); } - err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); if (err) { rcu_assign_pointer(wdev->cqm_config, old); kfree_rcu(cqm_config, rcu_head); @@ -19009,10 +19020,11 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) s32 rssi_level; cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config); - if (!wdev->cqm_config) + if (!cqm_config) return; - cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); + if (cqm_config->use_range_api) + cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); rssi_level = cqm_config->last_rssi_event_value; rssi_event = cqm_config->last_rssi_event_type; -- cgit From 8e2f6f2366219b3304b227bdd2f04b64c92e3e12 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Nov 2023 13:41:25 +0100 Subject: wifi: cfg80211: lock wiphy mutex for rfkill poll We want to guarantee the mutex is held for pretty much all operations, so ensure that here as well. Reported-by: syzbot+7e59a5bfc7a897247e18@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 758c9a2a12c0..3f756ce46602 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -221,7 +221,9 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) { struct cfg80211_registered_device *rdev = data; + wiphy_lock(&rdev->wiphy); rdev_rfkill_poll(rdev); + wiphy_unlock(&rdev->wiphy); } void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, -- cgit From 103317670e6bf2542309db28d52444a83d84ed28 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Nov 2023 13:06:16 +0100 Subject: wifi: cfg80211: hold wiphy mutex for send_interface Given all the locking rework in mac80211, we pretty much need to get into the driver with the wiphy mutex held in all callbacks. This is already mostly the case, but as Johan reported, in the get_txpower it may not be true. Lock the wiphy mutex around nl80211_send_iface(), then is also around callers of nl80211_notify_iface(). This is easy to do, fixes the problem, and aligns the locking between various calls to it in different parts of the code of cfg80211. Fixes: 0e8185ce1dde ("wifi: mac80211: check wiphy mutex in ops") Reported-by: Johan Hovold Closes: https://lore.kernel.org/r/ZVOXX6qg4vXEx8dX@hovoldconsulting.com Tested-by: Johan Hovold Signed-off-by: Johannes Berg --- net/wireless/core.c | 4 ++-- net/wireless/nl80211.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 3f756ce46602..409d74c57ca0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -191,13 +191,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, return err; } + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); } - wiphy_lock(&rdev->wiphy); nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); wiphy_net_set(&rdev->wiphy, net); @@ -206,13 +206,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, WARN_ON(err); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); - wiphy_unlock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } + wiphy_unlock(&rdev->wiphy); return 0; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dbfed5a2d7b6..1cbbb11ea503 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3822,6 +3822,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag struct net_device *dev = wdev->netdev; void *hdr; + lockdep_assert_wiphy(&rdev->wiphy); + WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && cmd != NL80211_CMD_DEL_INTERFACE && cmd != NL80211_CMD_SET_INTERFACE); @@ -3989,6 +3991,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx = 0; + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (if_idx < if_start) { if_idx++; @@ -3998,10 +4001,12 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { + wiphy_unlock(&rdev->wiphy); goto out; } if_idx++; } + wiphy_unlock(&rdev->wiphy); wp_idx++; } -- cgit From ef5828805842204dd0259ecfc132b5916c8a77ae Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Wed, 22 Nov 2023 11:02:37 +0800 Subject: wifi: avoid offset calculation on NULL pointer ieee80211_he_6ghz_oper() can be passed a NULL pointer and checks for that, but already did the calculation to inside of it before. Move it after the check. Signed-off-by: Michael-CY Lee Link: https://lore.kernel.org/r/20231122030237.31276-1-michael-cy.lee@mediatek.com [rewrite commit message] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 958771bac9c0..c2ac9e9e7ee9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2830,12 +2830,14 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { - const u8 *ret = (const void *)&he_oper->optional; + const u8 *ret; u32 he_oper_params; if (!he_oper) return NULL; + ret = (const void *)&he_oper->optional; + he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)) -- cgit From 00f7d153f3358a7c7e35aef66fcd9ceb95d90430 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 9 Nov 2023 10:22:01 -0800 Subject: wifi: mac80211: handle 320 MHz in ieee80211_ht_cap_ie_to_sta_ht_cap The new 320 MHz channel width wasn't handled, so connecting a station to a 320 MHz AP would limit the station to 20 MHz (on HT) after a warning, handle 320 MHz to fix that. Signed-off-by: Ben Greear Link: https://lore.kernel.org/r/20231109182201.495381-1-greearb@candelatech.com [write a proper commit message] Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 68cea2685224..749f4ecab990 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -271,6 +271,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, case NL80211_CHAN_WIDTH_80: case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_320: bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; break; -- cgit From b3f1a164c7f742503dc7159011f7ad6b092b660e Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 24 Nov 2023 14:15:28 +1000 Subject: net: dsa: mv88e6xxx: fix marvell 6350 switch probing As of commit de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Marvell 88e6350 switches fail to be probed: ... mv88e6085 d0072004.mdio-mii:11: switch 0x3710 detected: Marvell 88E6350, revision 2 mv88e6085 d0072004.mdio-mii:11: phylink: error: empty supported_interfaces error creating PHYLINK: -22 mv88e6085: probe of d0072004.mdio-mii:11 failed with error -22 ... The problem stems from the use of mv88e6185_phylink_get_caps() to get the device capabilities. Create a new dedicated phylink_get_caps for the 6351 family (which the 6350 is one of) to properly support their set of capabilities. According to chip.h the 6351 switch family includes the 6171, 6175, 6350 and 6351 switches, so update each of these to use the correct phylink_get_caps. Fixes: de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Signed-off-by: Greg Ungerer Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 42b1acaca33a..d8a67bf4e595 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -577,6 +577,18 @@ static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100; } +static void mv88e6351_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) +{ + unsigned long *supported = config->supported_interfaces; + + /* Translate the default cmode */ + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; +} + static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip) { u16 reg, val; @@ -4340,7 +4352,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .stu_getnext = mv88e6352_g1_stu_getnext, .stu_loadpurge = mv88e6352_g1_stu_loadpurge, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6172_ops = { @@ -4440,7 +4452,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .stu_getnext = mv88e6352_g1_stu_getnext, .stu_loadpurge = mv88e6352_g1_stu_loadpurge, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6176_ops = { @@ -5069,7 +5081,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .stu_getnext = mv88e6352_g1_stu_getnext, .stu_loadpurge = mv88e6352_g1_stu_loadpurge, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6351_ops = { @@ -5117,7 +5129,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .stu_loadpurge = mv88e6352_g1_stu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6352_ops = { -- cgit From a524eabcd72d28425d9db242cf375d0389d74eba Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 24 Nov 2023 14:15:29 +1000 Subject: net: dsa: mv88e6xxx: fix marvell 6350 probe crash As of commit b92143d4420f ("net: dsa: mv88e6xxx: add infrastructure for phylink_pcs") probing of a Marvell 88e6350 switch causes a NULL pointer de-reference like this example: ... mv88e6085 d0072004.mdio-mii:11: switch 0x3710 detected: Marvell 88E6350, revision 2 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 when read [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] ARM Modules linked in: CPU: 0 PID: 8 Comm: kworker/u2:0 Not tainted 6.7.0-rc2-dirty #26 Hardware name: Marvell Armada 370/XP (Device Tree) Workqueue: events_unbound deferred_probe_work_func PC is at mv88e6xxx_port_setup+0x1c/0x44 LR is at dsa_port_devlink_setup+0x74/0x154 pc : [] lr : [] psr: a0000013 sp : c184fce0 ip : c542b8f4 fp : 00000000 r10: 00000001 r9 : c542a540 r8 : c542bc00 r7 : c542b838 r6 : c5244580 r5 : 00000005 r4 : c5244580 r3 : 00000000 r2 : c542b840 r1 : 00000005 r0 : c1a02040 ... The Marvell 6350 switch has no SERDES interface and so has no corresponding pcs_ops defined for it. But during probing a call is made to mv88e6xxx_port_setup() which unconditionally expects pcs_ops to exist - though the presence of the pcs_ops->pcs_init function is optional. Modify code to check for pcs_ops first, before checking for and calling pcs_ops->pcs_init. Modify checking and use of pcs_ops->pcs_teardown which may potentially suffer the same problem. Fixes: b92143d4420f ("net: dsa: mv88e6xxx: add infrastructure for phylink_pcs") Signed-off-by: Greg Ungerer Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d8a67bf4e595..07a22c74fe81 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3892,7 +3892,8 @@ static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port) struct mv88e6xxx_chip *chip = ds->priv; int err; - if (chip->info->ops->pcs_ops->pcs_init) { + if (chip->info->ops->pcs_ops && + chip->info->ops->pcs_ops->pcs_init) { err = chip->info->ops->pcs_ops->pcs_init(chip, port); if (err) return err; @@ -3907,7 +3908,8 @@ static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port) mv88e6xxx_teardown_devlink_regions_port(ds, port); - if (chip->info->ops->pcs_ops->pcs_teardown) + if (chip->info->ops->pcs_ops && + chip->info->ops->pcs_ops->pcs_teardown) chip->info->ops->pcs_ops->pcs_teardown(chip, port); } -- cgit From f422abe3f23d483cf01f386819f26fb3fe0dbb2b Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 24 Nov 2023 12:28:04 +0200 Subject: dpaa2-eth: increase the needed headroom to account for alignment Increase the needed headroom to account for a 64 byte alignment restriction which, with this patch, we make mandatory on the Tx path. The case in which the amount of headroom needed is not available is already handled by the driver which instead sends a S/G frame with the first buffer only holding the SW and HW annotation areas. Without this patch, we can empirically see data corruption happening between Tx and Tx confirmation which sometimes leads to the SW annotation area being overwritten. Since this is an old IP where the hardware team cannot help to understand the underlying behavior, we make the Tx alignment mandatory for all frames to avoid the crash on Tx conf. Also, remove the comment that suggested that this is just an optimization. This patch also sets the needed_headroom net device field to the usual value that the driver would need on the Tx path: - 64 bytes for the software annotation area - 64 bytes to account for a 64 byte aligned buffer address Fixes: 6e2387e8f19e ("staging: fsl-dpaa2/eth: Add Freescale DPAA2 Ethernet driver") Closes: https://lore.kernel.org/netdev/aa784d0c-85eb-4e5d-968b-c8f74fa86be6@gin.de/ Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 8 ++++---- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 15bab41cee48..774377db0b4b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1073,14 +1073,12 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, dma_addr_t addr; buffer_start = skb->data - dpaa2_eth_needed_headroom(skb); - - /* If there's enough room to align the FD address, do it. - * It will help hardware optimize accesses. - */ aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN, DPAA2_ETH_TX_BUF_ALIGN); if (aligned_start >= skb->head) buffer_start = aligned_start; + else + return -ENOMEM; /* Store a backpointer to the skb at the beginning of the buffer * (in the private data area) such that we can release it @@ -4967,6 +4965,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) if (err) goto err_dl_port_add; + net_dev->needed_headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN; + err = register_netdev(net_dev); if (err < 0) { dev_err(dev, "register_netdev() failed\n"); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index bfb6c96c3b2f..834cba8c3a41 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -740,7 +740,7 @@ static inline bool dpaa2_eth_rx_pause_enabled(u64 link_options) static inline unsigned int dpaa2_eth_needed_headroom(struct sk_buff *skb) { - unsigned int headroom = DPAA2_ETH_SWA_SIZE; + unsigned int headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN; /* If we don't have an skb (e.g. XDP buffer), we only need space for * the software annotation area -- cgit From beb1930f966d1517921488bd5d64147f58f79abf Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 24 Nov 2023 12:28:05 +0200 Subject: dpaa2-eth: recycle the RX buffer only after all processing done The blamed commit added support for Rx copybreak. This meant that for certain frame sizes, a new skb was allocated and the initial data buffer was recycled. Instead of waiting to recycle the Rx buffer only after all processing was done on it (like accessing the parse results or timestamp information), the code path just went ahead and re-used the buffer right away. This sometimes lead to corrupted HW and SW annotation areas. Fix this by delaying the moment when the buffer is recycled. Fixes: 50f826999a80 ("dpaa2-eth: add rx copybreak support") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 774377db0b4b..888509cf1f21 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -516,8 +516,6 @@ struct sk_buff *dpaa2_eth_alloc_skb(struct dpaa2_eth_priv *priv, memcpy(skb->data, fd_vaddr + fd_offset, fd_length); - dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd)); - return skb; } @@ -589,6 +587,7 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, struct rtnl_link_stats64 *percpu_stats; struct dpaa2_eth_drv_stats *percpu_extras; struct device *dev = priv->net_dev->dev.parent; + bool recycle_rx_buf = false; void *buf_data; u32 xdp_act; @@ -618,6 +617,8 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, dma_unmap_page(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr); + } else { + recycle_rx_buf = true; } } else if (fd_format == dpaa2_fd_sg) { WARN_ON(priv->xdp_prog); @@ -637,6 +638,9 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, goto err_build_skb; dpaa2_eth_receive_skb(priv, ch, fd, vaddr, fq, percpu_stats, skb); + + if (recycle_rx_buf) + dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd)); return; err_build_skb: -- cgit From 75a442581d05edaee168222ffbe00d4389785636 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 11 Nov 2023 12:38:21 +0800 Subject: bpf: Add missed allocation hint for bpf_mem_cache_alloc_flags() bpf_mem_cache_alloc_flags() may call __alloc() directly when there is no free object in free list, but it doesn't initialize the allocation hint for the returned pointer. It may lead to bad memory dereference when freeing the pointer, so fix it by initializing the allocation hint. Fixes: 822fb26bdb55 ("bpf: Add a hint to allocated objects.") Signed-off-by: Hou Tao Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231111043821.2258513-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 63b909d277d4..6a51cfe4c2d6 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -978,6 +978,8 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) memcg = get_memcg(c); old_memcg = set_active_memcg(memcg); ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT); + if (ret) + *(struct bpf_mem_cache **)ret = c; set_active_memcg(old_memcg); mem_cgroup_put(memcg); } -- cgit From 0ed04a1847a10297595ac24dc7d46b35fb35f90a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2023 17:25:24 +0100 Subject: debugfs: fix automount d_fsdata usage debugfs_create_automount() stores a function pointer in d_fsdata, but since commit 7c8d469877b1 ("debugfs: add support for more elaborate ->d_fsdata") debugfs_release_dentry() will free it, now conditionally on DEBUGFS_FSDATA_IS_REAL_FOPS_BIT, but that's not set for the function pointer in automount. As a result, removing an automount dentry would attempt to free the function pointer. Luckily, the only user of this (tracing) never removes it. Nevertheless, it's safer if we just handle the fsdata in one way, namely either DEBUGFS_FSDATA_IS_REAL_FOPS_BIT or allocated. Thus, change the automount to allocate it, and use the real_fops in the data to indicate whether or not automount is filled, rather than adding a type tag. At least for now this isn't actually needed, but the next changes will require it. Also check in debugfs_file_get() that it gets only called on regular files, just to make things clearer. Acked-by: Greg Kroah-Hartman Signed-off-by: Johannes Berg --- fs/debugfs/file.c | 8 ++++++++ fs/debugfs/inode.c | 27 ++++++++++++++++++++------- fs/debugfs/internal.h | 10 ++++++++-- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 87b3753aa4b1..e00189aebbf4 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -84,6 +84,14 @@ int debugfs_file_get(struct dentry *dentry) struct debugfs_fsdata *fsd; void *d_fsd; + /* + * This could only happen if some debugfs user erroneously calls + * debugfs_file_get() on a dentry that isn't even a file, let + * them know about it. + */ + if (WARN_ON(!d_is_reg(dentry))) + return -EINVAL; + d_fsd = READ_ONCE(dentry->d_fsdata); if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { fsd = d_fsd; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 83e57e9f9fa0..dcde4199a625 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -236,17 +236,19 @@ static const struct super_operations debugfs_super_operations = { static void debugfs_release_dentry(struct dentry *dentry) { - void *fsd = dentry->d_fsdata; + struct debugfs_fsdata *fsd = dentry->d_fsdata; - if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) - kfree(dentry->d_fsdata); + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) + return; + + kfree(fsd); } static struct vfsmount *debugfs_automount(struct path *path) { - debugfs_automount_t f; - f = (debugfs_automount_t)path->dentry->d_fsdata; - return f(path->dentry, d_inode(path->dentry)->i_private); + struct debugfs_fsdata *fsd = path->dentry->d_fsdata; + + return fsd->automount(path->dentry, d_inode(path->dentry)->i_private); } static const struct dentry_operations debugfs_dops = { @@ -634,13 +636,23 @@ struct dentry *debugfs_create_automount(const char *name, void *data) { struct dentry *dentry = start_creating(name, parent); + struct debugfs_fsdata *fsd; struct inode *inode; if (IS_ERR(dentry)) return dentry; + fsd = kzalloc(sizeof(*fsd), GFP_KERNEL); + if (!fsd) { + failed_creating(dentry); + return ERR_PTR(-ENOMEM); + } + + fsd->automount = f; + if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); + kfree(fsd); return ERR_PTR(-EPERM); } @@ -648,13 +660,14 @@ struct dentry *debugfs_create_automount(const char *name, if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", name); + kfree(fsd); return failed_creating(dentry); } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; - dentry->d_fsdata = (void *)f; + dentry->d_fsdata = fsd; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index 92af8ae31313..f7c489b5a368 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -17,8 +17,14 @@ extern const struct file_operations debugfs_full_proxy_file_operations; struct debugfs_fsdata { const struct file_operations *real_fops; - refcount_t active_users; - struct completion active_users_drained; + union { + /* automount_fn is used when real_fops is NULL */ + debugfs_automount_t automount; + struct { + refcount_t active_users; + struct completion active_users_drained; + }; + }; }; /* -- cgit From f4acfcd4deb158b96595250cc332901b282d15b0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2023 17:25:25 +0100 Subject: debugfs: annotate debugfs handlers vs. removal with lockdep When you take a lock in a debugfs handler but also try to remove the debugfs file under that lock, things can deadlock since the removal has to wait for all users to finish. Add lockdep annotations in debugfs_file_get()/_put() to catch such issues. Acked-by: Greg Kroah-Hartman Signed-off-by: Johannes Berg --- fs/debugfs/file.c | 10 ++++++++++ fs/debugfs/inode.c | 12 ++++++++++++ fs/debugfs/internal.h | 6 ++++++ 3 files changed, 28 insertions(+) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index e00189aebbf4..3eff92450fd5 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -108,6 +108,12 @@ int debugfs_file_get(struct dentry *dentry) kfree(fsd); fsd = READ_ONCE(dentry->d_fsdata); } +#ifdef CONFIG_LOCKDEP + fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry); + lockdep_register_key(&fsd->key); + lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs", + &fsd->key, 0); +#endif } /* @@ -124,6 +130,8 @@ int debugfs_file_get(struct dentry *dentry) if (!refcount_inc_not_zero(&fsd->active_users)) return -EIO; + lock_map_acquire_read(&fsd->lockdep_map); + return 0; } EXPORT_SYMBOL_GPL(debugfs_file_get); @@ -141,6 +149,8 @@ void debugfs_file_put(struct dentry *dentry) { struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); + lock_map_release(&fsd->lockdep_map); + if (refcount_dec_and_test(&fsd->active_users)) complete(&fsd->active_users_drained); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index dcde4199a625..80f4f000dcc1 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -241,6 +241,14 @@ static void debugfs_release_dentry(struct dentry *dentry) if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; + /* check it wasn't a dir (no fsdata) or automount (no real_fops) */ + if (fsd && fsd->real_fops) { +#ifdef CONFIG_LOCKDEP + lockdep_unregister_key(&fsd->key); + kfree(fsd->lock_name); +#endif + } + kfree(fsd); } @@ -744,6 +752,10 @@ static void __debugfs_file_removed(struct dentry *dentry) fsd = READ_ONCE(dentry->d_fsdata); if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; + + lock_map_acquire(&fsd->lockdep_map); + lock_map_release(&fsd->lockdep_map); + if (!refcount_dec_and_test(&fsd->active_users)) wait_for_completion(&fsd->active_users_drained); } diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index f7c489b5a368..c7d61cfc97d2 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -7,6 +7,7 @@ #ifndef _DEBUGFS_INTERNAL_H_ #define _DEBUGFS_INTERNAL_H_ +#include struct file_operations; @@ -23,6 +24,11 @@ struct debugfs_fsdata { struct { refcount_t active_users; struct completion active_users_drained; +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; + struct lock_class_key key; + char *lock_name; +#endif }; }; }; -- cgit From 8c88a474357ead632b07c70bf7f119ace8c3b39e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2023 17:25:26 +0100 Subject: debugfs: add API to allow debugfs operations cancellation In some cases there might be longer-running hardware accesses in debugfs files, or attempts to acquire locks, and we want to still be able to quickly remove the files. Introduce a cancellations API to use inside the debugfs handler functions to be able to cancel such operations on a per-file basis. Acked-by: Greg Kroah-Hartman Signed-off-by: Johannes Berg --- fs/debugfs/file.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/debugfs/inode.c | 32 ++++++++++++++++++- fs/debugfs/internal.h | 5 +++ include/linux/debugfs.h | 19 ++++++++++++ 4 files changed, 137 insertions(+), 1 deletion(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 3eff92450fd5..5568cdea3490 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -114,6 +114,8 @@ int debugfs_file_get(struct dentry *dentry) lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs", &fsd->key, 0); #endif + INIT_LIST_HEAD(&fsd->cancellations); + mutex_init(&fsd->cancellations_mtx); } /* @@ -156,6 +158,86 @@ void debugfs_file_put(struct dentry *dentry) } EXPORT_SYMBOL_GPL(debugfs_file_put); +/** + * debugfs_enter_cancellation - enter a debugfs cancellation + * @file: the file being accessed + * @cancellation: the cancellation object, the cancel callback + * inside of it must be initialized + * + * When a debugfs file is removed it needs to wait for all active + * operations to complete. However, the operation itself may need + * to wait for hardware or completion of some asynchronous process + * or similar. As such, it may need to be cancelled to avoid long + * waits or even deadlocks. + * + * This function can be used inside a debugfs handler that may + * need to be cancelled. As soon as this function is called, the + * cancellation's 'cancel' callback may be called, at which point + * the caller should proceed to call debugfs_leave_cancellation() + * and leave the debugfs handler function as soon as possible. + * Note that the 'cancel' callback is only ever called in the + * context of some kind of debugfs_remove(). + * + * This function must be paired with debugfs_leave_cancellation(). + */ +void debugfs_enter_cancellation(struct file *file, + struct debugfs_cancellation *cancellation) +{ + struct debugfs_fsdata *fsd; + struct dentry *dentry = F_DENTRY(file); + + INIT_LIST_HEAD(&cancellation->list); + + if (WARN_ON(!d_is_reg(dentry))) + return; + + if (WARN_ON(!cancellation->cancel)) + return; + + fsd = READ_ONCE(dentry->d_fsdata); + if (WARN_ON(!fsd || + ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + return; + + mutex_lock(&fsd->cancellations_mtx); + list_add(&cancellation->list, &fsd->cancellations); + mutex_unlock(&fsd->cancellations_mtx); + + /* if we're already removing wake it up to cancel */ + if (d_unlinked(dentry)) + complete(&fsd->active_users_drained); +} +EXPORT_SYMBOL_GPL(debugfs_enter_cancellation); + +/** + * debugfs_leave_cancellation - leave cancellation section + * @file: the file being accessed + * @cancellation: the cancellation previously registered with + * debugfs_enter_cancellation() + * + * See the documentation of debugfs_enter_cancellation(). + */ +void debugfs_leave_cancellation(struct file *file, + struct debugfs_cancellation *cancellation) +{ + struct debugfs_fsdata *fsd; + struct dentry *dentry = F_DENTRY(file); + + if (WARN_ON(!d_is_reg(dentry))) + return; + + fsd = READ_ONCE(dentry->d_fsdata); + if (WARN_ON(!fsd || + ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + return; + + mutex_lock(&fsd->cancellations_mtx); + if (!list_empty(&cancellation->list)) + list_del(&cancellation->list); + mutex_unlock(&fsd->cancellations_mtx); +} +EXPORT_SYMBOL_GPL(debugfs_leave_cancellation); + /* * Only permit access to world-readable files when the kernel is locked down. * We also need to exclude any file that has ways to write or alter it as root diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 80f4f000dcc1..d53c2860b03c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -247,6 +247,8 @@ static void debugfs_release_dentry(struct dentry *dentry) lockdep_unregister_key(&fsd->key); kfree(fsd->lock_name); #endif + WARN_ON(!list_empty(&fsd->cancellations)); + mutex_destroy(&fsd->cancellations_mtx); } kfree(fsd); @@ -756,8 +758,36 @@ static void __debugfs_file_removed(struct dentry *dentry) lock_map_acquire(&fsd->lockdep_map); lock_map_release(&fsd->lockdep_map); - if (!refcount_dec_and_test(&fsd->active_users)) + /* if we hit zero, just wait for all to finish */ + if (!refcount_dec_and_test(&fsd->active_users)) { wait_for_completion(&fsd->active_users_drained); + return; + } + + /* if we didn't hit zero, try to cancel any we can */ + while (refcount_read(&fsd->active_users)) { + struct debugfs_cancellation *c; + + /* + * Lock the cancellations. Note that the cancellations + * structs are meant to be on the stack, so we need to + * ensure we either use them here or don't touch them, + * and debugfs_leave_cancellation() will wait for this + * to be finished processing before exiting one. It may + * of course win and remove the cancellation, but then + * chances are we never even got into this bit, we only + * do if the refcount isn't zero already. + */ + mutex_lock(&fsd->cancellations_mtx); + while ((c = list_first_entry_or_null(&fsd->cancellations, + typeof(*c), list))) { + list_del_init(&c->list); + c->cancel(dentry, c->cancel_data); + } + mutex_unlock(&fsd->cancellations_mtx); + + wait_for_completion(&fsd->active_users_drained); + } } static void remove_one(struct dentry *victim) diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index c7d61cfc97d2..0c4c68cf161f 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -8,6 +8,7 @@ #ifndef _DEBUGFS_INTERNAL_H_ #define _DEBUGFS_INTERNAL_H_ #include +#include struct file_operations; @@ -29,6 +30,10 @@ struct debugfs_fsdata { struct lock_class_key key; char *lock_name; #endif + + /* protect cancellations */ + struct mutex cancellations_mtx; + struct list_head cancellations; }; }; }; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index ea2d919fd9c7..c9c65b132c0f 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -171,6 +171,25 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, size_t count, loff_t *ppos); +/** + * struct debugfs_cancellation - cancellation data + * @list: internal, for keeping track + * @cancel: callback to call + * @cancel_data: extra data for the callback to call + */ +struct debugfs_cancellation { + struct list_head list; + void (*cancel)(struct dentry *, void *); + void *cancel_data; +}; + +void __acquires(cancellation) +debugfs_enter_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); +void __releases(cancellation) +debugfs_leave_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); + #else #include -- cgit From b590b9ae1efc30e52f81d95cdb2519a4c248b965 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2023 17:25:27 +0100 Subject: wifi: cfg80211: add locked debugfs wrappers Add wrappers for debugfs files that should be called with the wiphy mutex held, while the file is also to be removed under the wiphy mutex. This could otherwise deadlock when a file is trying to acquire the wiphy mutex while the code removing it holds the mutex but waits for the removal. This actually works by pushing the execution of the read or write handler to a wiphy work that can be cancelled using the debugfs cancellation API. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 46 ++++++++++++++ net/wireless/debugfs.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b137a33a1b68..4ecfb06c413d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -9299,4 +9299,50 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, */ void cfg80211_links_removed(struct net_device *dev, u16 link_mask); +#ifdef CONFIG_CFG80211_DEBUGFS +/** + * wiphy_locked_debugfs_read - do a locked read in debugfs + * @wiphy: the wiphy to use + * @file: the file being read + * @buf: the buffer to fill and then read from + * @bufsize: size of the buffer + * @userbuf: the user buffer to copy to + * @count: read count + * @ppos: read position + * @handler: the read handler to call (under wiphy lock) + * @data: additional data to pass to the read handler + */ +ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, + char __user *userbuf, size_t count, + loff_t *ppos, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data), + void *data); + +/** + * wiphy_locked_debugfs_write - do a locked write in debugfs + * @wiphy: the wiphy to use + * @file: the file being written to + * @buf: the buffer to copy the user data to + * @bufsize: size of the buffer + * @userbuf: the user buffer to copy from + * @count: read count + * @handler: the write handler to call (under wiphy lock) + * @data: additional data to pass to the write handler + */ +ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, + const char __user *userbuf, size_t count, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data), + void *data); +#endif + #endif /* __NET_CFG80211_H */ diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 0878b162890a..40e49074e2ee 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -4,6 +4,7 @@ * * Copyright 2009 Luis R. Rodriguez * Copyright 2007 Johannes Berg + * Copyright (C) 2023 Intel Corporation */ #include @@ -109,3 +110,162 @@ void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) DEBUGFS_ADD(long_retry_limit); DEBUGFS_ADD(ht40allow_map); } + +struct debugfs_read_work { + struct wiphy_work work; + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data); + struct wiphy *wiphy; + struct file *file; + char *buf; + size_t bufsize; + void *data; + ssize_t ret; + struct completion completion; +}; + +static void wiphy_locked_debugfs_read_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct debugfs_read_work *w = container_of(work, typeof(*w), work); + + w->ret = w->handler(w->wiphy, w->file, w->buf, w->bufsize, w->data); + complete(&w->completion); +} + +static void wiphy_locked_debugfs_read_cancel(struct dentry *dentry, + void *data) +{ + struct debugfs_read_work *w = data; + + wiphy_work_cancel(w->wiphy, &w->work); + complete(&w->completion); +} + +ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, + char __user *userbuf, size_t count, + loff_t *ppos, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data), + void *data) +{ + struct debugfs_read_work work = { + .handler = handler, + .wiphy = wiphy, + .file = file, + .buf = buf, + .bufsize = bufsize, + .data = data, + .ret = -ENODEV, + .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion), + }; + struct debugfs_cancellation cancellation = { + .cancel = wiphy_locked_debugfs_read_cancel, + .cancel_data = &work, + }; + + /* don't leak stack data or whatever */ + memset(buf, 0, bufsize); + + wiphy_work_init(&work.work, wiphy_locked_debugfs_read_work); + wiphy_work_queue(wiphy, &work.work); + + debugfs_enter_cancellation(file, &cancellation); + wait_for_completion(&work.completion); + debugfs_leave_cancellation(file, &cancellation); + + if (work.ret < 0) + return work.ret; + + if (WARN_ON(work.ret > bufsize)) + return -EINVAL; + + return simple_read_from_buffer(userbuf, count, ppos, buf, work.ret); +} +EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_read); + +struct debugfs_write_work { + struct wiphy_work work; + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data); + struct wiphy *wiphy; + struct file *file; + char *buf; + size_t count; + void *data; + ssize_t ret; + struct completion completion; +}; + +static void wiphy_locked_debugfs_write_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct debugfs_write_work *w = container_of(work, typeof(*w), work); + + w->ret = w->handler(w->wiphy, w->file, w->buf, w->count, w->data); + complete(&w->completion); +} + +static void wiphy_locked_debugfs_write_cancel(struct dentry *dentry, + void *data) +{ + struct debugfs_write_work *w = data; + + wiphy_work_cancel(w->wiphy, &w->work); + complete(&w->completion); +} + +ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, + struct file *file, char *buf, size_t bufsize, + const char __user *userbuf, size_t count, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data), + void *data) +{ + struct debugfs_write_work work = { + .handler = handler, + .wiphy = wiphy, + .file = file, + .buf = buf, + .count = count, + .data = data, + .ret = -ENODEV, + .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion), + }; + struct debugfs_cancellation cancellation = { + .cancel = wiphy_locked_debugfs_write_cancel, + .cancel_data = &work, + }; + + /* mostly used for strings so enforce NUL-termination for safety */ + if (count >= bufsize) + return -EINVAL; + + memset(buf, 0, bufsize); + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + wiphy_work_init(&work.work, wiphy_locked_debugfs_write_work); + wiphy_work_queue(wiphy, &work.work); + + debugfs_enter_cancellation(file, &cancellation); + wait_for_completion(&work.completion); + debugfs_leave_cancellation(file, &cancellation); + + return work.ret; +} +EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_write); -- cgit From 3d529cd11f2b6c1c3b8e084269152eb30fbb96f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2023 17:25:28 +0100 Subject: wifi: mac80211: use wiphy locked debugfs helpers for agg_status The read is currently with RCU and the write can deadlock, convert both for the sake of illustration. Make mac80211 depend on cfg80211 debugfs to get the helpers, but mac80211 debugfs without it does nothing anyway. This also required some adjustments in ath9k. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/Kconfig | 4 +- net/mac80211/Kconfig | 2 +- net/mac80211/debugfs_sta.c | 74 +++++++++++++++++++--------------- 3 files changed, 44 insertions(+), 36 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index e150d82eddb6..0c47be06c153 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -57,8 +57,7 @@ config ATH9K_AHB config ATH9K_DEBUGFS bool "Atheros ath9k debugging" - depends on ATH9K && DEBUG_FS - select MAC80211_DEBUGFS + depends on ATH9K && DEBUG_FS && MAC80211_DEBUGFS select ATH9K_COMMON_DEBUG help Say Y, if you need access to ath9k's statistics for @@ -70,7 +69,6 @@ config ATH9K_DEBUGFS config ATH9K_STATION_STATISTICS bool "Detailed station statistics" depends on ATH9K && ATH9K_DEBUGFS && DEBUG_FS - select MAC80211_DEBUGFS default n help This option enables detailed statistics for association stations. diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 037ab74f5ade..cb0291decf2e 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -88,7 +88,7 @@ config MAC80211_LEDS config MAC80211_DEBUGFS bool "Export mac80211 internals in DebugFS" - depends on MAC80211 && DEBUG_FS + depends on MAC80211 && CFG80211_DEBUGFS help Select this to see extensive information about the internal state of mac80211 in debugfs. diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 06e3613bf46b..5bf507ebb096 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -312,23 +312,14 @@ static ssize_t sta_aql_write(struct file *file, const char __user *userbuf, STA_OPS_RW(aql); -static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t sta_agg_status_do_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsz, void *data) { - char *buf, *p; - ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; + struct sta_info *sta = data; + char *p = buf; int i; - struct sta_info *sta = file->private_data; struct tid_ampdu_rx *tid_rx; struct tid_ampdu_tx *tid_tx; - ssize_t ret; - - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - rcu_read_lock(); p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); @@ -338,8 +329,8 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, for (i = 0; i < IEEE80211_NUM_TIDS; i++) { bool tid_rx_valid; - tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); - tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); + tid_rx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_rx[i]); + tid_tx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_tx[i]); tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid); p += scnprintf(p, bufsz + buf - p, "%02d", i); @@ -358,31 +349,39 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, tid_tx ? skb_queue_len(&tid_tx->pending) : 0); p += scnprintf(p, bufsz + buf - p, "\n"); } - rcu_read_unlock(); - ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + return p - buf; +} + +static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct wiphy *wiphy = sta->local->hw.wiphy; + size_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; + char *buf = kmalloc(bufsz, GFP_KERNEL); + ssize_t ret; + + if (!buf) + return -ENOMEM; + + ret = wiphy_locked_debugfs_read(wiphy, file, buf, bufsz, + userbuf, count, ppos, + sta_agg_status_do_read, sta); kfree(buf); + return ret; } -static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t sta_agg_status_do_write(struct wiphy *wiphy, struct file *file, + char *buf, size_t count, void *data) { - char _buf[25] = {}, *buf = _buf; - struct sta_info *sta = file->private_data; + struct sta_info *sta = data; bool start, tx; unsigned long tid; - char *pos; + char *pos = buf; int ret, timeout = 5000; - if (count > sizeof(_buf)) - return -EINVAL; - - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - - buf[sizeof(_buf) - 1] = '\0'; - pos = buf; buf = strsep(&pos, " "); if (!buf) return -EINVAL; @@ -420,7 +419,6 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (ret || tid >= IEEE80211_NUM_TIDS) return -EINVAL; - wiphy_lock(sta->local->hw.wiphy); if (tx) { if (start) ret = ieee80211_start_tx_ba_session(&sta->sta, tid, @@ -432,10 +430,22 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu 3, true); ret = 0; } - wiphy_unlock(sta->local->hw.wiphy); return ret ?: count; } + +static ssize_t sta_agg_status_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct wiphy *wiphy = sta->local->hw.wiphy; + char _buf[26]; + + return wiphy_locked_debugfs_write(wiphy, file, _buf, sizeof(_buf), + userbuf, count, + sta_agg_status_do_write, sta); +} STA_OPS_RW(agg_status); /* link sta attributes */ -- cgit From 4ded3bfe1db655367642aadba91aee770cbab317 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Nov 2023 17:25:29 +0100 Subject: wifi: mac80211: use wiphy locked debugfs for sdata/link The debugfs files for netdevs (sdata) and links are removed with the wiphy mutex held, which may deadlock. Use the new wiphy locked debugfs to avoid that. Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 150 +++++++++++++++++++++++++++++------------- 1 file changed, 105 insertions(+), 45 deletions(-) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index ec91e131b29e..80aeb25f1b68 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -22,88 +22,148 @@ #include "debugfs_netdev.h" #include "driver-ops.h" +struct ieee80211_if_read_sdata_data { + ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int); + struct ieee80211_sub_if_data *sdata; +}; + +static ssize_t ieee80211_if_read_sdata_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data) +{ + struct ieee80211_if_read_sdata_data *d = data; + + return d->format(d->sdata, buf, bufsize); +} + static ssize_t ieee80211_if_read_sdata( - struct ieee80211_sub_if_data *sdata, + struct file *file, char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*format)(const struct ieee80211_sub_if_data *sdata, char *, int)) { + struct ieee80211_sub_if_data *sdata = file->private_data; + struct ieee80211_if_read_sdata_data data = { + .format = format, + .sdata = sdata, + }; char buf[200]; - ssize_t ret = -EINVAL; - wiphy_lock(sdata->local->hw.wiphy); - ret = (*format)(sdata, buf, sizeof(buf)); - wiphy_unlock(sdata->local->hw.wiphy); + return wiphy_locked_debugfs_read(sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, ppos, + ieee80211_if_read_sdata_handler, + &data); +} - if (ret >= 0) - ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); +struct ieee80211_if_write_sdata_data { + ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int); + struct ieee80211_sub_if_data *sdata; +}; + +static ssize_t ieee80211_if_write_sdata_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data) +{ + struct ieee80211_if_write_sdata_data *d = data; - return ret; + return d->write(d->sdata, buf, count); } static ssize_t ieee80211_if_write_sdata( - struct ieee80211_sub_if_data *sdata, + struct file *file, const char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*write)(struct ieee80211_sub_if_data *sdata, const char *, int)) { + struct ieee80211_sub_if_data *sdata = file->private_data; + struct ieee80211_if_write_sdata_data data = { + .write = write, + .sdata = sdata, + }; char buf[64]; - ssize_t ret; - if (count >= sizeof(buf)) - return -E2BIG; + return wiphy_locked_debugfs_write(sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, + ieee80211_if_write_sdata_handler, + &data); +} - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - buf[count] = '\0'; +struct ieee80211_if_read_link_data { + ssize_t (*format)(const struct ieee80211_link_data *, char *, int); + struct ieee80211_link_data *link; +}; - wiphy_lock(sdata->local->hw.wiphy); - ret = (*write)(sdata, buf, count); - wiphy_unlock(sdata->local->hw.wiphy); +static ssize_t ieee80211_if_read_link_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data) +{ + struct ieee80211_if_read_link_data *d = data; - return ret; + return d->format(d->link, buf, bufsize); } static ssize_t ieee80211_if_read_link( - struct ieee80211_link_data *link, + struct file *file, char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*format)(const struct ieee80211_link_data *link, char *, int)) { + struct ieee80211_link_data *link = file->private_data; + struct ieee80211_if_read_link_data data = { + .format = format, + .link = link, + }; char buf[200]; - ssize_t ret = -EINVAL; - wiphy_lock(link->sdata->local->hw.wiphy); - ret = (*format)(link, buf, sizeof(buf)); - wiphy_unlock(link->sdata->local->hw.wiphy); + return wiphy_locked_debugfs_read(link->sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, ppos, + ieee80211_if_read_link_handler, + &data); +} + +struct ieee80211_if_write_link_data { + ssize_t (*write)(struct ieee80211_link_data *, const char *, int); + struct ieee80211_link_data *link; +}; - if (ret >= 0) - ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); +static ssize_t ieee80211_if_write_link_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data) +{ + struct ieee80211_if_write_sdata_data *d = data; - return ret; + return d->write(d->sdata, buf, count); } static ssize_t ieee80211_if_write_link( - struct ieee80211_link_data *link, + struct file *file, const char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*write)(struct ieee80211_link_data *link, const char *, int)) { + struct ieee80211_link_data *link = file->private_data; + struct ieee80211_if_write_link_data data = { + .write = write, + .link = link, + }; char buf[64]; - ssize_t ret; - - if (count >= sizeof(buf)) - return -E2BIG; - - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - buf[count] = '\0'; - - wiphy_lock(link->sdata->local->hw.wiphy); - ret = (*write)(link, buf, count); - wiphy_unlock(link->sdata->local->hw.wiphy); - return ret; + return wiphy_locked_debugfs_write(link->sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, + ieee80211_if_write_link_handler, + &data); } #define IEEE80211_IF_FMT(name, type, field, format_string) \ @@ -173,7 +233,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_read_sdata(file->private_data, \ + return ieee80211_if_read_sdata(file, \ userbuf, count, ppos, \ ieee80211_if_fmt_##name); \ } @@ -183,7 +243,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_write_sdata(file->private_data, userbuf, \ + return ieee80211_if_write_sdata(file, userbuf, \ count, ppos, \ ieee80211_if_parse_##name); \ } @@ -211,7 +271,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_read_link(file->private_data, \ + return ieee80211_if_read_link(file, \ userbuf, count, ppos, \ ieee80211_if_fmt_##name); \ } @@ -221,7 +281,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_write_link(file->private_data, userbuf, \ + return ieee80211_if_write_link(file, userbuf, \ count, ppos, \ ieee80211_if_parse_##name); \ } -- cgit From 0bad281d0ecdf8391b0f42678b663336e7c3ceb0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 27 Nov 2023 21:05:33 +0100 Subject: netkit: Reject IFLA_NETKIT_PEER_INFO in netkit_change_link The IFLA_NETKIT_PEER_INFO attribute can only be used during device creation, but not via changelink callback. Hence reject it there. Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device") Signed-off-by: Daniel Borkmann Acked-by: Nikolay Aleksandrov Cc: Jakub Kicinski Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/e86a277a1e8d3b19890312779e42f790b0605ea4.1701115314.git.daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/netkit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 97bd6705c241..39171380ccf2 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -851,6 +851,12 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], return -EACCES; } + if (data[IFLA_NETKIT_PEER_INFO]) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO], + "netkit peer info cannot be changed after device creation"); + return -EINVAL; + } + if (data[IFLA_NETKIT_POLICY]) { attr = data[IFLA_NETKIT_POLICY]; policy = nla_get_u32(attr); -- cgit From 088559815477c6f623a5db5993491ddd7facbec7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:19 -0500 Subject: selftests/net: ipsec: fix constant out of range Fix a small compiler warning. nr_process must be a signed long: it is assigned a signed long by strtol() and is compared against LONG_MIN and LONG_MAX. ipsec.c:2280:65: error: result of comparison of constant -9223372036854775808 with expression of type 'unsigned int' is always false [-Werror,-Wtautological-constant-out-of-range-compare] if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN)) Fixes: bc2652b7ae1e ("selftest/net/xfrm: Add test for ipsec tunnel") Signed-off-by: Willem de Bruijn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20231124171645.1011043-2-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ipsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 9a8229abfa02..be4a30a0d02a 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -2263,7 +2263,7 @@ static int check_results(void) int main(int argc, char **argv) { - unsigned int nr_process = 1; + long nr_process = 1; int route_sock = -1, ret = KSFT_SKIP; int test_desc_fd[2]; uint32_t route_seq; @@ -2284,7 +2284,7 @@ int main(int argc, char **argv) exit_usage(argv); } - if (nr_process > MAX_PROCESSES || !nr_process) { + if (nr_process > MAX_PROCESSES || nr_process < 1) { printk("nr_process should be between [1; %u]", MAX_PROCESSES); exit_usage(argv); -- cgit From 7b29828c5af6841bdeb9fafa32fdfeff7ab9c407 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:20 -0500 Subject: selftests/net: fix a char signedness issue Signedness of char is signed on x86_64, but unsigned on arm64. Fix the warning building cmsg_sender.c on signed platforms or forced with -fsigned-char: msg_sender.c:455:12: error: implicit conversion from 'int' to 'char' changes value from 128 to -128 [-Werror,-Wconstant-conversion] buf[0] = ICMPV6_ECHO_REQUEST; constant ICMPV6_ECHO_REQUEST is 128. Link: https://lwn.net/Articles/911914 Fixes: de17e305a810 ("selftests: net: cmsg_sender: support icmp and raw sockets") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231124171645.1011043-3-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/cmsg_sender.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 24b21b15ed3f..6ff3e732f449 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -416,9 +416,9 @@ int main(int argc, char *argv[]) { struct addrinfo hints, *ai; struct iovec iov[1]; + unsigned char *buf; struct msghdr msg; char cbuf[1024]; - char *buf; int err; int fd; -- cgit From 59fef379d453781f0dabfa1f1a1e86e78aee919a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:21 -0500 Subject: selftests/net: unix: fix unused variable compiler warning Remove an unused variable. diag_uid.c:151:24: error: unused variable 'udr' [-Werror,-Wunused-variable] Fixes: ac011361bd4f ("af_unix: Add test for sock_diag and UDIAG_SHOW_UID.") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231124171645.1011043-4-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/diag_uid.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c index 5b88f7129fea..79a3dd75590e 100644 --- a/tools/testing/selftests/net/af_unix/diag_uid.c +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -148,7 +148,6 @@ void receive_response(struct __test_metadata *_metadata, .msg_iov = &iov, .msg_iovlen = 1 }; - struct unix_diag_req *udr; struct nlmsghdr *nlh; int ret; -- cgit From 00a4f8fd9c750f20d8fd4535c71c9caa7ef5ff2f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:22 -0500 Subject: selftests/net: mptcp: fix uninitialized variable warnings Same init_rng() in both tests. The function reads /dev/urandom to initialize srand(). In case of failure, it falls back onto the entropy in the uninitialized variable. Not sure if this is on purpose. But failure reading urandom should be rare, so just fail hard. While at it, convert to getrandom(). Which man 4 random suggests is simpler and more robust. mptcp_inq.c:525:6: mptcp_connect.c:1131:6: error: variable 'foo' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Fixes: b51880568f20 ("selftests: mptcp: add inq test case") Cc: Florian Westphal Signed-off-by: Willem de Bruijn ---- When input is randomized because this is expected to meaningfully explore edge cases, should we also add 1. logging the random seed to stdout and 2. adding a command line argument to replay from a specific seed I can do this in net-next, if authors find it useful in this case. Reviewed-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231124171645.1011043-5-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 11 ++++------- tools/testing/selftests/net/mptcp/mptcp_inq.c | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index c7f9ebeebc2c..d2043ec3bf6d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -1125,15 +1126,11 @@ again: static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 8672d898f8cd..218aac467321 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -519,15 +520,11 @@ static int client(int unixfd) static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); -- cgit From ad31c629ca3c87f6d557488c1f9faaebfbcd203c Mon Sep 17 00:00:00 2001 From: Elena Salomatkina Date: Sat, 25 Nov 2023 00:08:02 +0300 Subject: octeontx2-af: Fix possible buffer overflow A loop in rvu_mbox_handler_nix_bandprof_free() contains a break if (idx == MAX_BANDPROF_PER_PFFUNC), but if idx may reach MAX_BANDPROF_PER_PFFUNC buffer '(*req->prof_idx)[layer]' overflow happens before that check. The patch moves the break to the beginning of the loop. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: e8e095b3b370 ("octeontx2-af: cn10k: Bandwidth profiles config support"). Signed-off-by: Elena Salomatkina Reviewed-by: Simon Horman Reviewed-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/20231124210802.109763-1-elena.salomatkina.cmc@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 23c2f2ed2fb8..c112c71ff576 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -5505,6 +5505,8 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu, ipolicer = &nix_hw->ipolicer[layer]; for (idx = 0; idx < req->prof_count[layer]; idx++) { + if (idx == MAX_BANDPROF_PER_PFFUNC) + break; prof_idx = req->prof_idx[layer][idx]; if (prof_idx >= ipolicer->band_prof.max || ipolicer->pfvf_map[prof_idx] != pcifunc) @@ -5518,8 +5520,6 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu, ipolicer->pfvf_map[prof_idx] = 0x00; ipolicer->match_id[prof_idx] = 0; rvu_free_rsrc(&ipolicer->band_prof, prof_idx); - if (idx == MAX_BANDPROF_PER_PFFUNC) - break; } } mutex_unlock(&rvu->rsrc_lock); -- cgit From e54d628a2721bfbb002c19f6e8ca6746cec7640f Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Sat, 25 Nov 2023 14:01:26 +0800 Subject: net: stmmac: xgmac: Disable FPE MMC interrupts Commit aeb18dd07692 ("net: stmmac: xgmac: Disable MMC interrupts by default") tries to disable MMC interrupts to avoid a storm of unhandled interrupts, but leaves the FPE(Frame Preemption) MMC interrupts enabled, FPE MMC interrupts can cause the same problem. Now we mask FPE TX and RX interrupts to disable all MMC interrupts. Fixes: aeb18dd07692 ("net: stmmac: xgmac: Disable MMC interrupts by default") Reviewed-by: Larysa Zaremba Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Serge Semin Reviewed-by: Wojciech Drewek Link: https://lore.kernel.org/r/20231125060126.2328690-1-0x1207@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/mmc_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index ea4910ae0921..6a7c1d325c46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -177,8 +177,10 @@ #define MMC_XGMAC_RX_DISCARD_OCT_GB 0x1b4 #define MMC_XGMAC_RX_ALIGN_ERR_PKT 0x1bc +#define MMC_XGMAC_TX_FPE_INTR_MASK 0x204 #define MMC_XGMAC_TX_FPE_FRAG 0x208 #define MMC_XGMAC_TX_HOLD_REQ 0x20c +#define MMC_XGMAC_RX_FPE_INTR_MASK 0x224 #define MMC_XGMAC_RX_PKT_ASSEMBLY_ERR 0x228 #define MMC_XGMAC_RX_PKT_SMD_ERR 0x22c #define MMC_XGMAC_RX_PKT_ASSEMBLY_OK 0x230 @@ -352,6 +354,8 @@ static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr) { writel(0x0, mmcaddr + MMC_RX_INTR_MASK); writel(0x0, mmcaddr + MMC_TX_INTR_MASK); + writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_TX_FPE_INTR_MASK); + writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_FPE_INTR_MASK); writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK); } -- cgit From 51597219e0cd5157401d4d0ccb5daa4d9961676f Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sat, 25 Nov 2023 22:04:02 +0530 Subject: octeontx2-pf: Fix adding mbox work queue entry when num_vfs > 64 When more than 64 VFs are enabled for a PF then mbox communication between VF and PF is not working as mbox work queueing for few VFs are skipped due to wrong calculation of VF numbers. Fixes: d424b6c02415 ("octeontx2-pf: Enable SRIOV and added VF mbox handling") Signed-off-by: Geetha sowjanya Signed-off-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/1700930042-5400-1-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index ba95ac913274..6d56fc191845 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -566,7 +566,9 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr); otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr, TYPE_PFVF); - vfs -= 64; + if (intr) + trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); + vfs = 64; } intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0)); @@ -574,7 +576,8 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF); - trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); + if (intr) + trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); return IRQ_HANDLED; } -- cgit From fd7f98b2e12a3d96a92bde6640657ec7116f4372 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Nov 2023 22:06:57 +0530 Subject: octeontx2-pf: Restore TC ingress police rules when interface is up TC ingress policer rules depends on interface receive queue contexts since the bandwidth profiles are attached to RQ contexts. When an interface is brought down all the queue contexts are freed. This in turn frees bandwidth profiles in hardware causing ingress police rules non-functional after the interface is brought up. Fix this by applying all the ingress police rules config to hardware in otx2_open. Also allow adding ingress rules only when interface is running since no contexts exist for the interface when it is down. Fixes: 68fbff68dbea ("octeontx2-pf: Add police action for TC flower") Signed-off-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/1700930217-5707-1-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c | 3 + .../ethernet/marvell/octeontx2/nic/otx2_common.h | 2 + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 + .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 120 ++++++++++++++++----- 4 files changed, 102 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index a4a258da8dd5..c1c99d7054f8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -450,6 +450,9 @@ int cn10k_set_ipolicer_rate(struct otx2_nic *pfvf, u16 profile, aq->prof.pebs_mantissa = 0; aq->prof_mask.pebs_mantissa = 0xFF; + aq->prof.hl_en = 0; + aq->prof_mask.hl_en = 1; + /* Fill AQ info */ aq->qidx = profile; aq->ctype = NIX_AQ_CTYPE_BANDPROF; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index e7c69b57147e..06910307085e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -1070,6 +1070,8 @@ int otx2_init_tc(struct otx2_nic *nic); void otx2_shutdown_tc(struct otx2_nic *nic); int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data); +void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic); + /* CGX/RPM DMAC filters support */ int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf); int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u32 bit_pos); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 6d56fc191845..532e324bdcc8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1873,6 +1873,8 @@ int otx2_open(struct net_device *netdev) if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT) otx2_dmacflt_reinstall_flows(pf); + otx2_tc_apply_ingress_police_rules(pf); + err = otx2_rxtx_enable(pf, true); /* If a mbox communication error happens at this point then interface * will end up in a state such that it is in down state but hardware diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 8a5e3987a482..db1e0e0e812d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -47,6 +47,9 @@ struct otx2_tc_flow { bool is_act_police; u32 prio; struct npc_install_flow_req req; + u64 rate; + u32 burst; + bool is_pps; }; static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst, @@ -284,21 +287,10 @@ static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic, return err; } -static int otx2_tc_act_set_police(struct otx2_nic *nic, - struct otx2_tc_flow *node, - struct flow_cls_offload *f, - u64 rate, u32 burst, u32 mark, - struct npc_install_flow_req *req, bool pps) +static int otx2_tc_act_set_hw_police(struct otx2_nic *nic, + struct otx2_tc_flow *node) { - struct netlink_ext_ack *extack = f->common.extack; - struct otx2_hw *hw = &nic->hw; - int rq_idx, rc; - - rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues); - if (rq_idx >= hw->rx_queues) { - NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded"); - return -EINVAL; - } + int rc; mutex_lock(&nic->mbox.lock); @@ -308,23 +300,17 @@ static int otx2_tc_act_set_police(struct otx2_nic *nic, return rc; } - rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile, burst, rate, pps); + rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile, + node->burst, node->rate, node->is_pps); if (rc) goto free_leaf; - rc = cn10k_map_unmap_rq_policer(nic, rq_idx, node->leaf_profile, true); + rc = cn10k_map_unmap_rq_policer(nic, node->rq, node->leaf_profile, true); if (rc) goto free_leaf; mutex_unlock(&nic->mbox.lock); - req->match_id = mark & 0xFFFFULL; - req->index = rq_idx; - req->op = NIX_RX_ACTIONOP_UCAST; - set_bit(rq_idx, &nic->rq_bmap); - node->is_act_police = true; - node->rq = rq_idx; - return 0; free_leaf: @@ -336,6 +322,39 @@ free_leaf: return rc; } +static int otx2_tc_act_set_police(struct otx2_nic *nic, + struct otx2_tc_flow *node, + struct flow_cls_offload *f, + u64 rate, u32 burst, u32 mark, + struct npc_install_flow_req *req, bool pps) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct otx2_hw *hw = &nic->hw; + int rq_idx, rc; + + rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues); + if (rq_idx >= hw->rx_queues) { + NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded"); + return -EINVAL; + } + + req->match_id = mark & 0xFFFFULL; + req->index = rq_idx; + req->op = NIX_RX_ACTIONOP_UCAST; + + node->is_act_police = true; + node->rq = rq_idx; + node->burst = burst; + node->rate = rate; + node->is_pps = pps; + + rc = otx2_tc_act_set_hw_police(nic, node); + if (!rc) + set_bit(rq_idx, &nic->rq_bmap); + + return rc; +} + static int otx2_tc_parse_actions(struct otx2_nic *nic, struct flow_action *flow_action, struct npc_install_flow_req *req, @@ -1044,6 +1063,11 @@ static int otx2_tc_del_flow(struct otx2_nic *nic, } if (flow_node->is_act_police) { + __clear_bit(flow_node->rq, &nic->rq_bmap); + + if (nic->flags & OTX2_FLAG_INTF_DOWN) + goto free_mcam_flow; + mutex_lock(&nic->mbox.lock); err = cn10k_map_unmap_rq_policer(nic, flow_node->rq, @@ -1059,11 +1083,10 @@ static int otx2_tc_del_flow(struct otx2_nic *nic, "Unable to free leaf bandwidth profile(%d)\n", flow_node->leaf_profile); - __clear_bit(flow_node->rq, &nic->rq_bmap); - mutex_unlock(&nic->mbox.lock); } +free_mcam_flow: otx2_del_mcam_flow_entry(nic, flow_node->entry, NULL); otx2_tc_update_mcam_table(nic, flow_cfg, flow_node, false); kfree_rcu(flow_node, rcu); @@ -1083,6 +1106,11 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)) return -ENOMEM; + if (nic->flags & OTX2_FLAG_INTF_DOWN) { + NL_SET_ERR_MSG_MOD(extack, "Interface not initialized"); + return -EINVAL; + } + if (flow_cfg->nr_flows == flow_cfg->max_flows) { NL_SET_ERR_MSG_MOD(extack, "Free MCAM entry not available to add the flow"); @@ -1442,3 +1470,45 @@ void otx2_shutdown_tc(struct otx2_nic *nic) otx2_destroy_tc_flow_list(nic); } EXPORT_SYMBOL(otx2_shutdown_tc); + +static void otx2_tc_config_ingress_rule(struct otx2_nic *nic, + struct otx2_tc_flow *node) +{ + struct npc_install_flow_req *req; + + if (otx2_tc_act_set_hw_police(nic, node)) + return; + + mutex_lock(&nic->mbox.lock); + + req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox); + if (!req) + goto err; + + memcpy(req, &node->req, sizeof(struct npc_install_flow_req)); + + if (otx2_sync_mbox_msg(&nic->mbox)) + netdev_err(nic->netdev, + "Failed to install MCAM flow entry for ingress rule"); +err: + mutex_unlock(&nic->mbox.lock); +} + +void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic) +{ + struct otx2_flow_config *flow_cfg = nic->flow_cfg; + struct otx2_tc_flow *node; + + /* If any ingress policer rules exist for the interface then + * apply those rules. Ingress policer rules depend on bandwidth + * profiles linked to the receive queues. Since no receive queues + * exist when interface is down, ingress policer rules are stored + * and configured in hardware after all receive queues are allocated + * in otx2_open. + */ + list_for_each_entry(node, &flow_cfg->flow_list_tc, list) { + if (node->is_act_police) + otx2_tc_config_ingress_rule(nic, node); + } +} +EXPORT_SYMBOL(otx2_tc_apply_ingress_police_rules); -- cgit From 45b3fae4675dc1d4ee2d7aefa19d85ee4f891377 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 25 Nov 2023 15:33:58 -0600 Subject: neighbour: Fix __randomize_layout crash in struct neighbour Previously, one-element and zero-length arrays were treated as true flexible arrays, even though they are actually "fake" flex arrays. The __randomize_layout would leave them untouched at the end of the struct, similarly to proper C99 flex-array members. However, this approach changed with commit 1ee60356c2dc ("gcc-plugins: randstruct: Only warn about true flexible arrays"). Now, only C99 flexible-array members will remain untouched at the end of the struct, while one-element and zero-length arrays will be subject to randomization. Fix a `__randomize_layout` crash in `struct neighbour` by transforming zero-length array `primary_key` into a proper C99 flexible-array member. Fixes: 1ee60356c2dc ("gcc-plugins: randstruct: Only warn about true flexible arrays") Closes: https://lore.kernel.org/linux-hardening/20231124102458.GB1503258@e124191.cambridge.arm.com/ Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Tested-by: Joey Gouly Link: https://lore.kernel.org/r/ZWJoRsJGnCPdJ3+2@work Signed-off-by: Paolo Abeni --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 07022bb0d44d..0d28172193fa 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -162,7 +162,7 @@ struct neighbour { struct rcu_head rcu; struct net_device *dev; netdevice_tracker dev_tracker; - u8 primary_key[0]; + u8 primary_key[]; } __randomize_layout; struct neigh_ops { -- cgit From 59d395ed606d8df14615712b0cdcdadb2d962175 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 26 Nov 2023 19:36:46 +0100 Subject: r8169: fix deadlock on RTL8125 in jumbo mtu mode The original change results in a deadlock if jumbo mtu mode is used. Reason is that the phydev lock is held when rtl_reset_work() is called here, and rtl_jumbo_config() calls phy_start_aneg() which also tries to acquire the phydev lock. Fix this by calling rtl_reset_work() asynchronously. Fixes: 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125") Reported-by: Ian Chen Tested-by: Ian Chen Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/caf6a487-ef8c-4570-88f9-f47a659faf33@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/r8169_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 295366a85c63..a43e33e4b25e 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -579,6 +579,7 @@ struct rtl8169_tc_offsets { enum rtl_flag { RTL_FLAG_TASK_ENABLED = 0, RTL_FLAG_TASK_RESET_PENDING, + RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, RTL_FLAG_TASK_TX_TIMEOUT, RTL_FLAG_MAX }; @@ -4582,6 +4583,8 @@ static void rtl_task(struct work_struct *work) reset: rtl_reset_work(tp); netif_wake_queue(tp->dev); + } else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) { + rtl_reset_work(tp); } out_unlock: rtnl_unlock(); @@ -4615,7 +4618,7 @@ static void r8169_phylink_handler(struct net_device *ndev) } else { /* In few cases rx is broken after link-down otherwise */ if (rtl_is_8125(tp)) - rtl_reset_work(tp); + rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE); pm_runtime_idle(d); } -- cgit From 91d3d149978ba7b238198dd80e4b823756aa7cfa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 26 Nov 2023 23:01:02 +0100 Subject: r8169: prevent potential deadlock in rtl8169_close ndo_stop() is RTNL-protected by net core, and the worker function takes RTNL as well. Therefore we will deadlock when trying to execute a pending work synchronously. To fix this execute any pending work asynchronously. This will do no harm because netif_running() is false in ndo_stop(), and therefore the work function is effectively a no-op. However we have to ensure that no task is running or pending after rtl_remove_one(), therefore add a call to cancel_work_sync(). Fixes: abe5fc42f9ce ("r8169: use RTNL to protect critical sections") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/12395867-1d17-4cac-aa7d-c691938fcddf@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/r8169_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a43e33e4b25e..62cabeeb842a 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4694,7 +4694,7 @@ static int rtl8169_close(struct net_device *dev) rtl8169_down(tp); rtl8169_rx_clear(tp); - cancel_work_sync(&tp->wk.work); + cancel_work(&tp->wk.work); free_irq(tp->irq, tp); @@ -4928,6 +4928,8 @@ static void rtl_remove_one(struct pci_dev *pdev) if (pci_dev_run_wake(pdev)) pm_runtime_get_noresume(&pdev->dev); + cancel_work_sync(&tp->wk.work); + unregister_netdev(tp->dev); if (tp->dash_type != RTL_DASH_NONE) -- cgit From 9870257a0a338cd8d6c1cddab74e703f490f6779 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 27 Nov 2023 21:24:20 +0900 Subject: ravb: Fix races between ravb_tx_timeout_work() and net related ops Fix races between ravb_tx_timeout_work() and functions of net_device_ops and ethtool_ops by using rtnl_trylock() and rtnl_unlock(). Note that since ravb_close() is under the rtnl lock and calls cancel_work_sync(), ravb_tx_timeout_work() should calls rtnl_trylock(). Otherwise, a deadlock may happen in ravb_tx_timeout_work() like below: CPU0 CPU1 ravb_tx_timeout() schedule_work() ... __dev_close_many() // Under rtnl lock ravb_close() cancel_work_sync() // Waiting ravb_tx_timeout_work() rtnl_lock() // This is possible to cause a deadlock If rtnl_trylock() fails, rescheduling the work with sleep for 1 msec. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20231127122420.3706751-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c70cff80cc99..7c007ecd3ff6 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1874,6 +1874,12 @@ static void ravb_tx_timeout_work(struct work_struct *work) struct net_device *ndev = priv->ndev; int error; + if (!rtnl_trylock()) { + usleep_range(1000, 2000); + schedule_work(&priv->work); + return; + } + netif_tx_stop_all_queues(ndev); /* Stop PTP Clock driver */ @@ -1907,7 +1913,7 @@ static void ravb_tx_timeout_work(struct work_struct *work) */ netdev_err(ndev, "%s: ravb_dmac_init() failed, error %d\n", __func__, error); - return; + goto out_unlock; } ravb_emac_init(ndev); @@ -1917,6 +1923,9 @@ out: ravb_ptp_init(ndev, priv->pdev); netif_tx_start_all_queues(ndev); + +out_unlock: + rtnl_unlock(); } /* Packet transmit function for Ethernet AVB */ -- cgit From cbeb989e41f4094f54bec2cecce993f26f547bea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 14:58:06 -0800 Subject: ethtool: don't propagate EOPNOTSUPP from dumps The default dump handler needs to clear ret before returning. Otherwise if the last interface returns an inconsequential error this error will propagate to user space. This may confuse user space (ethtool CLI seems to ignore it, but YNL doesn't). It will also terminate the dump early for mutli-skb dump, because netlink core treats EOPNOTSUPP as a real error. Fixes: 728480f12442 ("ethtool: default handlers for GET requests") Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231126225806.2143528-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 3bbd5afb7b31..fe3553f60bf3 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -505,6 +505,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb, ret = skb->len; break; } + ret = 0; } rtnl_unlock(); -- cgit From 83f2df9d66bc9e1e0dbd5d5586a701088f6a1d42 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 14:58:58 -0800 Subject: tools: ynl-gen: always construct struct ynl_req_state struct ynl_req_state carries reply-related info from generated code into generic YNL code. While we don't need reply info to execute a request without a reply, we still need to pass in the struct, because it's also where we get the pointer to struct ynl_sock from. Passing NULL results in crashes if kernel returns an error or an unexpected reply. Fixes: dc0956c98f11 ("tools: ynl-gen: move the response reading logic into YNL") Link: https://lore.kernel.org/r/20231126225858.2144136-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/generated/devlink-user.c | 87 +++++++++++++++++++++----------- tools/net/ynl/generated/ethtool-user.c | 51 ++++++++++++------- tools/net/ynl/generated/fou-user.c | 6 ++- tools/net/ynl/generated/handshake-user.c | 3 +- tools/net/ynl/ynl-gen-c.py | 10 ++-- 5 files changed, 102 insertions(+), 55 deletions(-) diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index c12ca87ca2bb..8e757e249dab 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -2399,6 +2399,7 @@ void devlink_port_set_req_free(struct devlink_port_set_req *req) int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2416,7 +2417,7 @@ int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req) if (req->_present.port_function) devlink_dl_port_function_put(nlh, DEVLINK_ATTR_PORT_FUNCTION, &req->port_function); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2537,6 +2538,7 @@ void devlink_port_del_req_free(struct devlink_port_del_req *req) int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2550,7 +2552,7 @@ int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req) if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2568,6 +2570,7 @@ void devlink_port_split_req_free(struct devlink_port_split_req *req) int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2583,7 +2586,7 @@ int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req) if (req->_present.port_split_count) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_SPLIT_COUNT, req->port_split_count); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2602,6 +2605,7 @@ void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req) int devlink_port_unsplit(struct ynl_sock *ys, struct devlink_port_unsplit_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2615,7 +2619,7 @@ int devlink_port_unsplit(struct ynl_sock *ys, if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2926,6 +2930,7 @@ void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req) int devlink_sb_pool_set(struct ynl_sock *ys, struct devlink_sb_pool_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2945,7 +2950,7 @@ int devlink_sb_pool_set(struct ynl_sock *ys, if (req->_present.sb_pool_size) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_POOL_SIZE, req->sb_pool_size); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3126,6 +3131,7 @@ devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req) int devlink_sb_port_pool_set(struct ynl_sock *ys, struct devlink_sb_port_pool_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3145,7 +3151,7 @@ int devlink_sb_port_pool_set(struct ynl_sock *ys, if (req->_present.sb_threshold) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3334,6 +3340,7 @@ devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, struct devlink_sb_tc_pool_bind_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3357,7 +3364,7 @@ int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, if (req->_present.sb_threshold) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3376,6 +3383,7 @@ void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req) int devlink_sb_occ_snapshot(struct ynl_sock *ys, struct devlink_sb_occ_snapshot_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3389,7 +3397,7 @@ int devlink_sb_occ_snapshot(struct ynl_sock *ys, if (req->_present.sb_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3409,6 +3417,7 @@ devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req) int devlink_sb_occ_max_clear(struct ynl_sock *ys, struct devlink_sb_occ_max_clear_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3422,7 +3431,7 @@ int devlink_sb_occ_max_clear(struct ynl_sock *ys, if (req->_present.sb_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3544,6 +3553,7 @@ void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req) int devlink_eswitch_set(struct ynl_sock *ys, struct devlink_eswitch_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3561,7 +3571,7 @@ int devlink_eswitch_set(struct ynl_sock *ys, if (req->_present.eswitch_encap_mode) mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, req->eswitch_encap_mode); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3895,6 +3905,7 @@ devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_se int devlink_dpipe_table_counters_set(struct ynl_sock *ys, struct devlink_dpipe_table_counters_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3910,7 +3921,7 @@ int devlink_dpipe_table_counters_set(struct ynl_sock *ys, if (req->_present.dpipe_table_counters_enabled) mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, req->dpipe_table_counters_enabled); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3929,6 +3940,7 @@ void devlink_resource_set_req_free(struct devlink_resource_set_req *req) int devlink_resource_set(struct ynl_sock *ys, struct devlink_resource_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3944,7 +3956,7 @@ int devlink_resource_set(struct ynl_sock *ys, if (req->_present.resource_size) mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, req->resource_size); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4319,6 +4331,7 @@ void devlink_param_set_req_free(struct devlink_param_set_req *req) int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4336,7 +4349,7 @@ int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req) if (req->_present.param_value_cmode) mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, req->param_value_cmode); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4631,6 +4644,7 @@ void devlink_region_del_req_free(struct devlink_region_del_req *req) int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4648,7 +4662,7 @@ int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req) if (req->_present.region_snapshot_id) mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4922,6 +4936,7 @@ void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req) int devlink_port_param_set(struct ynl_sock *ys, struct devlink_port_param_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4935,7 +4950,7 @@ int devlink_port_param_set(struct ynl_sock *ys, if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5360,6 +5375,7 @@ devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req int devlink_health_reporter_set(struct ynl_sock *ys, struct devlink_health_reporter_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5381,7 +5397,7 @@ int devlink_health_reporter_set(struct ynl_sock *ys, if (req->_present.health_reporter_auto_dump) mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, req->health_reporter_auto_dump); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5402,6 +5418,7 @@ devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_ int devlink_health_reporter_recover(struct ynl_sock *ys, struct devlink_health_reporter_recover_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5417,7 +5434,7 @@ int devlink_health_reporter_recover(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5438,6 +5455,7 @@ devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnos int devlink_health_reporter_diagnose(struct ynl_sock *ys, struct devlink_health_reporter_diagnose_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5453,7 +5471,7 @@ int devlink_health_reporter_diagnose(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5556,6 +5574,7 @@ devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_ int devlink_health_reporter_dump_clear(struct ynl_sock *ys, struct devlink_health_reporter_dump_clear_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5571,7 +5590,7 @@ int devlink_health_reporter_dump_clear(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5592,6 +5611,7 @@ void devlink_flash_update_req_free(struct devlink_flash_update_req *req) int devlink_flash_update(struct ynl_sock *ys, struct devlink_flash_update_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5609,7 +5629,7 @@ int devlink_flash_update(struct ynl_sock *ys, if (req->_present.flash_update_overwrite_mask) mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, sizeof(struct nla_bitfield32), &req->flash_update_overwrite_mask); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5780,6 +5800,7 @@ void devlink_trap_set_req_free(struct devlink_trap_set_req *req) int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5795,7 +5816,7 @@ int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req) if (req->_present.trap_action) mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5968,6 +5989,7 @@ void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req) int devlink_trap_group_set(struct ynl_sock *ys, struct devlink_trap_group_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5985,7 +6007,7 @@ int devlink_trap_group_set(struct ynl_sock *ys, if (req->_present.trap_policer_id) mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6152,6 +6174,7 @@ devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req) int devlink_trap_policer_set(struct ynl_sock *ys, struct devlink_trap_policer_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6169,7 +6192,7 @@ int devlink_trap_policer_set(struct ynl_sock *ys, if (req->_present.trap_policer_burst) mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, req->trap_policer_burst); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6190,6 +6213,7 @@ devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *r int devlink_health_reporter_test(struct ynl_sock *ys, struct devlink_health_reporter_test_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6205,7 +6229,7 @@ int devlink_health_reporter_test(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6384,6 +6408,7 @@ void devlink_rate_set_req_free(struct devlink_rate_set_req *req) int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6407,7 +6432,7 @@ int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req) if (req->_present.rate_parent_node_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6427,6 +6452,7 @@ void devlink_rate_new_req_free(struct devlink_rate_new_req *req) int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6450,7 +6476,7 @@ int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req) if (req->_present.rate_parent_node_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6469,6 +6495,7 @@ void devlink_rate_del_req_free(struct devlink_rate_del_req *req) int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6482,7 +6509,7 @@ int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req) if (req->_present.rate_node_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6645,6 +6672,7 @@ void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req) int devlink_linecard_set(struct ynl_sock *ys, struct devlink_linecard_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6660,7 +6688,7 @@ int devlink_linecard_set(struct ynl_sock *ys, if (req->_present.linecard_type_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_LINECARD_TYPE, req->linecard_type); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6810,6 +6838,7 @@ void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req) int devlink_selftests_run(struct ynl_sock *ys, struct devlink_selftests_run_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6823,7 +6852,7 @@ int devlink_selftests_run(struct ynl_sock *ys, if (req->_present.selftests) devlink_dl_selftest_id_put(nlh, DEVLINK_ATTR_SELFTESTS, &req->selftests); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/generated/ethtool-user.c b/tools/net/ynl/generated/ethtool-user.c index 74b883a14958..660435639e2b 100644 --- a/tools/net/ynl/generated/ethtool-user.c +++ b/tools/net/ynl/generated/ethtool-user.c @@ -1843,6 +1843,7 @@ void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req) int ethtool_linkinfo_set(struct ynl_sock *ys, struct ethtool_linkinfo_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -1862,7 +1863,7 @@ int ethtool_linkinfo_set(struct ynl_sock *ys, if (req->_present.transceiver) mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TRANSCEIVER, req->transceiver); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2067,6 +2068,7 @@ void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req) int ethtool_linkmodes_set(struct ynl_sock *ys, struct ethtool_linkmodes_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2094,7 +2096,7 @@ int ethtool_linkmodes_set(struct ynl_sock *ys, if (req->_present.rate_matching) mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_RATE_MATCHING, req->rate_matching); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2398,6 +2400,7 @@ void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req) int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2409,7 +2412,7 @@ int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req) if (req->_present.msgmask) ethtool_bitset_put(nlh, ETHTOOL_A_DEBUG_MSGMASK, &req->msgmask); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2577,6 +2580,7 @@ void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req) int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2590,7 +2594,7 @@ int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req) if (req->_present.sopass_len) mnl_attr_put(nlh, ETHTOOL_A_WOL_SOPASS, req->_present.sopass_len, req->sopass); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3045,6 +3049,7 @@ void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req) int ethtool_privflags_set(struct ynl_sock *ys, struct ethtool_privflags_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3056,7 +3061,7 @@ int ethtool_privflags_set(struct ynl_sock *ys, if (req->_present.flags) ethtool_bitset_put(nlh, ETHTOOL_A_PRIVFLAGS_FLAGS, &req->flags); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3273,6 +3278,7 @@ void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req) int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3312,7 +3318,7 @@ int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req) if (req->_present.tx_push_buf_len_max) mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, req->tx_push_buf_len_max); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3495,6 +3501,7 @@ void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req) int ethtool_channels_set(struct ynl_sock *ys, struct ethtool_channels_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3520,7 +3527,7 @@ int ethtool_channels_set(struct ynl_sock *ys, if (req->_present.combined_count) mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_COUNT, req->combined_count); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3798,6 +3805,7 @@ void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req) int ethtool_coalesce_set(struct ynl_sock *ys, struct ethtool_coalesce_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3861,7 +3869,7 @@ int ethtool_coalesce_set(struct ynl_sock *ys, if (req->_present.tx_aggr_time_usecs) mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, req->tx_aggr_time_usecs); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4036,6 +4044,7 @@ void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req) int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4055,7 +4064,7 @@ int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req) if (req->_present.stats_src) mnl_attr_put_u32(nlh, ETHTOOL_A_PAUSE_STATS_SRC, req->stats_src); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4242,6 +4251,7 @@ void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req) int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4263,7 +4273,7 @@ int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req) if (req->_present.tx_lpi_timer) mnl_attr_put_u32(nlh, ETHTOOL_A_EEE_TX_LPI_TIMER, req->tx_lpi_timer); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4437,6 +4447,7 @@ void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req) int ethtool_cable_test_act(struct ynl_sock *ys, struct ethtool_cable_test_act_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4446,7 +4457,7 @@ int ethtool_cable_test_act(struct ynl_sock *ys, if (req->_present.header) ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_HEADER, &req->header); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4465,6 +4476,7 @@ ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req) int ethtool_cable_test_tdr_act(struct ynl_sock *ys, struct ethtool_cable_test_tdr_act_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4474,7 +4486,7 @@ int ethtool_cable_test_tdr_act(struct ynl_sock *ys, if (req->_present.header) ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_TDR_HEADER, &req->header); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4782,6 +4794,7 @@ void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req) int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4799,7 +4812,7 @@ int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req) if (req->_present.stats) ethtool_fec_stat_put(nlh, ETHTOOL_A_FEC_STATS, &req->stats); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5235,6 +5248,7 @@ void ethtool_module_set_req_free(struct ethtool_module_set_req *req) int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5248,7 +5262,7 @@ int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req) if (req->_present.power_mode) mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE, req->power_mode); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5397,6 +5411,7 @@ void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req) int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5412,7 +5427,7 @@ int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req) if (req->_present.pw_d_status) mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_PW_D_STATUS, req->pw_d_status); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5746,6 +5761,7 @@ void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req) int ethtool_plca_set_cfg(struct ynl_sock *ys, struct ethtool_plca_set_cfg_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5771,7 +5787,7 @@ int ethtool_plca_set_cfg(struct ynl_sock *ys, if (req->_present.burst_tmr) mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_TMR, req->burst_tmr); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6124,6 +6140,7 @@ void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req) int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6143,7 +6160,7 @@ int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req) if (req->_present.tx_min_frag_size) mnl_attr_put_u32(nlh, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, req->tx_min_frag_size); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c index 4271b5d43c58..f30bef23bc31 100644 --- a/tools/net/ynl/generated/fou-user.c +++ b/tools/net/ynl/generated/fou-user.c @@ -72,6 +72,7 @@ void fou_add_req_free(struct fou_add_req *req) int fou_add(struct ynl_sock *ys, struct fou_add_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -99,7 +100,7 @@ int fou_add(struct ynl_sock *ys, struct fou_add_req *req) if (req->_present.ifindex) mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -117,6 +118,7 @@ void fou_del_req_free(struct fou_del_req *req) int fou_del(struct ynl_sock *ys, struct fou_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -140,7 +142,7 @@ int fou_del(struct ynl_sock *ys, struct fou_del_req *req) if (req->_present.peer_v6_len) mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c index 7c67765daf90..6901f8462cca 100644 --- a/tools/net/ynl/generated/handshake-user.c +++ b/tools/net/ynl/generated/handshake-user.c @@ -295,6 +295,7 @@ void handshake_done_req_free(struct handshake_done_req *req) int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -308,7 +309,7 @@ int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req) for (unsigned int i = 0; i < req->n_remote_auth; i++) mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 3bd6b928c14f..8337aa6de25e 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1709,14 +1709,14 @@ def print_req(ri): ret_ok = '0' ret_err = '-1' direction = "request" - local_vars = ['struct nlmsghdr *nlh;', + local_vars = ['struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };', + 'struct nlmsghdr *nlh;', 'int err;'] if 'reply' in ri.op[ri.op_mode]: ret_ok = 'rsp' ret_err = 'NULL' - local_vars += [f'{type_name(ri, rdir(direction))} *rsp;', - 'struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };'] + local_vars += [f'{type_name(ri, rdir(direction))} *rsp;'] print_prototype(ri, direction, terminate=False) ri.cw.block_start() @@ -1732,7 +1732,6 @@ def print_req(ri): attr.attr_put(ri, "req") ri.cw.nl() - parse_arg = "NULL" if 'reply' in ri.op[ri.op_mode]: ri.cw.p('rsp = calloc(1, sizeof(*rsp));') ri.cw.p('yrs.yarg.data = rsp;') @@ -1742,8 +1741,7 @@ def print_req(ri): else: ri.cw.p(f'yrs.rsp_cmd = {ri.op.rsp_value};') ri.cw.nl() - parse_arg = '&yrs' - ri.cw.p(f"err = ynl_exec(ys, nlh, {parse_arg});") + ri.cw.p("err = ynl_exec(ys, nlh, &yrs);") ri.cw.p('if (err < 0)') if 'reply' in ri.op[ri.op_mode]: ri.cw.p('goto err_free;') -- cgit From 8866730aed5100f06d3d965c22f1c61f74942541 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 28 Nov 2023 17:25:56 -0800 Subject: bpf, sockmap: af_unix stream sockets need to hold ref for pair sock AF_UNIX stream sockets are a paired socket. So sending on one of the pairs will lookup the paired socket as part of the send operation. It is possible however to put just one of the pairs in a BPF map. This currently increments the refcnt on the sock in the sockmap to ensure it is not free'd by the stack before sockmap cleans up its state and stops any skbs being sent/recv'd to that socket. But we missed a case. If the peer socket is closed it will be free'd by the stack. However, the paired socket can still be referenced from BPF sockmap side because we hold a reference there. Then if we are sending traffic through BPF sockmap to that socket it will try to dereference the free'd pair in its send logic creating a use after free. And following splat: [59.900375] BUG: KASAN: slab-use-after-free in sk_wake_async+0x31/0x1b0 [59.901211] Read of size 8 at addr ffff88811acbf060 by task kworker/1:2/954 [...] [59.905468] Call Trace: [59.905787] [59.906066] dump_stack_lvl+0x130/0x1d0 [59.908877] print_report+0x16f/0x740 [59.910629] kasan_report+0x118/0x160 [59.912576] sk_wake_async+0x31/0x1b0 [59.913554] sock_def_readable+0x156/0x2a0 [59.914060] unix_stream_sendmsg+0x3f9/0x12a0 [59.916398] sock_sendmsg+0x20e/0x250 [59.916854] skb_send_sock+0x236/0xac0 [59.920527] sk_psock_backlog+0x287/0xaa0 To fix let BPF sockmap hold a refcnt on both the socket in the sockmap and its paired socket. It wasn't obvious how to contain the fix to bpf_unix logic. The primarily problem with keeping this logic in bpf_unix was: In the sock close() we could handle the deref by having a close handler. But, when we are destroying the psock through a map delete operation we wouldn't have gotten any signal thorugh the proto struct other than it being replaced. If we do the deref from the proto replace its too early because we need to deref the sk_pair after the backlog worker has been stopped. Given all this it seems best to just cache it at the end of the psock and eat 8B for the af_unix and vsock users. Notice dgram sockets are OK because they handle locking already. Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20231129012557.95371-2-john.fastabend@gmail.com --- include/linux/skmsg.h | 1 + include/net/af_unix.h | 1 + net/core/skmsg.c | 2 ++ net/unix/af_unix.c | 2 -- net/unix/unix_bpf.c | 5 +++++ 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c1637515a8a4..c953b8c0d2f4 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -106,6 +106,7 @@ struct sk_psock { struct mutex work_mutex; struct sk_psock_work_state work_state; struct delayed_work work; + struct sock *sk_pair; struct rcu_work rwork; }; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 824c258143a3..49c4640027d8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -75,6 +75,7 @@ struct unix_sock { }; #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) +#define unix_peer(sk) (unix_sk(sk)->peer) #define peer_wait peer_wq.wait diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 6c31eefbd777..93ecfceac1bc 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -826,6 +826,8 @@ static void sk_psock_destroy(struct work_struct *work) if (psock->sk_redir) sock_put(psock->sk_redir); + if (psock->sk_pair) + sock_put(psock->sk_pair); sock_put(psock->sk); kfree(psock); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a357dc5f2404..ac1f2bc18fc9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -213,8 +213,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */ -#define unix_peer(sk) (unix_sk(sk)->peer) - static inline int unix_our_peer(struct sock *sk, struct sock *osk) { return unix_peer(osk) == sk; diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 2f9d8271c6ec..7ea7c3a0d0d0 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -159,12 +159,17 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { + struct sock *sk_pair; + if (restore) { sk->sk_write_space = psock->saved_write_space; sock_replace_proto(sk, psock->sk_proto); return 0; } + sk_pair = unix_peer(sk); + sock_hold(sk_pair); + psock->sk_pair = sk_pair; unix_stream_bpf_check_needs_rebuild(psock->sk_proto); sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; -- cgit From 51354f700d400e55b329361e1386b04695e6e5c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 28 Nov 2023 17:25:57 -0800 Subject: bpf, sockmap: Add af_unix test with both sockets in map This adds a test where both pairs of a af_unix paired socket are put into a BPF map. This ensures that when we tear down the af_unix pair we don't have any issues on sockmap side with ordering and reference counting. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20231129012557.95371-3-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 51 +++++++++++++++++----- .../selftests/bpf/progs/test_sockmap_listen.c | 7 +++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index a934d430c20c..a92807bfcd13 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1337,7 +1337,8 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, - int sock_mapfd, int verd_mapfd, enum redir_mode mode) + int sock_mapfd, int nop_mapfd, + int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); unsigned int pass; @@ -1351,6 +1352,12 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, if (err) return; + if (nop_mapfd >= 0) { + err = add_to_sockmap(nop_mapfd, cli0, cli1); + if (err) + return; + } + n = write(cli1, "a", 1); if (n < 0) FAIL_ERRNO("%s: write", log_prefix); @@ -1387,7 +1394,7 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, goto close0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1677,7 +1684,7 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (err) goto close_cli0; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1735,7 +1742,7 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (err) goto close; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1770,8 +1777,10 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); } -static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static void unix_inet_redir_to_connected(int family, int type, + int sock_mapfd, int nop_mapfd, + int verd_mapfd, + enum redir_mode mode) { int c0, c1, p0, p1; int sfd[2]; @@ -1785,7 +1794,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, goto close_cli0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, + sock_mapfd, nop_mapfd, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1799,6 +1809,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, struct bpf_map *inner_map, int family) { int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int nop_map = bpf_map__fd(skel->maps.nop_map); int verdict_map = bpf_map__fd(skel->maps.verdict_map); int sock_map = bpf_map__fd(inner_map); int err; @@ -1808,14 +1819,32 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, return; skel->bss->test_ingress = false; - unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, REDIR_EGRESS); - unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, + REDIR_EGRESS); + + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, nop_map, verdict_map, + REDIR_EGRESS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, REDIR_EGRESS); skel->bss->test_ingress = true; - unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, + REDIR_INGRESS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, -1, verdict_map, + REDIR_INGRESS); + + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, nop_map, verdict_map, REDIR_INGRESS); - unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, REDIR_INGRESS); xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index 464d35bd57c7..b7250eb9c30c 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -14,6 +14,13 @@ struct { __type(value, __u64); } sock_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} nop_map SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_SOCKHASH); __uint(max_entries, 2); -- cgit From 9f74a3dfcf83e11aedcb98250b8040dbc6d9659a Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 27 Nov 2023 13:23:38 -0800 Subject: ice: Fix VF Reset paths when interface in a failed over aggregate There is an error when an interface has the following conditions: - PF is in an aggregate (bond) - PF has VFs created on it - bond is in a state where it is failed-over to the secondary interface - A VF reset is issued on one or more of those VFs The issue is generated by the originating PF trying to rebuild or reconfigure the VF resources. Since the bond is failed over to the secondary interface the queue contexts are in a modified state. To fix this issue, have the originating interface reclaim its resources prior to the tear-down and rebuild or reconfigure. Then after the process is complete, move the resources back to the currently active interface. There are multiple paths that can be used depending on what triggered the event, so create a helper function to move the queues and use paired calls to the helper (back to origin, process, then move back to active interface) under the same lag_mutex lock. Fixes: 1e0f9881ef79 ("ice: Flesh out implementation of support for SRIOV on bonded interface") Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20231127212340.1137657-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_lag.c | 122 +++++++++++++++----------- drivers/net/ethernet/intel/ice/ice_lag.h | 1 + drivers/net/ethernet/intel/ice/ice_vf_lib.c | 20 +++++ drivers/net/ethernet/intel/ice/ice_virtchnl.c | 25 ++++++ 4 files changed, 118 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index cd065ec48c87..280994ee5933 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -569,6 +569,50 @@ resume_traffic: dev_dbg(dev, "Problem restarting traffic for LAG node move\n"); } +/** + * ice_lag_build_netdev_list - populate the lag struct's netdev list + * @lag: local lag struct + * @ndlist: pointer to netdev list to populate + */ +static void ice_lag_build_netdev_list(struct ice_lag *lag, + struct ice_lag_netdev_list *ndlist) +{ + struct ice_lag_netdev_list *nl; + struct net_device *tmp_nd; + + INIT_LIST_HEAD(&ndlist->node); + rcu_read_lock(); + for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { + nl = kzalloc(sizeof(*nl), GFP_ATOMIC); + if (!nl) + break; + + nl->netdev = tmp_nd; + list_add(&nl->node, &ndlist->node); + } + rcu_read_unlock(); + lag->netdev_head = &ndlist->node; +} + +/** + * ice_lag_destroy_netdev_list - free lag struct's netdev list + * @lag: pointer to local lag struct + * @ndlist: pointer to lag struct netdev list + */ +static void ice_lag_destroy_netdev_list(struct ice_lag *lag, + struct ice_lag_netdev_list *ndlist) +{ + struct ice_lag_netdev_list *entry, *n; + + rcu_read_lock(); + list_for_each_entry_safe(entry, n, &ndlist->node, node) { + list_del(&entry->node); + kfree(entry); + } + rcu_read_unlock(); + lag->netdev_head = NULL; +} + /** * ice_lag_move_single_vf_nodes - Move Tx scheduling nodes for single VF * @lag: primary interface LAG struct @@ -597,7 +641,6 @@ ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport, void ice_lag_move_new_vf_nodes(struct ice_vf *vf) { struct ice_lag_netdev_list ndlist; - struct list_head *tmp, *n; u8 pri_port, act_port; struct ice_lag *lag; struct ice_vsi *vsi; @@ -621,38 +664,15 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf) pri_port = pf->hw.port_info->lport; act_port = lag->active_port; - if (lag->upper_netdev) { - struct ice_lag_netdev_list *nl; - struct net_device *tmp_nd; - - INIT_LIST_HEAD(&ndlist.node); - rcu_read_lock(); - for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_ATOMIC); - if (!nl) - break; - - nl->netdev = tmp_nd; - list_add(&nl->node, &ndlist.node); - } - rcu_read_unlock(); - } - - lag->netdev_head = &ndlist.node; + if (lag->upper_netdev) + ice_lag_build_netdev_list(lag, &ndlist); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) && lag->bonded && lag->primary && pri_port != act_port && !list_empty(lag->netdev_head)) ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx); - list_for_each_safe(tmp, n, &ndlist.node) { - struct ice_lag_netdev_list *entry; - - entry = list_entry(tmp, struct ice_lag_netdev_list, node); - list_del(&entry->node); - kfree(entry); - } - lag->netdev_head = NULL; + ice_lag_destroy_netdev_list(lag, &ndlist); new_vf_unlock: mutex_unlock(&pf->lag_mutex); @@ -679,6 +699,29 @@ static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport) ice_lag_move_single_vf_nodes(lag, oldport, newport, i); } +/** + * ice_lag_move_vf_nodes_cfg - move vf nodes outside LAG netdev event context + * @lag: local lag struct + * @src_prt: lport value for source port + * @dst_prt: lport value for destination port + * + * This function is used to move nodes during an out-of-netdev-event situation, + * primarily when the driver needs to reconfigure or recreate resources. + * + * Must be called while holding the lag_mutex to avoid lag events from + * processing while out-of-sync moves are happening. Also, paired moves, + * such as used in a reset flow, should both be called under the same mutex + * lock to avoid changes between start of reset and end of reset. + */ +void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt) +{ + struct ice_lag_netdev_list ndlist; + + ice_lag_build_netdev_list(lag, &ndlist); + ice_lag_move_vf_nodes(lag, src_prt, dst_prt); + ice_lag_destroy_netdev_list(lag, &ndlist); +} + #define ICE_LAG_SRIOV_CP_RECIPE 10 #define ICE_LAG_SRIOV_TRAIN_PKT_LEN 16 @@ -2051,7 +2094,6 @@ void ice_lag_rebuild(struct ice_pf *pf) { struct ice_lag_netdev_list ndlist; struct ice_lag *lag, *prim_lag; - struct list_head *tmp, *n; u8 act_port, loc_port; if (!pf->lag || !pf->lag->bonded) @@ -2063,21 +2105,7 @@ void ice_lag_rebuild(struct ice_pf *pf) if (lag->primary) { prim_lag = lag; } else { - struct ice_lag_netdev_list *nl; - struct net_device *tmp_nd; - - INIT_LIST_HEAD(&ndlist.node); - rcu_read_lock(); - for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_ATOMIC); - if (!nl) - break; - - nl->netdev = tmp_nd; - list_add(&nl->node, &ndlist.node); - } - rcu_read_unlock(); - lag->netdev_head = &ndlist.node; + ice_lag_build_netdev_list(lag, &ndlist); prim_lag = ice_lag_find_primary(lag); } @@ -2107,13 +2135,7 @@ void ice_lag_rebuild(struct ice_pf *pf) ice_clear_rdma_cap(pf); lag_rebuild_out: - list_for_each_safe(tmp, n, &ndlist.node) { - struct ice_lag_netdev_list *entry; - - entry = list_entry(tmp, struct ice_lag_netdev_list, node); - list_del(&entry->node); - kfree(entry); - } + ice_lag_destroy_netdev_list(lag, &ndlist); mutex_unlock(&pf->lag_mutex); } diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 9557e8605a07..ede833dfa658 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -65,4 +65,5 @@ int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); void ice_lag_rebuild(struct ice_pf *pf); bool ice_lag_is_switchdev_running(struct ice_pf *pf); +void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt); #endif /* _ICE_LAG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index aca1f2ea5034..b7ae09952156 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -829,12 +829,16 @@ static void ice_notify_vf_reset(struct ice_vf *vf) int ice_reset_vf(struct ice_vf *vf, u32 flags) { struct ice_pf *pf = vf->pf; + struct ice_lag *lag; struct ice_vsi *vsi; + u8 act_prt, pri_prt; struct device *dev; int err = 0; bool rsd; dev = ice_pf_to_dev(pf); + act_prt = ICE_LAG_INVALID_PORT; + pri_prt = pf->hw.port_info->lport; if (flags & ICE_VF_RESET_NOTIFY) ice_notify_vf_reset(vf); @@ -845,6 +849,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return 0; } + lag = pf->lag; + mutex_lock(&pf->lag_mutex); + if (lag && lag->bonded && lag->primary) { + act_prt = lag->active_port; + if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT && + lag->upper_netdev) + ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); + else + act_prt = ICE_LAG_INVALID_PORT; + } + if (flags & ICE_VF_RESET_LOCK) mutex_lock(&vf->cfg_lock); else @@ -937,6 +952,11 @@ out_unlock: if (flags & ICE_VF_RESET_LOCK) mutex_unlock(&vf->cfg_lock); + if (lag && lag->bonded && lag->primary && + act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + mutex_unlock(&pf->lag_mutex); + return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index cdf17b1e2f25..de11b3186bd7 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1603,9 +1603,24 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; struct ice_pf *pf = vf->pf; + struct ice_lag *lag; struct ice_vsi *vsi; + u8 act_prt, pri_prt; int i = -1, q_idx; + lag = pf->lag; + mutex_lock(&pf->lag_mutex); + act_prt = ICE_LAG_INVALID_PORT; + pri_prt = pf->hw.port_info->lport; + if (lag && lag->bonded && lag->primary) { + act_prt = lag->active_port; + if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT && + lag->upper_netdev) + ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); + else + act_prt = ICE_LAG_INVALID_PORT; + } + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) goto error_param; @@ -1729,6 +1744,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } } + if (lag && lag->bonded && lag->primary && + act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + mutex_unlock(&pf->lag_mutex); + /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, VIRTCHNL_STATUS_SUCCESS, NULL, 0); @@ -1743,6 +1763,11 @@ error_param: vf->vf_id, i); } + if (lag && lag->bonded && lag->primary && + act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + mutex_unlock(&pf->lag_mutex); + ice_lag_move_new_vf_nodes(vf); /* send the response to the VF */ -- cgit From 91fdb30ddfdb651509914d3ed0a0302712540fed Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 28 Nov 2023 17:59:28 +0800 Subject: net: libwx: fix memory leak on msix entry Since pci_free_irq_vectors() set pdev->msix_enabled as 0 in the calling of pci_msix_shutdown(), wx->msix_entries is never freed. Reordering the lines to fix the memory leak. Cc: stable@vger.kernel.org Fixes: 3f703186113f ("net: libwx: Add irq flow functions") Signed-off-by: Jiawen Wu Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20231128095928.1083292-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 2823861e5a92..a5a50b5a8816 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1972,11 +1972,11 @@ void wx_reset_interrupt_capability(struct wx *wx) if (!pdev->msi_enabled && !pdev->msix_enabled) return; - pci_free_irq_vectors(wx->pdev); if (pdev->msix_enabled) { kfree(wx->msix_entries); wx->msix_entries = NULL; } + pci_free_irq_vectors(wx->pdev); } EXPORT_SYMBOL(wx_reset_interrupt_capability); -- cgit From d8eb6ea4b302e7ff78535c205510e359ac10a0bd Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:34 +0200 Subject: net: ravb: Check return value of reset_control_deassert() reset_control_deassert() could return an error. Some devices cannot work if reset signal de-assert operation fails. To avoid this check the return code of reset_control_deassert() in ravb_probe() and take proper action. Along with it, the free_netdev() call from the error path was moved after reset_control_assert() on its own label (out_free_netdev) to free netdev in case reset_control_deassert() fails. Fixes: 0d13a1a464a0 ("ravb: Add reset support") Reviewed-by: Sergey Shtylyov Reviewed-by: Philipp Zabel Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/ravb_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 7c007ecd3ff6..35a6b0a26dcd 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2654,7 +2654,10 @@ static int ravb_probe(struct platform_device *pdev) ndev->features = info->net_features; ndev->hw_features = info->net_hw_features; - reset_control_deassert(rstc); + error = reset_control_deassert(rstc); + if (error) + goto out_free_netdev; + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -2881,11 +2884,11 @@ out_disable_gptp_clk: out_disable_refclk: clk_disable_unprepare(priv->refclk); out_release: - free_netdev(ndev); - pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); reset_control_assert(rstc); +out_free_netdev: + free_netdev(ndev); return error; } -- cgit From 88b74831faaee455c2af380382d979fc38e79270 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:35 +0200 Subject: net: ravb: Use pm_runtime_resume_and_get() pm_runtime_get_sync() may return an error. In case it returns with an error dev->power.usage_count needs to be decremented. pm_runtime_resume_and_get() takes care of this. Thus use it. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/ravb_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 35a6b0a26dcd..96b38d1e39ae 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2659,7 +2659,9 @@ static int ravb_probe(struct platform_device *pdev) goto out_free_netdev; pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); + error = pm_runtime_resume_and_get(&pdev->dev); + if (error < 0) + goto out_rpm_disable; if (info->multi_irqs) { if (info->err_mgmt_irqs) @@ -2885,6 +2887,7 @@ out_disable_refclk: clk_disable_unprepare(priv->refclk); out_release: pm_runtime_put(&pdev->dev); +out_rpm_disable: pm_runtime_disable(&pdev->dev); reset_control_assert(rstc); out_free_netdev: -- cgit From d78c0ced60d5e2f8b5a4a0468a5c400b24aeadf2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:36 +0200 Subject: net: ravb: Make write access to CXR35 first before accessing other EMAC registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hardware manual of RZ/G3S (and RZ/G2L) specifies the following on the description of CXR35 register (chapter "PHY interface select register (CXR35)"): "After release reset, make write-access to this register before making write-access to other registers (except MDIOMOD). Even if not need to change the value of this register, make write-access to this register at least one time. Because RGMII/MII MODE is recognized by accessing this register". The setup procedure for EMAC module (chapter "Setup procedure" of RZ/G3S, RZ/G2L manuals) specifies the E-MAC.CXR35 register is the first EMAC register that is to be configured. Note [A] from chapter "PHY interface select register (CXR35)" specifies the following: [A] The case which CXR35 SEL_XMII is used for the selection of RGMII/MII in APB Clock 100 MHz. (1) To use RGMII interface, Set ‘H’03E8_0000’ to this register. (2) To use MII interface, Set ‘H’03E8_0002’ to this register. Take into account these indication. Fixes: 1089877ada8d ("ravb: Add RZ/G2L MII interface support") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/ravb_main.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 96b38d1e39ae..4d57ecfdfe61 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -515,6 +515,15 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); + if (priv->phy_interface == PHY_INTERFACE_MODE_MII) { + ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35); + ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0); + } else { + ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_RGMII, CXR35); + ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, + CXR31_SEL_LINK0); + } + /* Receive frame limit set register */ ravb_write(ndev, GBETH_RX_BUFF_MAX + ETH_FCS_LEN, RFLR); @@ -537,14 +546,6 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) /* E-MAC interrupt enable register */ ravb_write(ndev, ECSIPR_ICDIP, ECSIPR); - - if (priv->phy_interface == PHY_INTERFACE_MODE_MII) { - ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0); - ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35); - } else { - ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, - CXR31_SEL_LINK0); - } } static void ravb_emac_init_rcar(struct net_device *ndev) -- cgit From 6f32c086602050fc11157adeafaa1c1eb393f0af Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:37 +0200 Subject: net: ravb: Start TX queues after HW initialization succeeded ravb_phy_start() may fail. If that happens, the TX queues will remain started. Thus, move the netif_tx_start_all_queues() after PHY is successfully initialized. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Reviewed-by: Kalesh AP Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/ravb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4d57ecfdfe61..5f1fb94c5ab7 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1812,13 +1812,13 @@ static int ravb_open(struct net_device *ndev) if (info->gptp) ravb_ptp_init(ndev, priv->pdev); - netif_tx_start_all_queues(ndev); - /* PHY control start */ error = ravb_phy_start(ndev); if (error) goto out_ptp_stop; + netif_tx_start_all_queues(ndev); + return 0; out_ptp_stop: -- cgit From eac16a733427ba0de2449ffc7bd3da32ddb65cb7 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:38 +0200 Subject: net: ravb: Stop DMA in case of failures on ravb_open() In case ravb_phy_start() returns with error the settings applied in ravb_dmac_init() are not reverted (e.g. config mode). For this call ravb_stop_dma() on failure path of ravb_open(). Fixes: a0d2f20650e8 ("Renesas Ethernet AVB PTP clock driver") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/ravb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 5f1fb94c5ab7..6c9e92676106 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1825,6 +1825,7 @@ out_ptp_stop: /* Stop PTP Clock driver */ if (info->gptp) ravb_ptp_stop(ndev); + ravb_stop_dma(ndev); out_free_irq_mgmta: if (!info->multi_irqs) goto out_free_irq; -- cgit From edf9bc396e05081ca281ffb0cd41e44db478ff26 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:39 +0200 Subject: net: ravb: Keep reverse order of operations in ravb_remove() On RZ/G3S SMARC Carrier II board having RGMII connections b/w Ethernet MACs and PHYs it has been discovered that doing unbind/bind for ravb driver in a loop leads to wrong speed and duplex for Ethernet links and broken connectivity (the connectivity cannot be restored even with bringing interface down/up). Before doing unbind/bind the Ethernet interfaces were configured though systemd. The sh instructions used to do unbind/bind were: $ cd /sys/bus/platform/drivers/ravb/ $ while :; do echo 11c30000.ethernet > unbind ; \ echo 11c30000.ethernet > bind; done It has been discovered that there is a race b/w IOCTLs initialized by systemd at the response of success binding and the "ravb_write(ndev, CCC_OPC_RESET, CCC)" call in ravb_remove() as follows: 1/ as a result of bind success the user space open/configures the interfaces tough an IOCTL; the following stack trace has been identified on RZ/G3S: Call trace: dump_backtrace+0x9c/0x100 show_stack+0x20/0x38 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x28 ravb_open+0x70/0xa58 __dev_open+0xf4/0x1e8 __dev_change_flags+0x198/0x218 dev_change_flags+0x2c/0x80 devinet_ioctl+0x640/0x708 inet_ioctl+0x1e4/0x200 sock_do_ioctl+0x50/0x108 sock_ioctl+0x240/0x358 __arm64_sys_ioctl+0xb0/0x100 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0xc8/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0xb8 el0t_64_sync_handler+0xc0/0xc8 el0t_64_sync+0x190/0x198 2/ this call may execute concurrently with ravb_remove() as the unbind/bind operation was executed in a loop 3/ if the operation mode is changed to RESET (through ravb_write(ndev, CCC_OPC_RESET, CCC) call in ravb_remove()) while the above ravb_open() is in progress it may lead to MAC (or PHY, or MAC-PHY connection, the right point hasn't been identified at the moment) to be broken, thus the Ethernet connectivity fails to restore. The simple fix for this is to move ravb_write(ndev, CCC_OPC_RESET, CCC)) after unregister_netdev() to avoid resetting the controller while the netdev interface is still registered. To avoid future issues in ravb_remove(), the patch follows the proper order of operations in ravb_remove(): reverse order compared with ravb_probe(). This avoids described races as the IOCTLs as well as unregister_netdev() (called now at the beginning of ravb_remove()) calls rtnl_lock() before continuing and IOCTLs check (though devinet_ioctl()) if device is still registered just after taking the lock: int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { // ... rtnl_lock(); ret = -ENODEV; dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) goto done; // ... done: rtnl_unlock(); out: return ret; } Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/ravb_main.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 6c9e92676106..664eda4b5a11 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2903,22 +2903,26 @@ static void ravb_remove(struct platform_device *pdev) struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; - /* Stop PTP Clock driver */ - if (info->ccc_gac) - ravb_ptp_stop(ndev); - - clk_disable_unprepare(priv->gptp_clk); - clk_disable_unprepare(priv->refclk); - - /* Set reset mode */ - ravb_write(ndev, CCC_OPC_RESET, CCC); unregister_netdev(ndev); if (info->nc_queues) netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); + + /* Stop PTP Clock driver */ + if (info->ccc_gac) + ravb_ptp_stop(ndev); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); + + /* Set reset mode */ + ravb_write(ndev, CCC_OPC_RESET, CCC); + + clk_disable_unprepare(priv->gptp_clk); + clk_disable_unprepare(priv->refclk); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); reset_control_assert(priv->rstc); -- cgit