From 48c06708e63e71b4395e4159797366aa03be10ff Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 18 Nov 2021 12:58:24 +0100 Subject: mac80211: fix TCP performance on mesh interface sta is NULL for mesh point (resolved later), so sk pacing parameters were not applied. Signed-off-by: Maxime Bizon Link: https://lore.kernel.org/r/66f51659416ac35d6b11a313bd3ffe8b8a43dd55.camel@freebox.fr Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 278945e3e08a..51ec5f9bc2e0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4191,11 +4191,11 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, ieee80211_aggr_check(sdata, sta, skb); + sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); + if (sta) { struct ieee80211_fast_tx *fast_tx; - sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); - fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && -- cgit From d5e568c3a4ec2ddd23e7dc5ad5b0c64e4f22981a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Nov 2021 12:47:40 +0100 Subject: mac80211: track only QoS data frames for admission control For admission control, obviously all of that only works for QoS data frames, otherwise we cannot even access the QoS field in the header. Syzbot reported (see below) an uninitialized value here due to a status of a non-QoS nullfunc packet, which isn't even long enough to contain the QoS header. Fix this to only do anything for QoS data packets. Reported-by: syzbot+614e82b88a1a4973e534@syzkaller.appspotmail.com Fixes: 02219b3abca5 ("mac80211: add WMM admission control support") Link: https://lore.kernel.org/r/20211122124737.dad29e65902a.Ieb04587afacb27c14e0de93ec1bfbefb238cc2a0@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 54ab0e1ef6ca..37f7d975f3da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2452,11 +2452,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, u16 tx_time) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 tid = ieee80211_get_tid(hdr); - int ac = ieee80211_ac_from_tid(tid); - struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; + u16 tid; + int ac; + struct ieee80211_sta_tx_tspec *tx_tspec; unsigned long now = jiffies; + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + tid = ieee80211_get_tid(hdr); + ac = ieee80211_ac_from_tid(tid); + tx_tspec = &ifmgd->tx_tspec[ac]; + if (likely(!tx_tspec->admitted_time)) return; -- cgit From 18688c80ad8a8dd50523dc9276e929932cac86d4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Nov 2021 21:43:23 +0100 Subject: mac80211: fix rate control for retransmitted frames Since retransmission clears info->control, rate control needs to be called again, otherwise the driver might crash due to invalid rates. Cc: stable@vger.kernel.org # 5.14+ Reported-by: Aaro Koskinen Reported-by: Robert W Fixes: 03c3911d2d67 ("mac80211: call ieee80211_tx_h_rate_ctrl() when dequeue") Signed-off-by: Felix Fietkau Tested-by: Aaro Koskinen Link: https://lore.kernel.org/r/20211122204323.9787-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 51ec5f9bc2e0..86a54df3aabd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1822,15 +1822,15 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_CONTINUE; + if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_rate_ctrl); + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { __skb_queue_tail(&tx->skbs, tx->skb); tx->skb = NULL; goto txh_done; } - if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) - CALL_TXH(ieee80211_tx_h_rate_ctrl); - CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); -- cgit From 73111efacd3c6d9e644acca1d132566932be8af0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 24 Nov 2021 10:40:24 +0100 Subject: mac80211: fix regression in SSN handling of addba tx Some drivers that do their own sequence number allocation (e.g. ath9k) rely on being able to modify params->ssn on starting tx ampdu sessions. This was broken by a change that modified it to use sta->tid_seq[tid] instead. Cc: stable@vger.kernel.org Fixes: 31d8bb4e07f8 ("mac80211: agg-tx: refactor sending addba") Reported-by: Eneas U de Queiroz Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211124094024.43222-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 4 ++-- net/mac80211/sta_info.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 430a58587538..c1558dd2d244 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -480,8 +480,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, - sta->tid_seq[tid] >> 4, + tid_tx->dialog_token, tid_tx->ssn, buf_size, tid_tx->timeout); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); @@ -523,6 +522,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); + tid_tx->ssn = params.ssn; if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) { return; } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ba2796782008..e7443fc4669c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -199,6 +199,7 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u16 buf_size; + u16 ssn; u16 failed_bar_ssn; bool bar_pending; -- cgit From 942bd1070c3a39d1302fc5db73d60c86e3033c81 Mon Sep 17 00:00:00 2001 From: Xing Song Date: Tue, 23 Nov 2021 11:31:23 +0800 Subject: mac80211: set up the fwd_skb->dev for mesh forwarding Mesh forwarding requires that the fwd_skb->dev is set up for TX handling, otherwise the following warning will be generated, so set it up for the pending frames. [ 72.835674 ] WARNING: CPU: 0 PID: 1193 at __skb_flow_dissect+0x284/0x1298 [ 72.842379 ] Modules linked in: ksmbd pppoe ppp_async l2tp_ppp ... [ 72.962020 ] CPU: 0 PID: 1193 Comm: kworker/u5:1 Tainted: P S 5.4.137 #0 [ 72.969938 ] Hardware name: MT7622_MT7531 RFB (DT) [ 72.974659 ] Workqueue: napi_workq napi_workfn [ 72.979025 ] pstate: 60000005 (nZCv daif -PAN -UAO) [ 72.983822 ] pc : __skb_flow_dissect+0x284/0x1298 [ 72.988444 ] lr : __skb_flow_dissect+0x54/0x1298 [ 72.992977 ] sp : ffffffc010c738c0 [ 72.996293 ] x29: ffffffc010c738c0 x28: 0000000000000000 [ 73.001615 ] x27: 000000000000ffc2 x26: ffffff800c2eb818 [ 73.006937 ] x25: ffffffc010a987c8 x24: 00000000000000ce [ 73.012259 ] x23: ffffffc010c73a28 x22: ffffffc010a99c60 [ 73.017581 ] x21: 000000000000ffc2 x20: ffffff80094da800 [ 73.022903 ] x19: 0000000000000000 x18: 0000000000000014 [ 73.028226 ] x17: 00000000084d16af x16: 00000000d1fc0bab [ 73.033548 ] x15: 00000000715f6034 x14: 000000009dbdd301 [ 73.038870 ] x13: 00000000ea4dcbc3 x12: 0000000000000040 [ 73.044192 ] x11: 000000000eb00ff0 x10: 0000000000000000 [ 73.049513 ] x9 : 000000000eb00073 x8 : 0000000000000088 [ 73.054834 ] x7 : 0000000000000000 x6 : 0000000000000001 [ 73.060155 ] x5 : 0000000000000000 x4 : 0000000000000000 [ 73.065476 ] x3 : ffffffc010a98000 x2 : 0000000000000000 [ 73.070797 ] x1 : 0000000000000000 x0 : 0000000000000000 [ 73.076120 ] Call trace: [ 73.078572 ] __skb_flow_dissect+0x284/0x1298 [ 73.082846 ] __skb_get_hash+0x7c/0x228 [ 73.086629 ] ieee80211_txq_may_transmit+0x7fc/0x17b8 [mac80211] [ 73.092564 ] ieee80211_tx_prepare_skb+0x20c/0x268 [mac80211] [ 73.098238 ] ieee80211_tx_pending+0x144/0x330 [mac80211] [ 73.103560 ] tasklet_action_common.isra.16+0xb4/0x158 [ 73.108618 ] tasklet_action+0x2c/0x38 [ 73.112286 ] __do_softirq+0x168/0x3b0 [ 73.115954 ] do_softirq.part.15+0x88/0x98 [ 73.119969 ] __local_bh_enable_ip+0xb0/0xb8 [ 73.124156 ] napi_workfn+0x58/0x90 [ 73.127565 ] process_one_work+0x20c/0x478 [ 73.131579 ] worker_thread+0x50/0x4f0 [ 73.135249 ] kthread+0x124/0x128 [ 73.138484 ] ret_from_fork+0x10/0x1c Signed-off-by: Xing Song Tested-By: Frank Wunderlich Link: https://lore.kernel.org/r/20211123033123.2684-1-xing.song@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9541a4c30aca..0544563ede52 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2944,6 +2944,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!fwd_skb) goto out; + fwd_skb->dev = sdata->dev; fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); info = IEEE80211_SKB_CB(fwd_skb); -- cgit From 8f9dcc29566626f683843ccac6113a12208315ca Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Sat, 2 Oct 2021 08:53:29 -0600 Subject: mac80211: fix a memory leak where sta_info is not freed The following is from a system that went OOM due to a memory leak: wlan0: Allocated STA 74:83:c2:64:0b:87 wlan0: Allocated STA 74:83:c2:64:0b:87 wlan0: IBSS finish 74:83:c2:64:0b:87 (---from ieee80211_ibss_add_sta) wlan0: Adding new IBSS station 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 3 wlan0: Inserted STA 74:83:c2:64:0b:87 wlan0: IBSS finish 74:83:c2:64:0b:87 (---from ieee80211_ibss_work) wlan0: Adding new IBSS station 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 3 . . wlan0: expiring inactive not authorized STA 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 1 wlan0: Removed STA 74:83:c2:64:0b:87 wlan0: Destroyed STA 74:83:c2:64:0b:87 The ieee80211_ibss_finish_sta() is called twice on the same STA from 2 different locations. On the second attempt, the allocated STA is not destroyed creating a kernel memory leak. This is happening because sta_info_insert_finish() does not call sta_info_free() the second time when the STA already exists (returns -EEXIST). Note that the caller sta_info_insert_rcu() assumes STA is destroyed upon errors. Same fix is applied to -ENOMEM. Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20211002145329.3125293-1-anzaki@gmail.com [change the error path label to use the existing code] Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 51b49f0d3ad4..840ad1a860fa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -644,13 +644,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; - goto out_err; + goto out_cleanup; } sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); if (!sinfo) { err = -ENOMEM; - goto out_err; + goto out_cleanup; } local->num_sta++; @@ -706,8 +706,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); + out_cleanup: cleanup_single_sta(sta); - out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); rcu_read_lock(); -- cgit From af9d3a2984dc3501acd612c657127517a1beaf9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 09:19:49 +0100 Subject: mac80211: add docs for ssn in struct tid_ampdu_tx As pointed out by Stephen, add the missing docs. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20211129091948.1327ec82beab.Iecc5975406a3028d35c65ff8d2dec31a693888d3@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e7443fc4669c..379fd367197f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -176,6 +176,7 @@ struct sta_info; * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent * @amsdu: support A-MSDU withing A-MDPU + * @ssn: starting sequence number of the session * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. -- cgit From 1eda919126b420fee6b8d546f7f728fbbd4b8f11 Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Sat, 27 Nov 2021 11:28:53 +0100 Subject: nl80211: reset regdom when reloading regdb Reload the regdom when the regulatory db is reloaded. Otherwise, the user had to change the regulatoy domain to a different one and then reset it to the correct one to have a new regulatory db take effect after a reload. Signed-off-by: Finn Behrens Link: https://lore.kernel.org/r/YaIIZfxHgqc/UTA7@gimli.kloenk.dev [edit commit message] Signed-off-by: Johannes Berg --- include/net/regulatory.h | 1 + net/wireless/reg.c | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47f06f6f5a67..0cf9335431e0 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -83,6 +83,7 @@ struct regulatory_request { enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; + bool reload; enum environment_cap country_ie_env; struct list_head list; }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index df87c7f3a049..61f1bf1bc4a7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -133,6 +133,7 @@ static u32 reg_is_indoor_portid; static void restore_regulatory_settings(bool reset_user, bool cached); static void print_regdomain(const struct ieee80211_regdomain *rd); +static void reg_process_hint(struct regulatory_request *reg_request); static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { @@ -1098,6 +1099,8 @@ int reg_reload_regdb(void) const struct firmware *fw; void *db; int err; + const struct ieee80211_regdomain *current_regdomain; + struct regulatory_request *request; err = request_firmware(&fw, "regulatory.db", ®_pdev->dev); if (err) @@ -1118,8 +1121,27 @@ int reg_reload_regdb(void) if (!IS_ERR_OR_NULL(regdb)) kfree(regdb); regdb = db; - rtnl_unlock(); + /* reset regulatory domain */ + current_regdomain = get_cfg80211_regdom(); + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + request->wiphy_idx = WIPHY_IDX_INVALID; + request->alpha2[0] = current_regdomain->alpha2[0]; + request->alpha2[1] = current_regdomain->alpha2[1]; + request->initiator = NL80211_USER_REG_HINT_USER; + request->user_reg_hint_type = NL80211_USER_REG_HINT_USER; + request->reload = true; + + reg_process_hint(request); + +out_unlock: + rtnl_unlock(); out: release_firmware(fw); return err; @@ -2690,7 +2712,8 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) + (treatment == REG_REQ_ALREADY_SET && + !user_request->reload)) return REG_REQ_IGNORE; user_request->intersect = treatment == REG_REQ_INTERSECT; -- cgit From 06d59d626a0a49c7bfeae52aa6f793538209f2fc Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 1 Dec 2021 15:39:52 +0200 Subject: MAINTAINERS: update Kalle Valo's email I switched to using kvalo@kernel.org, update MAINTAINERS file. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211201133952.31744-1-kvalo@kernel.org --- MAINTAINERS | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7a2345ce8521..22bea147c7d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3056,7 +3056,7 @@ F: Documentation/devicetree/bindings/phy/phy-ath79-usb.txt F: drivers/phy/qualcomm/phy-ath79-usb.c ATHEROS ATH GENERIC UTILITIES -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Supported F: drivers/net/wireless/ath/* @@ -3071,7 +3071,7 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/ath5k F: drivers/net/wireless/ath/ath5k/ ATHEROS ATH6KL WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath6kl @@ -13238,7 +13238,7 @@ F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h NETWORKING DRIVERS (WIRELESS) -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-wireless/list/ @@ -15694,7 +15694,7 @@ T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/qt1010* QUALCOMM ATHEROS ATH10K WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k @@ -15702,7 +15702,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath10k/ QUALCOMM ATHEROS ATH11K WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: ath11k@lists.infradead.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git @@ -15866,7 +15866,7 @@ F: Documentation/devicetree/bindings/media/*venus* F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: wcn36xx@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx -- cgit From d599f714b73e4177dfdfe64fce09175568288ee9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 Dec 2021 14:04:24 +0200 Subject: iwlwifi: mvm: don't crash on invalid rate w/o STA If we get to the WARN_ONCE(..., "Got a HT rate (...)", ...) here with a NULL sta, then we crash because mvmsta is bad and we try to dereference it. Fix that by printing -1 as the state if no station was given. Signed-off-by: Johannes Berg Fixes: 6761a718263a ("iwlwifi: mvm: add explicit check for non-data frames in get Tx rate") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20211203140410.1a1541d7dcb5.I606c746e11447fe168cf046376b70b04e278c3b4@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index bdd4ee432548..76e0b7b45980 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -269,17 +269,18 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm, u8 rate_plcp; u32 rate_flags = 0; bool is_cck; - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); /* info->control is only relevant for non HW rate control */ if (!ieee80211_hw_check(mvm->hw, HAS_RATE_CONTROL)) { + struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); + /* HT rate doesn't make sense for a non data frame */ WARN_ONCE(info->control.rates[0].flags & IEEE80211_TX_RC_MCS && !ieee80211_is_data(fc), "Got a HT rate (flags:0x%x/mcs:%d/fc:0x%x/state:%d) for a non data frame\n", info->control.rates[0].flags, info->control.rates[0].idx, - le16_to_cpu(fc), mvmsta->sta_state); + le16_to_cpu(fc), sta ? mvmsta->sta_state : -1); rate_idx = info->control.rates[0].idx; } -- cgit From efdbfa0ad03e764419378485d1b8f6e7706fb1a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:33 +0100 Subject: iwlwifi: fix LED dependencies The dependencies for LED configuration are highly inconsistent and too complicated at the moment. One of the results is a randconfig failure I get very rarely when LEDS_CLASS is in a loadable module, but the wireless core is built-in: WARNING: unmet direct dependencies detected for MAC80211_LEDS Depends on [n]: NET [=y] && WIRELESS [=y] && MAC80211 [=y] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=MAC80211 [=y]) Selected by [m]: - IWLEGACY [=m] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_INTEL [=y] - IWLWIFI_LEDS [=y] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_INTEL [=y] && IWLWIFI [=m] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=IWLWIFI [=m]) && (IWLMVM [=m] || IWLDVM [=m]) aarch64-linux-ld: drivers/net/wireless/ath/ath5k/led.o: in function `ath5k_register_led': led.c:(.text+0x60): undefined reference to `led_classdev_register_ext' aarch64-linux-ld: drivers/net/wireless/ath/ath5k/led.o: in function `ath5k_unregister_leds': led.c:(.text+0x200): undefined reference to `led_classdev_unregister' For iwlwifi, the dependency is wrong, since this config prevents the MAC80211_LEDS code from being part of a built-in MAC80211 driver. For iwlegacy, this is worse because the driver tries to force-enable the other subsystems, which is both a layering violation and a bug because it will still fail with MAC80211=y and IWLEGACY=m, leading to LEDS_CLASS being a module as well. The actual link failure in the ath5k driver is a result of MAC80211_LEDS being enabled but not usable. With the Kconfig logic fixed in the Intel drivers, the ath5k driver works as expected again. Signed-off-by: Arnd Bergmann Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-1-arnd@kernel.org --- drivers/net/wireless/intel/iwlegacy/Kconfig | 4 ++-- drivers/net/wireless/intel/iwlwifi/Kconfig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/Kconfig b/drivers/net/wireless/intel/iwlegacy/Kconfig index 24fe3f63c321..7eacc8e58ee1 100644 --- a/drivers/net/wireless/intel/iwlegacy/Kconfig +++ b/drivers/net/wireless/intel/iwlegacy/Kconfig @@ -2,14 +2,13 @@ config IWLEGACY tristate select FW_LOADER - select NEW_LEDS - select LEDS_CLASS select LEDS_TRIGGERS select MAC80211_LEDS config IWL4965 tristate "Intel Wireless WiFi 4965AGN (iwl4965)" depends on PCI && MAC80211 + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select IWLEGACY help This option enables support for @@ -38,6 +37,7 @@ config IWL4965 config IWL3945 tristate "Intel PRO/Wireless 3945ABG/BG Network Connection (iwl3945)" depends on PCI && MAC80211 + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select IWLEGACY help Select to build the driver supporting the: diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 1085afbefba8..418ae4f870ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -47,7 +47,7 @@ if IWLWIFI config IWLWIFI_LEDS bool - depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 depends on IWLMVM || IWLDVM select LEDS_TRIGGERS select MAC80211_LEDS -- cgit From c68115fc537518b8ff2c54b3e2984a60977affed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:34 +0100 Subject: brcmsmac: rework LED dependencies This is now the only driver that selects the LEDS_CLASS framework, which is normally user-selectable. While it doesn't strictly cause a bug, rework the Kconfig logic to be more consistent with what other drivers do, and only enable LED support in brcmsmac if the dependencies are all there, rather than using 'select' to enable what it needs. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-2-arnd@kernel.org --- drivers/net/wireless/broadcom/brcm80211/Kconfig | 14 +++++++++----- drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/Kconfig b/drivers/net/wireless/broadcom/brcm80211/Kconfig index 5bf2318763c5..3a1a35b5672f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/Kconfig +++ b/drivers/net/wireless/broadcom/brcm80211/Kconfig @@ -7,16 +7,20 @@ config BRCMSMAC depends on MAC80211 depends on BCMA_POSSIBLE select BCMA - select NEW_LEDS if BCMA_DRIVER_GPIO - select LEDS_CLASS if BCMA_DRIVER_GPIO select BRCMUTIL select FW_LOADER select CORDIC help This module adds support for PCIe wireless adapters based on Broadcom - IEEE802.11n SoftMAC chipsets. It also has WLAN led support, which will - be available if you select BCMA_DRIVER_GPIO. If you choose to build a - module, the driver will be called brcmsmac.ko. + IEEE802.11n SoftMAC chipsets. If you choose to build a module, the + driver will be called brcmsmac.ko. + +config BRCMSMAC_LEDS + def_bool BRCMSMAC && BCMA_DRIVER_GPIO && MAC80211_LEDS + help + The brcmsmac LED support depends on the presence of the + BCMA_DRIVER_GPIO driver, and it only works if LED support + is enabled and reachable from the driver module. source "drivers/net/wireless/broadcom/brcm80211/brcmfmac/Kconfig" diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile index 482d7737764d..090757730ba6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile @@ -42,6 +42,6 @@ brcmsmac-y := \ brcms_trace_events.o \ debug.o -brcmsmac-$(CONFIG_BCMA_DRIVER_GPIO) += led.o +brcmsmac-$(CONFIG_BRCMSMAC_LEDS) += led.o obj-$(CONFIG_BRCMSMAC) += brcmsmac.o diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h index d65f5c268fd7..2a5cbeb9e783 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h @@ -24,7 +24,7 @@ struct brcms_led { struct gpio_desc *gpiod; }; -#ifdef CONFIG_BCMA_DRIVER_GPIO +#ifdef CONFIG_BRCMSMAC_LEDS void brcms_led_unregister(struct brcms_info *wl); int brcms_led_register(struct brcms_info *wl); #else -- cgit From f7d55d2e439fa4430755d69a5d7ad16d43a5ebe6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:35 +0100 Subject: mt76: mt7921: fix build regression After mt7921s got added, there are two possible build problems: a) mt7921s does not get built at all if mt7921e is not also enabled b) there is a link error when mt7921e is a loadable module, but mt7921s configured as built-in: ERROR: modpost: "mt7921_mac_sta_add" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_sta_assoc" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_sta_remove" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_write_txwi" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_drv_pmctrl" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_fill_message" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_parse_response" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_ops" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_queue_rx_skb" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_update_channel" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! Fix both by making sure that Kbuild enters the subdirectory when either one is enabled. Fixes: 48fab5bbef40 ("mt76: mt7921: introduce mt7921s support") Signed-off-by: Arnd Bergmann Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-3-arnd@kernel.org --- drivers/net/wireless/mediatek/mt76/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile index 79ab850a45a2..c78ae4b89761 100644 --- a/drivers/net/wireless/mediatek/mt76/Makefile +++ b/drivers/net/wireless/mediatek/mt76/Makefile @@ -34,4 +34,4 @@ obj-$(CONFIG_MT76x2_COMMON) += mt76x2/ obj-$(CONFIG_MT7603E) += mt7603/ obj-$(CONFIG_MT7615_COMMON) += mt7615/ obj-$(CONFIG_MT7915E) += mt7915/ -obj-$(CONFIG_MT7921E) += mt7921/ +obj-$(CONFIG_MT7921_COMMON) += mt7921/ -- cgit From 92816e2629808726af015c7f5b14adc8e4f8b147 Mon Sep 17 00:00:00 2001 From: Jie2x Zhou Date: Thu, 9 Dec 2021 10:02:30 +0800 Subject: selftests: net: Correct ping6 expected rc from 2 to 1 ./fcnal-test.sh -v -t ipv6_ping TEST: ping out, VRF bind - ns-B IPv6 LLA [FAIL] TEST: ping out, VRF bind - multicast IP [FAIL] ping6 is failing as it should. COMMAND: ip netns exec ns-A /bin/ping6 -c1 -w1 fe80::7c4c:bcff:fe66:a63a%red strace of ping6 shows it is failing with '1', so change the expected rc from 2 to 1. Fixes: c0644e71df33 ("selftests: Add ipv6 ping tests to fcnal-test") Reported-by: kernel test robot Suggested-by: David Ahern Signed-off-by: Jie2x Zhou Link: https://lore.kernel.org/r/20211209020230.37270-1-jie2x.zhou@intel.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index a1da013d847b..29cc72d7c3d0 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -2191,7 +2191,7 @@ ipv6_ping_vrf() log_start show_hint "Fails since VRF device does not support linklocal or multicast" run_cmd ${ping6} -c1 -w1 ${a} - log_test_addr ${a} $? 2 "ping out, VRF bind" + log_test_addr ${a} $? 1 "ping out, VRF bind" done for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} -- cgit From ab443c53916730862cec202078d36fd4008bea79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Dec 2021 06:20:46 -0800 Subject: sch_cake: do not call cake_destroy() from cake_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qdiscs are not supposed to call their own destroy() method from init(), because core stack already does that. syzbot was able to trigger use after free: DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 21902 at kernel/locking/mutex.c:586 __mutex_lock_common kernel/locking/mutex.c:586 [inline] WARNING: CPU: 0 PID: 21902 at kernel/locking/mutex.c:586 __mutex_lock+0x9ec/0x12f0 kernel/locking/mutex.c:740 Modules linked in: CPU: 0 PID: 21902 Comm: syz-executor189 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__mutex_lock_common kernel/locking/mutex.c:586 [inline] RIP: 0010:__mutex_lock+0x9ec/0x12f0 kernel/locking/mutex.c:740 Code: 08 84 d2 0f 85 19 08 00 00 8b 05 97 38 4b 04 85 c0 0f 85 27 f7 ff ff 48 c7 c6 20 00 ac 89 48 c7 c7 a0 fe ab 89 e8 bf 76 ba ff <0f> 0b e9 0d f7 ff ff 48 8b 44 24 40 48 8d b8 c8 08 00 00 48 89 f8 RSP: 0018:ffffc9000627f290 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802315d700 RSI: ffffffff815f1db8 RDI: fffff52000c4fe44 RBP: ffff88818f28e000 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815ebb5e R11: 0000000000000000 R12: 0000000000000000 R13: dffffc0000000000 R14: ffffc9000627f458 R15: 0000000093c30000 FS: 0000555556abc400(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fda689c3303 CR3: 000000001cfbb000 CR4: 0000000000350ef0 Call Trace: tcf_chain0_head_change_cb_del+0x2e/0x3d0 net/sched/cls_api.c:810 tcf_block_put_ext net/sched/cls_api.c:1381 [inline] tcf_block_put_ext net/sched/cls_api.c:1376 [inline] tcf_block_put+0xbc/0x130 net/sched/cls_api.c:1394 cake_destroy+0x3f/0x80 net/sched/sch_cake.c:2695 qdisc_create.constprop.0+0x9da/0x10f0 net/sched/sch_api.c:1293 tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2496 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xdf0 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f1bb06badb9 Code: Unable to access opcode bytes at RIP 0x7f1bb06bad8f. RSP: 002b:00007fff3012a658 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f1bb06badb9 RDX: 0000000000000000 RSI: 00000000200007c0 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000003 R10: 0000000000000003 R11: 0000000000000246 R12: 00007fff3012a688 R13: 00007fff3012a6a0 R14: 00007fff3012a6e0 R15: 00000000000013c2 Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20211210142046.698336-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 3c2300d14468..857aaebd49f4 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2736,7 +2736,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) - goto nomem; + return -ENOMEM; for (i = 0; i < CAKE_MAX_TINS; i++) { struct cake_tin_data *b = q->tins + i; @@ -2766,10 +2766,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->min_netlen = ~0; q->min_adjlen = ~0; return 0; - -nomem: - cake_destroy(sch); - return -ENOMEM; } static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) -- cgit From 345e004d023343d38088fdfea39688aa11e06ccf Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 10 Dec 2021 00:46:31 +0100 Subject: bpf: Fix incorrect state pruning for <8B spill/fill Commit 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") introduced support in the verifier to track <8B spill/fills of scalars. The backtracking logic for the precision bit was however skipping spill/fills of less than 8B. That could cause state pruning to consider two states equivalent when they shouldn't be. As an example, consider the following bytecode snippet: 0: r7 = r1 1: call bpf_get_prandom_u32 2: r6 = 2 3: if r0 == 0 goto pc+1 4: r6 = 3 ... 8: [state pruning point] ... /* u32 spill/fill */ 10: *(u32 *)(r10 - 8) = r6 11: r8 = *(u32 *)(r10 - 8) 12: r0 = 0 13: if r8 == 3 goto pc+1 14: r0 = 1 15: exit The verifier first walks the path with R6=3. Given the support for <8B spill/fills, at instruction 13, it knows the condition is true and skips instruction 14. At that point, the backtracking logic kicks in but stops at the fill instruction since it only propagates the precision bit for 8B spill/fill. When the verifier then walks the path with R6=2, it will consider it safe at instruction 8 because R6 is not marked as needing precision. Instruction 14 is thus never walked and is then incorrectly removed as 'dead code'. It's also possible to lead the verifier to accept e.g. an out-of-bound memory access instead of causing an incorrect dead code elimination. This regression was found via Cilium's bpf-next CI where it was causing a conntrack map update to be silently skipped because the code had been removed by the verifier. This commit fixes it by enabling support for <8B spill/fills in the bactracking logic. In case of a <8B spill/fill, the full 8B stack slot will be marked as needing precision. Then, in __mark_chain_precision, any tracked register spilled in a marked slot will itself be marked as needing precision, regardless of the spill size. This logic makes two assumptions: (1) only 8B-aligned spill/fill are tracked and (2) spilled registers are only tracked if the spill and fill sizes are equal. Commit ef979017b837 ("bpf: selftest: Add verifier tests for <8-byte scalar spill and refill") covers the first assumption and the next commit in this patchset covers the second. Fixes: 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f3001937bbb9..f2f1ed34cfe9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2379,8 +2379,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be @@ -2403,8 +2401,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, /* scalars can only be spilled into stack */ if (insn->dst_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; spi = (-insn->off - 1) / BPF_REG_SIZE; if (spi >= 64) { verbose(env, "BUG spi %d\n", spi); -- cgit From 0be2516f865f5a876837184a8385163ff64a5889 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 10 Dec 2021 00:47:00 +0100 Subject: selftests/bpf: Tests for state pruning with u32 spill/fill This patch adds tests for the verifier's tracking for spilled, <8B registers. The first two test cases ensure the verifier doesn't incorrectly prune states in case of <8B spill/fills. The last one simply checks that a filled u64 register is marked unknown if the register spilled in the same slack slot was less than 8B. The map value access at the end of the first program is only incorrect for the path R6=32. If the precision bit for register R8 isn't backtracked through the u32 spill/fill, the R6=32 path is pruned at instruction 9 and the program is incorrectly accepted. The second program is a variation of the same with u32 spills and a u64 fill. The additional instructions to introduce the first pruning point may be a bit fragile as they depend on the heuristics for pruning points in the verifier (currently at least 8 instructions and 2 jumps). If the heuristics are changed, the pruning point may move (e.g., to the subsequent jump) or disappear, which would cause the test to always pass. Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/search_pruning.c | 71 ++++++++++++++++++++++ tools/testing/selftests/bpf/verifier/spill_fill.c | 32 ++++++++++ 2 files changed, 103 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index 7e50cb80873a..682519769fe3 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -132,6 +132,77 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "precision tracking for u32 spill/fill", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV32_IMM(BPF_REG_6, 32), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_6, 4), + /* Additional insns to introduce a pruning point. */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + /* u32 spill/fill */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_10, -8), + /* out-of-bound map value access for r6=32 */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 15 }, + .result = REJECT, + .errstr = "R0 min value is outside of the allowed memory range", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "precision tracking for u32 spills, u64 fill", + .insns = { + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_7, 0xffffffff), + /* Additional insns to introduce a pruning point. */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), + /* u32 spills, u64 fill */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, -8), + /* if r8 != X goto pc+1 r8 known in fallthrough branch */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0xffffffff, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + /* if r8 == X goto pc+1 condition always true on first + * traversal, so starts backtracking to mark r8 as requiring + * precision. r7 marked as needing precision. r6 not marked + * since it's not tracked. + */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 0xffffffff, 1), + /* fails if r8 correctly marked unknown after fill. */ + BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "div by zero", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, { "allocated_stack", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 7ab3de108761..6c907144311f 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -175,6 +175,38 @@ .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "Spill u32 const scalars. Refill as u64. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + /* r6 = 0 */ + BPF_MOV32_IMM(BPF_REG_6, 0), + /* r7 = 20 */ + BPF_MOV32_IMM(BPF_REG_7, 20), + /* *(u32 *)(r10 -4) = r6 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), + /* *(u32 *)(r10 -8) = r7 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), + /* r4 = *(u64 *)(r10 -8) */ + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), + /* r0 = r2 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, { "Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data", .insns = { -- cgit From bcd0f93353326954817a4f9fa55ec57fb38acbb0 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 9 Dec 2021 16:28:39 +0800 Subject: phonet: refcount leak in pep_sock_accep sock_hold(sk) is invoked in pep_sock_accept(), but __sock_put(sk) is not invoked in subsequent failure branches(pep_accept_conn() != 0). Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20211209082839.33985-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski --- net/phonet/pep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index a1525916885a..b4f90afb0638 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -868,6 +868,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, err = pep_accept_conn(newsk, skb); if (err) { + __sock_put(sk); sock_put(newsk); newsk = NULL; goto drop; -- cgit From 71ddeac8cd1d217744a0e060ff520e147c9328d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 10:50:58 -0800 Subject: inet_diag: fix kernel-infoleak for UDP sockets KMSAN reported a kernel-infoleak [1], that can exploited by unpriv users. After analysis it turned out UDP was not initializing r->idiag_expires. Other users of inet_sk_diag_fill() might make the same mistake in the future, so fix this in inet_sk_diag_fill(). [1] BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:156 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x69d/0x25c0 lib/iov_iter.c:670 instrument_copy_to_user include/linux/instrumented.h:121 [inline] copyout lib/iov_iter.c:156 [inline] _copy_to_iter+0x69d/0x25c0 lib/iov_iter.c:670 copy_to_iter include/linux/uio.h:155 [inline] simple_copy_to_iter+0xf3/0x140 net/core/datagram.c:519 __skb_datagram_iter+0x2cb/0x1280 net/core/datagram.c:425 skb_copy_datagram_iter+0xdc/0x270 net/core/datagram.c:533 skb_copy_datagram_msg include/linux/skbuff.h:3657 [inline] netlink_recvmsg+0x660/0x1c60 net/netlink/af_netlink.c:1974 sock_recvmsg_nosec net/socket.c:944 [inline] sock_recvmsg net/socket.c:962 [inline] sock_read_iter+0x5a9/0x630 net/socket.c:1035 call_read_iter include/linux/fs.h:2156 [inline] new_sync_read fs/read_write.c:400 [inline] vfs_read+0x1631/0x1980 fs/read_write.c:481 ksys_read+0x28c/0x520 fs/read_write.c:619 __do_sys_read fs/read_write.c:629 [inline] __se_sys_read fs/read_write.c:627 [inline] __x64_sys_read+0xdb/0x120 fs/read_write.c:627 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: slab_post_alloc_hook mm/slab.h:524 [inline] slab_alloc_node mm/slub.c:3251 [inline] __kmalloc_node_track_caller+0xe0c/0x1510 mm/slub.c:4974 kmalloc_reserve net/core/skbuff.c:354 [inline] __alloc_skb+0x545/0xf90 net/core/skbuff.c:426 alloc_skb include/linux/skbuff.h:1126 [inline] netlink_dump+0x3d5/0x16a0 net/netlink/af_netlink.c:2245 __netlink_dump_start+0xd1c/0xee0 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:254 [inline] inet_diag_handler_cmd+0x2e7/0x400 net/ipv4/inet_diag.c:1343 sock_diag_rcv_msg+0x24a/0x620 netlink_rcv_skb+0x447/0x800 net/netlink/af_netlink.c:2491 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:276 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x1095/0x1360 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x16f3/0x1870 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] sock_write_iter+0x594/0x690 net/socket.c:1057 do_iter_readv_writev+0xa7f/0xc70 do_iter_write+0x52c/0x1500 fs/read_write.c:851 vfs_writev fs/read_write.c:924 [inline] do_writev+0x63f/0xe30 fs/read_write.c:967 __do_sys_writev fs/read_write.c:1040 [inline] __se_sys_writev fs/read_write.c:1037 [inline] __x64_sys_writev+0xe5/0x120 fs/read_write.c:1037 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Bytes 68-71 of 312 are uninitialized Memory access of size 312 starts at ffff88812ab54000 Data copied to user address 0000000020001440 CPU: 1 PID: 6365 Comm: syz-executor801 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 3c4d05c80567 ("inet_diag: Introduce the inet socket dumping routine") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211209185058.53917-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_diag.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c8fa6e7f7d12..581b5b2d72a5 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -261,6 +261,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; + r->idiag_expires = 0; if (inet_diag_msg_attrs_fill(sk, skb, r, ext, sk_user_ns(NETLINK_CB(cb->skb).sk), @@ -314,9 +315,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { -- cgit From 94f2a444f28a649926c410eb9a38afb13a83ebe0 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 10 Dec 2021 10:57:22 +0100 Subject: net: usb: qmi_wwan: add Telit 0x1070 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit FN990 composition: 0x1070: tty, adb, rmnet, tty, tty, tty, tty Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20211210095722.22269-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 86b814e99224..f510e8219470 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1358,6 +1358,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ -- cgit From ee60e626d536da4c710b3634afe68fe7c6d69b59 Mon Sep 17 00:00:00 2001 From: Filip Pokryvka Date: Fri, 10 Dec 2021 18:50:32 +0100 Subject: netdevsim: don't overwrite read only ethtool parms Ethtool ring feature has _max_pending attributes read-only. Set only read-write attributes in nsim_set_ringparam. This patch is useful, if netdevsim device is set-up using NetworkManager, because NetworkManager sends 0 as MAX values, as it is pointless to retrieve them in extra call, because they should be read-only. Then, the device is left in incosistent state (value > MAX). Fixes: a7fc6db099b5 ("netdevsim: support ethtool ring and coalesce settings") Signed-off-by: Filip Pokryvka Link: https://lore.kernel.org/r/20211210175032.411872-1-fpokryvk@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 0ab6a40be611..a6a713b31aad 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -77,7 +77,10 @@ static int nsim_set_ringparam(struct net_device *dev, { struct netdevsim *ns = netdev_priv(dev); - memcpy(&ns->ethtool.ring, ring, sizeof(ns->ethtool.ring)); + ns->ethtool.ring.rx_pending = ring->rx_pending; + ns->ethtool.ring.rx_jumbo_pending = ring->rx_jumbo_pending; + ns->ethtool.ring.rx_mini_pending = ring->rx_mini_pending; + ns->ethtool.ring.tx_pending = ring->tx_pending; return 0; } -- cgit From 3748939bce3fc7a15ef07161826507fbe410bb7a Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 10 Dec 2021 15:25:23 +0800 Subject: selftests: icmp_redirect: pass xfail=0 to log_test() If any sub-test in this icmp_redirect.sh is failing but not expected to fail. The script will complain: ./icmp_redirect.sh: line 72: [: 1: unary operator expected This is because when the sub-test is not expected to fail, we won't pass any value for the xfail local variable in log_test() and thus it's empty. Fix this by passing 0 as the 4th variable to log_test() for non-xfail cases. v2: added fixes tag Fixes: 0a36a75c6818 ("selftests: icmp_redirect: support expected failures") Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/icmp_redirect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index ecbf57f264ed..7b9d6e31b8e7 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -311,7 +311,7 @@ check_exception() ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -E -v 'mtu|redirected' | grep -q "cache" fi - log_test $? 0 "IPv4: ${desc}" + log_test $? 0 "IPv4: ${desc}" 0 # No PMTU info for test "redirect" and "mtu exception plus redirect" if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then -- cgit From 27cbf64a766e86f068ce6214f04c00ceb4db1af4 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Fri, 10 Dec 2021 21:09:33 +0800 Subject: net: hns3: fix use-after-free bug in hclgevf_send_mbx_msg Currently, the hns3_remove function firstly uninstall client instance, and then uninstall acceletion engine device. The netdevice is freed in client instance uninstall process, but acceletion engine device uninstall process still use it to trace runtime information. This causes a use after free problem. So fixes it by check the instance register state to avoid use after free. Fixes: d8355240cf8f ("net: hns3: add trace event support for PF/VF mailbox") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index fdc66fae0960..c5ac6ecf36e1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -114,7 +114,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, memcpy(&req->msg, send_msg, sizeof(struct hclge_vf_to_pf_msg)); - trace_hclge_vf_mbx_send(hdev, req); + if (test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state)) + trace_hclge_vf_mbx_send(hdev, req); /* synchronous send */ if (need_resp) { -- cgit From 6dde452bceca3f2ed2b33bc46a16ff5682a03a2e Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Fri, 10 Dec 2021 21:09:34 +0800 Subject: net: hns3: fix race condition in debugfs When multiple threads concurrently access the debugfs content, data and pointer exceptions may occur. Therefore, mutex lock protection is added for debugfs. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 ++ drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3f7a9a4c59d5..63f5abcc6bf4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -839,6 +839,8 @@ struct hnae3_handle { u8 netdev_flags; struct dentry *hnae3_dbgfs; + /* protects concurrent contention between debugfs commands */ + struct mutex dbgfs_lock; /* Network interface message level enabled bits */ u32 msg_enable; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 081295bff765..c381f8af67f0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1226,6 +1226,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, if (ret) return ret; + mutex_lock(&handle->dbgfs_lock); save_buf = &hns3_dbg_cmd[index].buf; if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) || @@ -1238,15 +1239,15 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, read_buf = *save_buf; } else { read_buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL); - if (!read_buf) - return -ENOMEM; + if (!read_buf) { + ret = -ENOMEM; + goto out; + } /* save the buffer addr until the last read operation */ *save_buf = read_buf; - } - /* get data ready for the first time to read */ - if (!*ppos) { + /* get data ready for the first time to read */ ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd, read_buf, hns3_dbg_cmd[index].buf_len); if (ret) @@ -1255,8 +1256,10 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, size = simple_read_from_buffer(buffer, count, ppos, read_buf, strlen(read_buf)); - if (size > 0) + if (size > 0) { + mutex_unlock(&handle->dbgfs_lock); return size; + } out: /* free the buffer for the last read operation */ @@ -1265,6 +1268,7 @@ out: *save_buf = NULL; } + mutex_unlock(&handle->dbgfs_lock); return ret; } @@ -1337,6 +1341,8 @@ int hns3_dbg_init(struct hnae3_handle *handle) debugfs_create_dir(hns3_dbg_dentry[i].name, handle->hnae3_dbgfs); + mutex_init(&handle->dbgfs_lock); + for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) { if ((hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES && ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) || @@ -1363,6 +1369,7 @@ int hns3_dbg_init(struct hnae3_handle *handle) return 0; out: + mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; return ret; @@ -1378,6 +1385,7 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) hns3_dbg_cmd[i].buf = NULL; } + mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; } -- cgit From 7e0147592b5c4f9e2eb8c54a7857a56d4863f74e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 10:11:30 -0700 Subject: selftests: Add duplicate config only for MD5 VRF tests Commit referenced below added configuration in the default VRF that duplicates a VRF to check MD5 passwords are properly used and fail when expected. That config should not be added all the time as it can cause tests to pass that should not (by matching on default VRF setup when it should not). Move the duplicate setup to a function that is only called for the MD5 tests and add a cleanup function to remove it after the MD5 tests. Fixes: 5cad8bce26e0 ("fcnal-test: Add TCP MD5 tests for VRF") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 29cc72d7c3d0..dd7437dd2680 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -455,6 +455,22 @@ cleanup() ip netns del ${NSC} >/dev/null 2>&1 } +cleanup_vrf_dup() +{ + ip link del ${NSA_DEV2} >/dev/null 2>&1 + ip netns pids ${NSC} | xargs kill 2>/dev/null + ip netns del ${NSC} >/dev/null 2>&1 +} + +setup_vrf_dup() +{ + # some VRF tests use ns-C which has the same config as + # ns-B but for a device NOT in the VRF + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ + ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 +} + setup() { local with_vrf=${1} @@ -484,12 +500,6 @@ setup() ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV} ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV} - - # some VRF tests use ns-C which has the same config as - # ns-B but for a device NOT in the VRF - create_ns ${NSC} "-" "-" - connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ - ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 else ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV} ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV} @@ -1240,7 +1250,9 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv4_tcp_md5 + cleanup_vrf_dup # # enable VRF global server @@ -2719,7 +2731,9 @@ ipv6_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv6_tcp_md5 + cleanup_vrf_dup # # enable VRF global server -- cgit From 0f108ae4452025fef529671998f6c7f1c4526790 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 10:21:08 -0700 Subject: selftests: Fix raw socket bind tests with VRF Commit referenced below added negative socket bind tests for VRF. The socket binds should fail since the address to bind to is in a VRF yet the socket is not bound to the VRF or a device within it. Update the expected return code to check for 1 (bind failure) so the test passes when the bind fails as expected. Add a 'show_hint' comment to explain why the bind is expected to fail. Fixes: 75b2b2b3db4c ("selftests: Add ipv4 address bind tests to fcnal-test") Reported-by: Li Zhijian Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index dd7437dd2680..4340477863d3 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -1810,8 +1810,9 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start + show_hint "Socket not bound to VRF, but address is in VRF" run_cmd nettest -s -R -P icmp -l ${a} -b - log_test_addr ${a} $? 0 "Raw socket bind to local address" + log_test_addr ${a} $? 1 "Raw socket bind to local address" log_start run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b -- cgit From 28a2686c185e84b6aa6a4d9c9a972360eb7ca266 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 11:26:16 -0700 Subject: selftests: Fix IPv6 address bind tests IPv6 allows binding a socket to a device then binding to an address not on the device (__inet6_bind -> ipv6_chk_addr with strict flag not set). Update the bind tests to reflect legacy behavior. Fixes: 34d0302ab861 ("selftests: Add ipv6 address bind tests to fcnal-test") Reported-by: Li Zhijian Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 4340477863d3..ad2982b72e02 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3429,11 +3429,14 @@ ipv6_addr_bind_novrf() run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. So this test passes + # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Should fail with 'Cannot assign requested address'" + show_hint "Tecnically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" + log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } ipv6_addr_bind_vrf() @@ -3474,10 +3477,15 @@ ipv6_addr_bind_vrf() run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. The only restriction + # is that the address is valid in the L3 domain. So this test + # passes when it really should not a=${VRF_IP6} log_start + show_hint "Tecnically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" + log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start -- cgit From c062f2a0b04d86c5b8c9d973bea43493eaca3d32 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 10 Dec 2021 17:42:47 +0100 Subject: net/sched: sch_ets: don't remove idle classes from the round-robin list Shuang reported that the following script: 1) tc qdisc add dev ddd0 handle 10: parent 1: ets bands 8 strict 4 priomap 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 2) mausezahn ddd0 -A 10.10.10.1 -B 10.10.10.2 -c 0 -a own -b 00:c1:a0:c1:a0:00 -t udp & 3) tc qdisc change dev ddd0 handle 10: ets bands 4 strict 2 quanta 2500 2500 priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 crashes systematically when line 2) is commented: list_del corruption, ffff8e028404bd30->next is LIST_POISON1 (dead000000000100) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:47! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 954 Comm: tc Not tainted 5.16.0-rc4+ #478 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 RIP: 0010:__list_del_entry_valid.cold.1+0x12/0x47 Code: fe ff 0f 0b 48 89 c1 4c 89 c6 48 c7 c7 08 42 1b 87 e8 1d c5 fe ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 98 42 1b 87 e8 09 c5 fe ff <0f> 0b 48 c7 c7 48 43 1b 87 e8 fb c4 fe ff 0f 0b 48 89 f2 48 89 fe RSP: 0018:ffffae46807a3888 EFLAGS: 00010246 RAX: 000000000000004e RBX: 0000000000000007 RCX: 0000000000000202 RDX: 0000000000000000 RSI: ffffffff871ac536 RDI: 00000000ffffffff RBP: ffffae46807a3a10 R08: 0000000000000000 R09: c0000000ffff7fff R10: 0000000000000001 R11: ffffae46807a36a8 R12: ffff8e028404b800 R13: ffff8e028404bd30 R14: dead000000000100 R15: ffff8e02fafa2400 FS: 00007efdc92e4480(0000) GS:ffff8e02fb600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000682f48 CR3: 00000001058be000 CR4: 0000000000350ef0 Call Trace: ets_qdisc_change+0x58b/0xa70 [sch_ets] tc_modify_qdisc+0x323/0x880 rtnetlink_rcv_msg+0x169/0x4a0 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x1a5/0x280 netlink_sendmsg+0x257/0x4d0 sock_sendmsg+0x5b/0x60 ____sys_sendmsg+0x1f2/0x260 ___sys_sendmsg+0x7c/0xc0 __sys_sendmsg+0x57/0xa0 do_syscall_64+0x3a/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7efdc8031338 Code: 89 02 48 c7 c0 ff ff ff ff eb b5 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 43 2c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 41 89 d4 55 RSP: 002b:00007ffdf1ce9828 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000061b37a97 RCX: 00007efdc8031338 RDX: 0000000000000000 RSI: 00007ffdf1ce9890 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000078a940 R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000688880 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: sch_ets sch_tbf dummy rfkill iTCO_wdt iTCO_vendor_support intel_rapl_msr intel_rapl_common joydev pcspkr i2c_i801 virtio_balloon i2c_smbus lpc_ich ip_tables xfs libcrc32c crct10dif_pclmul crc32_pclmul crc32c_intel serio_raw ghash_clmulni_intel ahci libahci libata virtio_blk virtio_console virtio_net net_failover failover sunrpc dm_mirror dm_region_hash dm_log dm_mod [last unloaded: sch_ets] ---[ end trace f35878d1912655c2 ]--- RIP: 0010:__list_del_entry_valid.cold.1+0x12/0x47 Code: fe ff 0f 0b 48 89 c1 4c 89 c6 48 c7 c7 08 42 1b 87 e8 1d c5 fe ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 98 42 1b 87 e8 09 c5 fe ff <0f> 0b 48 c7 c7 48 43 1b 87 e8 fb c4 fe ff 0f 0b 48 89 f2 48 89 fe RSP: 0018:ffffae46807a3888 EFLAGS: 00010246 RAX: 000000000000004e RBX: 0000000000000007 RCX: 0000000000000202 RDX: 0000000000000000 RSI: ffffffff871ac536 RDI: 00000000ffffffff RBP: ffffae46807a3a10 R08: 0000000000000000 R09: c0000000ffff7fff R10: 0000000000000001 R11: ffffae46807a36a8 R12: ffff8e028404b800 R13: ffff8e028404bd30 R14: dead000000000100 R15: ffff8e02fafa2400 FS: 00007efdc92e4480(0000) GS:ffff8e02fb600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000682f48 CR3: 00000001058be000 CR4: 0000000000350ef0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x4e00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- we can remove 'q->classes[i].alist' only if DRR class 'i' was part of the active list. In the ETS scheduler DRR classes belong to that list only if the queue length is greater than zero: we need to test for non-zero value of 'q->classes[i].qdisc->q.qlen' before removing from the list, similarly to what has been done elsewhere in the ETS code. Fixes: de6d25924c2a ("net/sched: sch_ets: don't peek at classes beyond 'nbands'") Reported-by: Shuang Li Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/sch_ets.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index e007fc75ef2f..d73393493553 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -666,9 +666,9 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, } } for (i = q->nbands; i < oldbands; i++) { - qdisc_tree_flush_backlog(q->classes[i].qdisc); - if (i >= q->nstrict) + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) list_del(&q->classes[i].alist); + qdisc_tree_flush_backlog(q->classes[i].qdisc); } q->nstrict = nstrict; memcpy(q->prio2band, priomap, sizeof(priomap)); -- cgit From ab8eb798ddabddb2944401bf31ead9671cb97d95 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sat, 11 Dec 2021 14:01:53 +0000 Subject: net: bcmgenet: Fix NULL vs IS_ERR() checking The phy_attach() function does not return NULL. It returns error pointers. Signed-off-by: Miaoqian Lin Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 5f259641437a..c888ddee1fc4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -589,9 +589,9 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) * Internal or external PHY with MDIO access */ phydev = phy_attach(priv->dev, phy_name, pd->phy_interface); - if (!phydev) { + if (IS_ERR(phydev)) { dev_err(kdev, "failed to register PHY device\n"); - return -ENODEV; + return PTR_ERR(phydev); } } else { /* -- cgit From a8d13611b4a7b1b20d17bf2b9a89a3efcabde56c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 11 Dec 2021 14:30:31 -0500 Subject: selftests/net: toeplitz: fix udp option Tiny fix. Option -u ("use udp") does not take an argument. It can cause the next argument to silently be ignored. Fixes: 5ebfb4cc3048 ("selftests/net: toeplitz test") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/toeplitz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c index 710ac956bdb3..c5489341cfb8 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c @@ -498,7 +498,7 @@ static void parse_opts(int argc, char **argv) bool have_toeplitz = false; int index, c; - while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) { + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { switch (c) { case '4': cfg_family = AF_INET; -- cgit From 9d591fc028b6bddb38c6585874f331267cbdadae Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Sat, 11 Dec 2021 23:51:41 +0100 Subject: net: dsa: mv88e6xxx: Unforce speed & duplex in mac_link_down() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 64d47d50be7a ("net: dsa: mv88e6xxx: configure interface settings in mac_config") removed forcing of speed and duplex from mv88e6xxx_mac_config(), where the link is forced down, and left it only in mv88e6xxx_mac_link_up(), by which time link is unforced. It seems that (at least on 88E6190) when changing cmode to 2500base-x, if the link is not forced down, but the speed or duplex are still forced, the forcing of new settings for speed & duplex doesn't take in mv88e6xxx_mac_link_up(). Fix this by unforcing speed & duplex in mv88e6xxx_mac_link_down(). Fixes: 64d47d50be7a ("net: dsa: mv88e6xxx: configure interface settings in mac_config") Signed-off-by: Marek Behún Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 14f87f6ac479..cd8462d1e27c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -768,6 +768,10 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, if ((!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) && ops->port_sync_link) err = ops->port_sync_link(chip, port, mode, false); + + if (!err && ops->port_set_speed_duplex) + err = ops->port_set_speed_duplex(chip, port, SPEED_UNFORCED, + DUPLEX_UNFORCED); mv88e6xxx_reg_unlock(chip); if (err) -- cgit From 71da1aec215290e249d09c44c768df859f3a3bba Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 13 Dec 2021 16:36:00 +0800 Subject: selftest/net/forwarding: declare NETIFS p9 p10 The recent GRE selftests defined NUM_NETIFS=10. If the users copy forwarding.config.sample to forwarding.config directly, they will get error "Command line is not complete" when run the GRE tests, because create_netif_veth() failed with no interface name defined. Fix it by extending the NETIFS with p9 and p10. Fixes: 2800f2485417 ("selftests: forwarding: Test multipath hashing on inner IP pkts for GRE tunnel") Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/forwarding.config.sample | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index bf17e485684f..b0980a2efa31 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -13,6 +13,8 @@ NETIFS[p5]=veth4 NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +NETIFS[p9]=veth8 +NETIFS[p10]=veth9 # Port that does not have a cable connected. NETIF_NO_CABLE=eth8 -- cgit From be565ec71d1d59438bed0c7ed0a252a327e0b0ef Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 13 Dec 2021 01:44:36 -0800 Subject: net: ethernet: ti: add missing of_node_put before return Fix following coccicheck warning: WARNING: Function "for_each_child_of_node" should have of_node_put() before return. Early exits from for_each_child_of_node should decrement the node reference counter. Signed-off-by: Wang Qing Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c092cb61416a..ffbbda8f4d41 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1844,13 +1844,14 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) if (ret < 0) { dev_err(dev, "%pOF error reading port_id %d\n", port_np, ret); - return ret; + goto of_node_put; } if (!port_id || port_id > common->port_num) { dev_err(dev, "%pOF has invalid port_id %u %s\n", port_np, port_id, port_np->name); - return -EINVAL; + ret = -EINVAL; + goto of_node_put; } port = am65_common_get_port(common, port_id); @@ -1866,8 +1867,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) (AM65_CPSW_NU_FRAM_PORT_OFFSET * (port_id - 1)); port->slave.mac_sl = cpsw_sl_get("am65", dev, port->port_base); - if (IS_ERR(port->slave.mac_sl)) - return PTR_ERR(port->slave.mac_sl); + if (IS_ERR(port->slave.mac_sl)) { + ret = PTR_ERR(port->slave.mac_sl); + goto of_node_put; + } port->disabled = !of_device_is_available(port_np); if (port->disabled) { @@ -1880,7 +1883,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) ret = PTR_ERR(port->slave.ifphy); dev_err(dev, "%pOF error retrieving port phy: %d\n", port_np, ret); - return ret; + goto of_node_put; } port->slave.mac_only = @@ -1889,10 +1892,12 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) /* get phy/link info */ if (of_phy_is_fixed_link(port_np)) { ret = of_phy_register_fixed_link(port_np); - if (ret) - return dev_err_probe(dev, ret, + if (ret) { + ret = dev_err_probe(dev, ret, "failed to register fixed-link phy %pOF\n", port_np); + goto of_node_put; + } port->slave.phy_node = of_node_get(port_np); } else { port->slave.phy_node = @@ -1902,14 +1907,15 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) if (!port->slave.phy_node) { dev_err(dev, "slave[%d] no phy found\n", port_id); - return -ENODEV; + ret = -ENODEV; + goto of_node_put; } ret = of_get_phy_mode(port_np, &port->slave.phy_if); if (ret) { dev_err(dev, "%pOF read phy-mode err %d\n", port_np, ret); - return ret; + goto of_node_put; } ret = of_get_mac_address(port_np, port->slave.mac_addr); @@ -1932,6 +1938,11 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) } return 0; + +of_node_put: + of_node_put(port_np); + of_node_put(node); + return ret; } static void am65_cpsw_pcpu_stats_free(void *data) -- cgit From d33dae51645c0d837e587000f3131118fcd6bf5e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 13 Dec 2021 11:05:13 +0000 Subject: net: phy: add a note about refcounting Recently, a patch has been submitted to "fix" the refcounting for a DT node in of_mdiobus_link_mdiodev(). This is not a leaked refcount. The refcount is passed to the new device. Sadly, coccicheck identifies this location as a leaked refcount, which means we're likely to keep getting patches to "fix" this. However, fixing this will cause breakage. Add a comment to state that the lack of of_node_put() here is intentional. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c204067f1890..c198722e4871 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -460,6 +460,9 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus, if (addr == mdiodev->addr) { device_set_node(dev, of_fwnode_handle(child)); + /* The refcount on "child" is passed to the mdio + * device. Do _not_ use of_node_put(child) here. + */ return; } } -- cgit From 884d2b845477cd0a18302444dc20fe2d9a01743e Mon Sep 17 00:00:00 2001 From: David Wu Date: Mon, 13 Dec 2021 19:15:15 +0800 Subject: net: stmmac: Add GFP_DMA32 for rx buffers if no 64 capability Use page_pool_alloc_pages instead of page_pool_dev_alloc_pages, which can give the gfp parameter, in the case of not supporting 64-bit width, using 32-bit address memory can reduce a copy from swiotlb. Signed-off-by: David Wu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index da8306f60730..8ded4be08b00 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1461,16 +1461,20 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + if (priv->dma_cap.addr64 <= 32) + gfp |= GFP_DMA32; if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->page) return -ENOMEM; buf->page_offset = stmmac_rx_offset(priv); } if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->sec_page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->sec_page) return -ENOMEM; @@ -4482,6 +4486,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; int dirty = stmmac_rx_dirty(priv, queue); unsigned int entry = rx_q->dirty_rx; + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + if (priv->dma_cap.addr64 <= 32) + gfp |= GFP_DMA32; while (dirty-- > 0) { struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; @@ -4494,13 +4502,13 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) p = rx_q->dma_rx + entry; if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->page) break; } if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->sec_page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->sec_page) break; -- cgit From bc2f39a6252ee40d9bfc2743d4437d420aec5f6e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Nov 2021 11:13:50 +0300 Subject: iavf: missing unlocks in iavf_watchdog_task() This code was re-organized and there some unlocks missing now. Fixes: 898ef1cb1cb2 ("iavf: Combine init and watchdog state machines") Signed-off-by: Dan Carpenter Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index cfdbf8c08d18..884a19c51543 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2046,6 +2046,7 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; + mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(10)); @@ -2076,9 +2077,8 @@ static void iavf_watchdog_task(struct work_struct *work) iavf_detect_recover_hung(&adapter->vsi); break; case __IAVF_REMOVE: - mutex_unlock(&adapter->crit_lock); - return; default: + mutex_unlock(&adapter->crit_lock); return; } -- cgit From fe523d7c9a8332855376ad5eb1aa301091129ba4 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 1 Dec 2021 09:14:34 +0100 Subject: iavf: do not override the adapter state in the watchdog task (again) The watchdog task incorrectly changes the state to __IAVF_RESETTING, instead of letting the reset task take care of that. This was already resolved by commit 22c8fd71d3a5 ("iavf: do not override the adapter state in the watchdog task") but the problem was reintroduced by the recent code refactoring in commit 45eebd62999d ("iavf: Refactor iavf state machine tracking"). Fixes: 45eebd62999d ("iavf: Refactor iavf state machine tracking") Signed-off-by: Stefan Assmann Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 884a19c51543..4e7c04047f91 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2085,7 +2085,6 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; -- cgit From b0cdc5dbcf2ba0d99785da5aabf1b17943805b8a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 11 Dec 2021 17:11:12 +0100 Subject: mptcp: never allow the PM to close a listener subflow Currently, when deleting an endpoint the netlink PM treverses all the local MPTCP sockets, regardless of their status. If an MPTCP listener socket is bound to the IP matching the delete endpoint, the listener TCP socket will be closed. That is unexpected, the PM should only affect data subflows. Additionally, syzbot was able to trigger a NULL ptr dereference due to the above: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] CPU: 1 PID: 6550 Comm: syz-executor122 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__lock_acquire+0xd7d/0x54a0 kernel/locking/lockdep.c:4897 Code: 0f 0e 41 be 01 00 00 00 0f 86 c8 00 00 00 89 05 69 cc 0f 0e e9 bd 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 da 48 c1 ea 03 <80> 3c 02 00 0f 85 f3 2f 00 00 48 81 3b 20 75 17 8f 0f 84 52 f3 ff RSP: 0018:ffffc90001f2f818 EFLAGS: 00010016 RAX: dffffc0000000000 RBX: 0000000000000018 RCX: 0000000000000000 RDX: 0000000000000003 RSI: 0000000000000000 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: 000000000000000a R12: 0000000000000000 R13: ffff88801b98d700 R14: 0000000000000000 R15: 0000000000000001 FS: 00007f177cd3d700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f177cd1b268 CR3: 000000001dd55000 CR4: 0000000000350ee0 Call Trace: lock_acquire kernel/locking/lockdep.c:5637 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5602 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x39/0x50 kernel/locking/spinlock.c:162 finish_wait+0xc0/0x270 kernel/sched/wait.c:400 inet_csk_wait_for_connect net/ipv4/inet_connection_sock.c:464 [inline] inet_csk_accept+0x7de/0x9d0 net/ipv4/inet_connection_sock.c:497 mptcp_accept+0xe5/0x500 net/mptcp/protocol.c:2865 inet_accept+0xe4/0x7b0 net/ipv4/af_inet.c:739 mptcp_stream_accept+0x2e7/0x10e0 net/mptcp/protocol.c:3345 do_accept+0x382/0x510 net/socket.c:1773 __sys_accept4_file+0x7e/0xe0 net/socket.c:1816 __sys_accept4+0xb0/0x100 net/socket.c:1846 __do_sys_accept net/socket.c:1864 [inline] __se_sys_accept net/socket.c:1861 [inline] __x64_sys_accept+0x71/0xb0 net/socket.c:1861 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f177cd8b8e9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f177cd3d308 EFLAGS: 00000246 ORIG_RAX: 000000000000002b RAX: ffffffffffffffda RBX: 00007f177ce13408 RCX: 00007f177cd8b8e9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f177ce13400 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f177ce1340c R13: 00007f177cde1004 R14: 6d705f706374706d R15: 0000000000022000 Fix the issue explicitly skipping MPTCP socket in TCP_LISTEN status. Reported-and-tested-by: syzbot+e4d843bb96a9431e6331@syzkaller.appspotmail.com Reviewed-by: Mat Martineau Fixes: 740d798e8767 ("mptcp: remove id 0 address") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/ebc7594cdd420d241fb2172ddb8542ba64717657.1639238695.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7b96be1e9f14..f523051f5aef 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -700,6 +700,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, msk_owned_by_me(msk); + if (sk->sk_state == TCP_LISTEN) + return; + if (!rm_list->nr) return; -- cgit From 1fe98f5690c4219d419ea9cc190f94b3401cf324 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 2 Dec 2021 13:45:33 +0100 Subject: mac80211: send ADDBA requests using the tid/queue of the aggregation session Sending them out on a different queue can cause a race condition where a number of packets in the queue may be discarded by the receiver, because the ADDBA request is sent too early. This affects any driver with software A-MPDU setup which does not allocate packet seqno in hardware on tx, regardless of whether iTXQ is used or not. The only driver I've seen that explicitly deals with this issue internally is mwl8k. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211202124533.80388-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c1558dd2d244..58761ca7da3c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) -- cgit From 37d33114240ede043c42463a6347f68ed72d6904 Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Wed, 1 Dec 2021 13:49:18 +0100 Subject: nl80211: remove reload flag from regulatory_request This removes the previously unused reload flag, which was introduced in 1eda919126b4. The request is handled as NL80211_REGDOM_SET_BY_CORE, which is parsed unconditionally. Reported-by: kernel test robot Reported-by: Nathan Chancellor Fixes: 1eda919126b4 ("nl80211: reset regdom when reloading regdb") Link: https://lore.kernel.org/all/YaZuKYM5bfWe2Urn@archlinux-ax161/ Signed-off-by: Finn Behrens Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/YadvTolO8rQcNCd/@gimli.kloenk.dev Signed-off-by: Johannes Berg --- include/net/regulatory.h | 1 - net/wireless/reg.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 0cf9335431e0..47f06f6f5a67 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -83,7 +83,6 @@ struct regulatory_request { enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; - bool reload; enum environment_cap country_ie_env; struct list_head list; }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 61f1bf1bc4a7..8148a3b5f607 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1134,9 +1134,8 @@ int reg_reload_regdb(void) request->wiphy_idx = WIPHY_IDX_INVALID; request->alpha2[0] = current_regdomain->alpha2[0]; request->alpha2[1] = current_regdomain->alpha2[1]; - request->initiator = NL80211_USER_REG_HINT_USER; + request->initiator = NL80211_REGDOM_SET_BY_CORE; request->user_reg_hint_type = NL80211_USER_REG_HINT_USER; - request->reload = true; reg_process_hint(request); @@ -2712,8 +2711,7 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - (treatment == REG_REQ_ALREADY_SET && - !user_request->reload)) + treatment == REG_REQ_ALREADY_SET) return REG_REQ_IGNORE; user_request->intersect = treatment == REG_REQ_INTERSECT; -- cgit From 06c41bda0ea14aa7fba932a9613c4ee239682cf0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Dec 2021 15:26:25 +0200 Subject: mac80211: agg-tx: don't schedule_and_wake_txq() under sta->lock When we call ieee80211_agg_start_txq(), that will in turn call schedule_and_wake_txq(). Called from ieee80211_stop_tx_ba_cb() this is done under sta->lock, which leads to certain circular lock dependencies, as reported by Chris Murphy: https://lore.kernel.org/r/CAJCQCtSXJ5qA4bqSPY=oLRMbv-irihVvP7A2uGutEbXQVkoNaw@mail.gmail.com In general, ieee80211_agg_start_txq() is usually not called with sta->lock held, only in this one place. But it's always called with sta->ampdu_mlme.mtx held, and that's therefore clearly sufficient. Change ieee80211_stop_tx_ba_cb() to also call it without the sta->lock held, by factoring it out of ieee80211_remove_tid_tx() (which is only called in this one place). This breaks the locking chain and makes it less likely that we'll have similar locking chain problems in the future. Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Reported-by: Chris Murphy Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211202152554.f519884c8784.I555fef8e67d93fff3d9a304886c4a9f8b322e591@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 58761ca7da3c..74a878f213d3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2020 Intel Corporation + * Copyright (C) 2018 - 2021 Intel Corporation */ #include @@ -213,6 +213,8 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + if (!txq) return; @@ -290,7 +292,6 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); - ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -889,6 +890,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; + bool start_txq = false; ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); @@ -906,10 +908,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, send_delba = true; ieee80211_remove_tid_tx(sta, tid); + start_txq = true; unlock_sta: spin_unlock_bh(&sta->lock); + if (start_txq) + ieee80211_agg_start_txq(sta, tid, false); + if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); -- cgit From e08ebd6d7b90ae81f21425ca39136f5b2272580f Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 2 Dec 2021 15:28:54 +0200 Subject: cfg80211: Acquire wiphy mutex on regulatory work The function cfg80211_reg_can_beacon_relax() expects wiphy mutex to be held when it is being called. However, when reg_leave_invalid_chans() is called the mutex is not held. Fix it by acquiring the lock before calling the function. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211202152831.527686cda037.I40ad9372a47cbad53b4aae7b5a6ccc0dc3fddf8b@changeid Signed-off-by: Johannes Berg --- net/wireless/reg.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8148a3b5f607..f8f01a3e020b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2359,6 +2359,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) struct cfg80211_chan_def chandef = {}; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; + bool ret; wdev_lock(wdev); iftype = wdev->iftype; @@ -2408,7 +2409,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: - return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); + wiphy_lock(wiphy); + ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); + wiphy_unlock(wiphy); + + return ret; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: return cfg80211_chandef_usable(wiphy, &chandef, -- cgit From 768c0b19b50665e337c96858aa2b7928d6dcf756 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 11 Dec 2021 20:10:24 +0100 Subject: mac80211: validate extended element ID is present Before attempting to parse an extended element, verify that the extended element ID is present. Fixes: 41cbb0f5a295 ("mac80211: add support for HE") Reported-by: syzbot+59bdff68edce82e393b6@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20211211201023.f30a1b128c07.I5cacc176da94ba316877c6e10fe3ceec8b4dbd7d@changeid Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/util.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 43df2f0c5db9..6c2934854d3c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -943,7 +943,12 @@ static void ieee80211_parse_extension_element(u32 *crc, struct ieee802_11_elems *elems) { const void *data = elem->data + 1; - u8 len = elem->datalen - 1; + u8 len; + + if (!elem->datalen) + return; + + len = elem->datalen - 1; switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: -- cgit From 511ab0c1dfb260a6b17b8771109e8d63474473a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:46 +0200 Subject: mac80211: fix lookup when adding AddBA extension element We should be doing the HE capabilities lookup based on the full interface type so if P2P doesn't have HE but client has it doesn't get confused. Fix that. Fixes: 2ab45876756f ("mac80211: add support for the ADDBA extension element") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.010fc1d61137.If3a468145f29d670cb00a693bed559d8290ba693@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 470ff0ce3dc7..7d2925bb966e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ /** @@ -191,7 +191,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, sband = ieee80211_get_sband(sdata); if (!sband) return; - he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); if (!he_cap) return; -- cgit From f22d981386d12d1513bd2720fb4387b469124d4b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 29 Nov 2021 15:32:45 +0200 Subject: mac80211: Fix the size used for building probe request Instead of using the hard-coded value of '100' use the correct scan IEs length as calculated during HW registration to mac80211. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.0a82d6891719.I8ded1f2e0bccb9e71222c945666bcd86537f2e35@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6c2934854d3c..fe2903b84ceb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2068,7 +2068,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, chandef.chan = chan; skb = ieee80211_probereq_get(&local->hw, src, ssid, ssid_len, - 100 + ie_len); + local->scan_ies_len + ie_len); if (!skb) return NULL; -- cgit From 4dde3c3627b52ca515a34f6f4de3898224aa1dd3 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Mon, 29 Nov 2021 15:32:42 +0200 Subject: mac80211: update channel context before station state Currently channel context is updated only after station got an update about new assoc state, this results in station using the old channel context. Fix this by moving the update channel context before updating station, enabling the driver to immediately use the updated channel context in the new assoc state. Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.1c80c17ffd8a.I94ae31378b363c1182cfdca46c4b7e7165cff984@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 840ad1a860fa..537535a88990 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -667,6 +667,15 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) list_add_tail_rcu(&sta->list, &local->sta_list); + /* update channel context before notifying the driver about state + * change, this enables driver using the updated channel context right away. + */ + if (sta->sta_state >= IEEE80211_STA_ASSOC) { + ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + /* notify driver */ err = sta_info_insert_drv_state(local, sdata, sta); if (err) @@ -674,12 +683,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_INSERTED); - if (sta->sta_state >= IEEE80211_STA_ASSOC) { - ieee80211_recalc_min_chandef(sta->sdata); - if (!sta->sta.support_p2p_ps) - ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); - } - /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); -- cgit From db7205af049d230e7e0abf61c1e74c1aab40f390 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:39 +0200 Subject: mac80211: mark TX-during-stop for TX in in_reconfig Mark TXQs as having seen transmit while they were stopped if we bail out of drv_wake_tx_queue() due to reconfig, so that the queue wake after this will make them catch up. This is particularly necessary for when TXQs are used for management packets since those TXQs won't see a lot of traffic that'd make them catch up later. Cc: stable@vger.kernel.org Fixes: 4856bfd23098 ("mac80211: do not call driver wake_tx_queue op during reconfig") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.4573a221c0e1.I0d1d5daea3089be3fc0dccc92991b0f8c5677f0c@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index cd3731cbf6c6..c336267f4599 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1219,8 +1219,11 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); - if (local->in_reconfig) + /* In reconfig don't transmit now, but mark for waking later */ + if (local->in_reconfig) { + set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags); return; + } if (!check_sdata_in_driver(sdata)) return; -- cgit From 13dee10b30c058ee2c58c5da00339cc0d4201aa6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:40 +0200 Subject: mac80211: do drv_reconfig_complete() before restarting all When we reconfigure, the driver might do some things to complete the reconfiguration. It's strange and could be broken in some cases because we restart other works (e.g. remain-on-channel and TX) before this happens, yet only start queues later. Change this to do the reconfig complete when reconfiguration is actually complete, not when we've already started doing other things again. For iwlwifi, this should fix a race where the reconfig can race with TX, for ath10k and ath11k that also use this it won't make a difference because they just start queues there, and mac80211 also stopped the queues and will restart them later as before. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.cab99f22fe19.Iefe494687f15fd85f77c1b989d1149c8efdfdc36@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fe2903b84ceb..0e4e1956bcea 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2651,6 +2651,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } + /* + * If this is for hw restart things are still running. + * We may want to change that later, however. + */ + if (local->open_count && (!suspended || reconfig_due_to_wowlan)) + drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); + if (local->in_reconfig) { local->in_reconfig = false; barrier(); @@ -2669,13 +2676,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) IEEE80211_QUEUE_STOP_REASON_SUSPEND, false); - /* - * If this is for hw restart things are still running. - * We may want to change that later, however. - */ - if (local->open_count && (!suspended || reconfig_due_to_wowlan)) - drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); - if (!suspended) return 0; -- cgit From aeb7c75cb77478fdbf821628e9c95c4baa9adc63 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Sat, 11 Dec 2021 22:51:34 +0800 Subject: net: stmmac: fix tc flower deletion for VLAN priority Rx steering To replicate the issue:- 1) Add 1 flower filter for VLAN Priority based frame steering:- $ IFDEVNAME=eth0 $ tc qdisc add dev $IFDEVNAME ingress $ tc qdisc add dev $IFDEVNAME root mqprio num_tc 8 \ map 0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0 $ tc filter add dev $IFDEVNAME parent ffff: protocol 802.1Q \ flower vlan_prio 0 hw_tc 0 2) Get the 'pref' id $ tc filter show dev $IFDEVNAME ingress 3) Delete a specific tc flower record (say pref 49151) $ tc filter del dev $IFDEVNAME parent ffff: pref 49151 From dmesg, we will observe kernel NULL pointer ooops [ 197.170464] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 197.171367] #PF: supervisor read access in kernel mode [ 197.171367] #PF: error_code(0x0000) - not-present page [ 197.171367] PGD 0 P4D 0 [ 197.171367] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 197.171367] RIP: 0010:tc_setup_cls+0x20b/0x4a0 [stmmac] [ 197.171367] Call Trace: [ 197.171367] [ 197.171367] ? __stmmac_disable_all_queues+0xa8/0xe0 [stmmac] [ 197.171367] stmmac_setup_tc_block_cb+0x70/0x110 [stmmac] [ 197.171367] tc_setup_cb_destroy+0xb3/0x180 [ 197.171367] fl_hw_destroy_filter+0x94/0xc0 [cls_flower] The above issue is due to previous incorrect implementation of tc_del_vlan_flow(), shown below, that uses flow_cls_offload_flow_rule() to get struct flow_rule *rule which is no longer valid for tc filter delete operation. struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; So, to ensure tc_del_vlan_flow() deletes the right VLAN cls record for earlier configured RX queue (configured by hw_tc) in tc_add_vlan_flow(), this patch introduces stmmac_rfs_entry as driver-side flow_cls_offload record for 'RX frame steering' tc flower, currently used for VLAN priority. The implementation has taken consideration for future extension to include other type RX frame steering such as EtherType based. v2: - Clean up overly extensive backtrace and rewrite git message to better explain the kernel NULL pointer issue. Fixes: 0e039f5cf86c ("net: stmmac: add RX frame steering based on VLAN priority in tc flower") Tested-by: Kurt Kanzenbach Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 17 +++++ drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 86 +++++++++++++++++++++---- 2 files changed, 90 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 5f129733aabd..873b9e3e5da2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -172,6 +172,19 @@ struct stmmac_flow_entry { int is_l4; }; +/* Rx Frame Steering */ +enum stmmac_rfs_type { + STMMAC_RFS_T_VLAN, + STMMAC_RFS_T_MAX, +}; + +struct stmmac_rfs_entry { + unsigned long cookie; + int in_use; + int type; + int tc; +}; + struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames[MTL_MAX_TX_QUEUES]; @@ -289,6 +302,10 @@ struct stmmac_priv { struct stmmac_tc_entry *tc_entries; unsigned int flow_entries_max; struct stmmac_flow_entry *flow_entries; + unsigned int rfs_entries_max[STMMAC_RFS_T_MAX]; + unsigned int rfs_entries_cnt[STMMAC_RFS_T_MAX]; + unsigned int rfs_entries_total; + struct stmmac_rfs_entry *rfs_entries; /* Pulse Per Second output */ struct stmmac_pps_cfg pps[STMMAC_PPS_MAX]; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1c4ea0b1b845..d0a2b289f460 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -232,11 +232,33 @@ static int tc_setup_cls_u32(struct stmmac_priv *priv, } } +static int tc_rfs_init(struct stmmac_priv *priv) +{ + int i; + + priv->rfs_entries_max[STMMAC_RFS_T_VLAN] = 8; + + for (i = 0; i < STMMAC_RFS_T_MAX; i++) + priv->rfs_entries_total += priv->rfs_entries_max[i]; + + priv->rfs_entries = devm_kcalloc(priv->device, + priv->rfs_entries_total, + sizeof(*priv->rfs_entries), + GFP_KERNEL); + if (!priv->rfs_entries) + return -ENOMEM; + + dev_info(priv->device, "Enabled RFS Flow TC (entries=%d)\n", + priv->rfs_entries_total); + + return 0; +} + static int tc_init(struct stmmac_priv *priv) { struct dma_features *dma_cap = &priv->dma_cap; unsigned int count; - int i; + int ret, i; if (dma_cap->l3l4fnum) { priv->flow_entries_max = dma_cap->l3l4fnum; @@ -250,10 +272,14 @@ static int tc_init(struct stmmac_priv *priv) for (i = 0; i < priv->flow_entries_max; i++) priv->flow_entries[i].idx = i; - dev_info(priv->device, "Enabled Flow TC (entries=%d)\n", + dev_info(priv->device, "Enabled L3L4 Flow TC (entries=%d)\n", priv->flow_entries_max); } + ret = tc_rfs_init(priv); + if (ret) + return -ENOMEM; + if (!priv->plat->fpe_cfg) { priv->plat->fpe_cfg = devm_kzalloc(priv->device, sizeof(*priv->plat->fpe_cfg), @@ -607,16 +633,45 @@ static int tc_del_flow(struct stmmac_priv *priv, return ret; } +static struct stmmac_rfs_entry *tc_find_rfs(struct stmmac_priv *priv, + struct flow_cls_offload *cls, + bool get_free) +{ + int i; + + for (i = 0; i < priv->rfs_entries_total; i++) { + struct stmmac_rfs_entry *entry = &priv->rfs_entries[i]; + + if (entry->cookie == cls->cookie) + return entry; + if (get_free && entry->in_use == false) + return entry; + } + + return NULL; +} + #define VLAN_PRIO_FULL_MASK (0x07) static int tc_add_vlan_flow(struct stmmac_priv *priv, struct flow_cls_offload *cls) { + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; int tc = tc_classid_to_hwtc(priv->dev, cls->classid); struct flow_match_vlan match; + if (!entry) { + entry = tc_find_rfs(priv, cls, true); + if (!entry) + return -ENOENT; + } + + if (priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN] >= + priv->rfs_entries_max[STMMAC_RFS_T_VLAN]) + return -ENOENT; + /* Nothing to do here */ if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) return -EINVAL; @@ -638,6 +693,12 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv, prio = BIT(match.key->vlan_priority); stmmac_rx_queue_prio(priv, priv->hw, prio, tc); + + entry->in_use = true; + entry->cookie = cls->cookie; + entry->tc = tc; + entry->type = STMMAC_RFS_T_VLAN; + priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]++; } return 0; @@ -646,20 +707,19 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv, static int tc_del_vlan_flow(struct stmmac_priv *priv, struct flow_cls_offload *cls) { - struct flow_rule *rule = flow_cls_offload_flow_rule(cls); - struct flow_dissector *dissector = rule->match.dissector; - int tc = tc_classid_to_hwtc(priv->dev, cls->classid); + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); - /* Nothing to do here */ - if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) - return -EINVAL; + if (!entry || !entry->in_use || entry->type != STMMAC_RFS_T_VLAN) + return -ENOENT; - if (tc < 0) { - netdev_err(priv->dev, "Invalid traffic class\n"); - return -EINVAL; - } + stmmac_rx_queue_prio(priv, priv->hw, 0, entry->tc); + + entry->in_use = false; + entry->cookie = 0; + entry->tc = 0; + entry->type = 0; - stmmac_rx_queue_prio(priv, priv->hw, 0, tc); + priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]--; return 0; } -- cgit From 166b6a46b78bf8b9559a6620c3032f9fe492e082 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Mon, 13 Dec 2021 15:46:04 +0100 Subject: flow_offload: return EOPNOTSUPP for the unsupported mpls action type We need to return EOPNOTSUPP for the unsupported mpls action type when setup the flow action. In the original implement, we will return 0 for the unsupported mpls action type, actually we do not setup it and the following actions to the flow action entry. Fixes: 9838b20a7fb2 ("net: sched: take rtnl lock in tc_setup_flow_action()") Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ef8f5a6205a..e54f0a42270c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3687,6 +3687,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; default: + err = -EOPNOTSUPP; goto err_out_locked; } } else if (is_tcf_skbedit_ptype(act)) { -- cgit From 5f9562ebe710c307adc5f666bf1a2162ee7977c0 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Tue, 14 Dec 2021 18:46:59 +0800 Subject: rds: memory leak in __rds_conn_create() __rds_conn_create() did not release conn->c_path when loop_trans != 0 and trans->t_prefer_loopback != 0 and is_outgoing == 0. Fixes: aced3ce57cd3 ("RDS tcp loopback connection can hang") Signed-off-by: Hangyu Hua Reviewed-by: Sharath Srinivasan Signed-off-by: David S. Miller --- net/rds/connection.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/connection.c b/net/rds/connection.c index a3bc4b54d491..b4cc699c5fad 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,6 +253,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, * should end up here, but if it * does, reset/destroy the connection. */ + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(-EOPNOTSUPP); goto out; -- cgit From b442f2ea84624873d538e4c5986d7c0d40883a47 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 14 Dec 2021 12:21:36 +0200 Subject: mlxsw: spectrum_router: Consolidate MAC profiles when possible Currently, when setting a router interface (RIF) MAC address while the MAC profile is not shared with other RIFs, the profile is edited so that the new MAC address is assigned to it. This does not take into account a situation in which the new MAC address already matches an existing MAC profile. In that situation, two MAC profiles will be occupied even though they hold MAC addresses from the same profile. In order to prevent that, add a check to ensure that editing a MAC profile takes place only when the new MAC address does not match an existing profile. Fixes: 605d25cd782a6 ("mlxsw: spectrum_router: Add RIF MAC profiles support") Reported-by: Maksym Yaremchuk Tested-by: Maksym Yaremchuk Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 217e3b351dfe..c34833ff1dde 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8494,7 +8494,8 @@ mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp, u8 mac_profile; int err; - if (!mlxsw_sp_rif_mac_profile_is_shared(rif)) + if (!mlxsw_sp_rif_mac_profile_is_shared(rif) && + !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac)) return mlxsw_sp_rif_mac_profile_edit(rif, new_mac); err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac, -- cgit From 20617717cd219d3c1f798cd13dbce1bcd86a6ece Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 14 Dec 2021 12:21:37 +0200 Subject: selftests: mlxsw: Add a test case for MAC profiles consolidation Add a test case to cover the bug fixed by the previous patch. Edit the MAC address of one netdev so that it matches the MAC address of the second netdev. Verify that the two MAC profiles were consolidated by testing that the MAC profiles occupancy decreased by one. Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/rif_mac_profiles_occ.sh | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh index b513f64d9092..026a126f584d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh @@ -72,6 +72,35 @@ rif_mac_profile_replacement_test() ip link set $h1.10 address $h1_10_mac } +rif_mac_profile_consolidation_test() +{ + local count=$1; shift + local h1_20_mac + + RET=0 + + if [[ $count -eq 1 ]]; then + return + fi + + h1_20_mac=$(mac_get $h1.20) + + # Set the MAC of $h1.20 to that of $h1.10 and confirm that they are + # using the same MAC profile. + ip link set $h1.20 address 00:11:11:11:11:11 + check_err $? + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]') + + [[ $occ -eq $((count - 1)) ]] + check_err $? "MAC profile occupancy did not decrease" + + log_test "RIF MAC profile consolidation" + + ip link set $h1.20 address $h1_20_mac +} + rif_mac_profile_shared_replacement_test() { local count=$1; shift @@ -104,6 +133,7 @@ rif_mac_profile_edit_test() create_max_rif_mac_profiles $count rif_mac_profile_replacement_test + rif_mac_profile_consolidation_test $count rif_mac_profile_shared_replacement_test $count } -- cgit From bd0687c18e635b63233dc87f38058cd728802ab4 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 14 Dec 2021 11:26:07 +0100 Subject: xsk: Do not sleep in poll() when need_wakeup set Do not sleep in poll() when the need_wakeup flag is set. When this flag is set, the application needs to explicitly wake up the driver with a syscall (poll, recvmsg, sendmsg, etc.) to guarantee that Rx and/or Tx processing will be processed promptly. But the current code in poll(), sleeps first then wakes up the driver. This means that no driver processing will occur (baring any interrupts) until the timeout has expired. Fix this by checking the need_wakeup flag first and if set, wake the driver and return to the application. Only if need_wakeup is not set should the process sleep if there is a timeout set in the poll() call. Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") Reported-by: Keith Wiles Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20211214102607.7677-1-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f16074eb53c7..7a466ea962c5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -677,8 +677,6 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; - sock_poll_wait(file, sock, wait); - if (unlikely(!xsk_is_bound(xs))) return mask; @@ -690,6 +688,8 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, else /* Poll needs to drive Tx also in copy mode */ __xsk_sendmsg(sk); + } else { + sock_poll_wait(file, sock, wait); } if (xs->rx && !xskq_prod_is_empty(xs->rx)) -- cgit From f7abc4c8df8c7930d0b9c56d9abee9a1fca635e9 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 14 Dec 2021 07:18:00 +0530 Subject: selftests/bpf: Fix OOB write in test_verifier The commit referenced below added fixup_map_timer support (to create a BPF map containing timers), but failed to increase the size of the map_fds array, leading to out of bounds write. Fix this by changing MAX_NR_MAPS to 22. Fixes: e60e6962c503 ("selftests/bpf: Add tests for restricted helpers") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211214014800.78762-1-memxor@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 465ef3f112c0..d3bf83d5c6cf 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -54,7 +54,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 21 +#define MAX_NR_MAPS 22 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 -- cgit From 0013881c1145d36bf26165bb70fdd7560a5507a3 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Thu, 4 Nov 2021 14:52:11 +0100 Subject: ice: Use div64_u64 instead of div_u64 in adjfine Change the division in ice_ptp_adjfine from div_u64 to div64_u64. div_u64 is used when the divisor is 32 bit but in this case incval is 64 bit and it caused incorrect calculations and incval adjustments. Fixes: 06c16d89d2cb ("ice: register 1588 PTP clock device object for E810 devices") Signed-off-by: Karol Kolacinski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index bf7247c6f58e..ad7cabe7932f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -705,7 +705,7 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) scaled_ppm = -scaled_ppm; } - while ((u64)scaled_ppm > div_u64(U64_MAX, incval)) { + while ((u64)scaled_ppm > div64_u64(U64_MAX, incval)) { /* handle overflow by scaling down the scaled_ppm and * the divisor, losing some precision */ -- cgit From 37e738b6fdb14529534dca441e0222313688fde3 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Tue, 16 Nov 2021 13:07:14 +0100 Subject: ice: Don't put stale timestamps in the skb The driver has to check if it does not accidentally put the timestamp in the SKB before previous timestamp gets overwritten. Timestamp values in the PHY are read only and do not get cleared except at hardware reset or when a new timestamp value is captured. The cached_tstamp field is used to detect the case where a new timestamp has not yet been captured, ensuring that we avoid sending stale timestamp data to the stack. Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Karol Kolacinski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 11 ++++------- drivers/net/ethernet/intel/ice/ice_ptp.h | 6 ++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ad7cabe7932f..442b031b0edc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1540,19 +1540,16 @@ static void ice_ptp_tx_tstamp_work(struct kthread_work *work) if (err) continue; - /* Check if the timestamp is valid */ - if (!(raw_tstamp & ICE_PTP_TS_VALID)) + /* Check if the timestamp is invalid or stale */ + if (!(raw_tstamp & ICE_PTP_TS_VALID) || + raw_tstamp == tx->tstamps[idx].cached_tstamp) continue; - /* clear the timestamp register, so that it won't show valid - * again when re-used. - */ - ice_clear_phy_tstamp(hw, tx->quad, phy_idx); - /* The timestamp is valid, so we'll go ahead and clear this * index and then send the timestamp up to the stack. */ spin_lock(&tx->lock); + tx->tstamps[idx].cached_tstamp = raw_tstamp; clear_bit(idx, tx->in_use); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index f71ad317d6c8..53c15fc9d996 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -55,15 +55,21 @@ struct ice_perout_channel { * struct ice_tx_tstamp - Tracking for a single Tx timestamp * @skb: pointer to the SKB for this timestamp request * @start: jiffies when the timestamp was first requested + * @cached_tstamp: last read timestamp * * This structure tracks a single timestamp request. The SKB pointer is * provided when initiating a request. The start time is used to ensure that * we discard old requests that were not fulfilled within a 2 second time * window. + * Timestamp values in the PHY are read only and do not get cleared except at + * hardware reset or when a new timestamp value is captured. The cached_tstamp + * field is used to detect the case where a new timestamp has not yet been + * captured, ensuring that we avoid sending stale timestamp data to the stack. */ struct ice_tx_tstamp { struct sk_buff *skb; unsigned long start; + u64 cached_tstamp; }; /** -- cgit From 404cd9a22150f24acf23a8df2ad0c094ba379f57 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Dec 2021 15:16:01 -0800 Subject: mptcp: remove tcp ulp setsockopt support TCP_ULP setsockopt cannot be used for mptcp because its already used internally to plumb subflow (tcp) sockets to the mptcp layer. syzbot managed to trigger a crash for mptcp connections that are in fallback mode: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] CPU: 1 PID: 1083 Comm: syz-executor.3 Not tainted 5.16.0-rc2-syzkaller #0 RIP: 0010:tls_build_proto net/tls/tls_main.c:776 [inline] [..] __tcp_set_ulp net/ipv4/tcp_ulp.c:139 [inline] tcp_set_ulp+0x428/0x4c0 net/ipv4/tcp_ulp.c:160 do_tcp_setsockopt+0x455/0x37c0 net/ipv4/tcp.c:3391 mptcp_setsockopt+0x1b47/0x2400 net/mptcp/sockopt.c:638 Remove support for TCP_ULP setsockopt. Fixes: d9e4c1291810 ("mptcp: only admit explicitly supported sockopt") Reported-by: syzbot+1fd9b69cde42967d1add@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0f1e661c2032..f8efd478ac97 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -525,7 +525,6 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NODELAY: case TCP_THIN_LINEAR_TIMEOUTS: case TCP_CONGESTION: - case TCP_ULP: case TCP_CORK: case TCP_KEEPIDLE: case TCP_KEEPINTVL: -- cgit From d6692b3b97bdc165d150f4c1505751a323a80717 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Dec 2021 15:16:02 -0800 Subject: mptcp: clear 'kern' flag from fallback sockets The mptcp ULP extension relies on sk->sk_sock_kern being set correctly: It prevents setsockopt(fd, IPPROTO_TCP, TCP_ULP, "mptcp", 6); from working for plain tcp sockets (any userspace-exposed socket). But in case of fallback, accept() can return a plain tcp sk. In such case, sk is still tagged as 'kernel' and setsockopt will work. This will crash the kernel, The subflow extension has a NULL ctx->conn mptcp socket: BUG: KASAN: null-ptr-deref in subflow_data_ready+0x181/0x2b0 Call Trace: tcp_data_ready+0xf8/0x370 [..] Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections") Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c82a76d2d0bf..6dc1ff07994c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2879,7 +2879,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, */ if (WARN_ON_ONCE(!new_mptcp_sock)) { tcp_sk(newsk)->is_mptcp = 0; - return newsk; + goto out; } /* acquire the 2nd reference for the owning socket */ @@ -2891,6 +2891,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } +out: + newsk->sk_kern_sock = kern; return newsk; } -- cgit From 3d79e3756ca90f7a6087b77b62c1d9c0801e0820 Mon Sep 17 00:00:00 2001 From: Maxim Galaganov Date: Tue, 14 Dec 2021 15:16:03 -0800 Subject: mptcp: fix deadlock in __mptcp_push_pending() __mptcp_push_pending() may call mptcp_flush_join_list() with subflow socket lock held. If such call hits mptcp_sockopt_sync_all() then subsequently __mptcp_sockopt_sync() could try to lock the subflow socket for itself, causing a deadlock. sysrq: Show Blocked State task:ss-server state:D stack: 0 pid: 938 ppid: 1 flags:0x00000000 Call Trace: __schedule+0x2d6/0x10c0 ? __mod_memcg_state+0x4d/0x70 ? csum_partial+0xd/0x20 ? _raw_spin_lock_irqsave+0x26/0x50 schedule+0x4e/0xc0 __lock_sock+0x69/0x90 ? do_wait_intr_irq+0xa0/0xa0 __lock_sock_fast+0x35/0x50 mptcp_sockopt_sync_all+0x38/0xc0 __mptcp_push_pending+0x105/0x200 mptcp_sendmsg+0x466/0x490 sock_sendmsg+0x57/0x60 __sys_sendto+0xf0/0x160 ? do_wait_intr_irq+0xa0/0xa0 ? fpregs_restore_userregs+0x12/0xd0 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f9ba546c2d0 RSP: 002b:00007ffdc3b762d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f9ba56c8060 RCX: 00007f9ba546c2d0 RDX: 000000000000077a RSI: 0000000000e5e180 RDI: 0000000000000234 RBP: 0000000000cc57f0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9ba56c8060 R13: 0000000000b6ba60 R14: 0000000000cc7840 R15: 41d8685b1d7901b8 Fix the issue by using __mptcp_flush_join_list() instead of plain mptcp_flush_join_list() inside __mptcp_push_pending(), as suggested by Florian. The sockopt sync will be deferred to the workqueue. Fixes: 1b3e7ede1365 ("mptcp: setsockopt: handle SO_KEEPALIVE and SO_PRIORITY") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/244 Suggested-by: Florian Westphal Reviewed-by: Florian Westphal Signed-off-by: Maxim Galaganov Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6dc1ff07994c..54613f5b7521 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1524,7 +1524,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) int ret = 0; prev_ssk = ssk; - mptcp_flush_join_list(msk); + __mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); /* First check. If the ssk has changed since -- cgit From 6813b1928758ce64fabbb8ef157f994b7c2235fa Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 14 Dec 2021 15:16:04 -0800 Subject: mptcp: add missing documented NL params 'loc_id' and 'rem_id' are set in all events linked to subflows but those were missing in the events description in the comments. Fixes: b911c97c7dc7 ("mptcp: add netlink event support") Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index c8cc46f80a16..f106a3941cdf 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -136,19 +136,21 @@ struct mptcp_info { * MPTCP_EVENT_REMOVED: token, rem_id * An address has been lost by the peer. * - * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, - * daddr4 | daddr6, sport, dport, backup, - * if_idx [, error] + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, + * saddr4 | saddr6, daddr4 | daddr6, sport, + * dport, backup, if_idx [, error] * A new subflow has been established. 'error' should not be set. * - * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] + * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] * A subflow has been closed. An error (copy of sk_err) could be set if an * error has been detected for this subflow. * - * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] - * The priority of a subflow has changed. 'error' should not be set. + * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] + * The priority of a subflow has changed. 'error' should not be set. */ enum mptcp_event_type { MPTCP_EVENT_UNSPEC = 0, -- cgit From 7d3baf0afa3aa9102d6a521a8e4c41888bb79882 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 12:51:56 +0000 Subject: bpf: Fix kernel address leakage in atomic fetch The change in commit 37086bfdc737 ("bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH") around check_mem_access() handling is buggy since this would allow for unprivileged users to leak kernel pointers. For example, an atomic fetch/and with -1 on a stack destination which holds a spilled pointer will migrate the spilled register type into a scalar, which can then be exported out of the program (since scalar != pointer) by dumping it into a map value. The original implementation of XADD was preventing this situation by using a double call to check_mem_access() one with BPF_READ and a subsequent one with BPF_WRITE, in both cases passing -1 as a placeholder value instead of register as per XADD semantics since it didn't contain a value fetch. The BPF_READ also included a check in check_stack_read_fixed_off() which rejects the program if the stack slot is of __is_pointer_value() if dst_regno < 0. The latter is to distinguish whether we're dealing with a regular stack spill/ fill or some arithmetical operation which is disallowed on non-scalars, see also 6e7e63cbb023 ("bpf: Forbid XADD on spilled pointers for unprivileged users") for more context on check_mem_access() and its handling of placeholder value -1. One minimally intrusive option to fix the leak is for the BPF_FETCH case to initially check the BPF_READ case via check_mem_access() with -1 as register, followed by the actual load case with non-negative load_reg to propagate stack bounds to registers. Fixes: 37086bfdc737 ("bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH") Reported-by: Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2f1ed34cfe9..53d39db3b0fa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4584,13 +4584,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i load_reg = -1; } - /* check whether we can read the memory */ + /* Check whether we can read the memory, with second call for fetch + * case to simulate the register fill. + */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, load_reg, true); + BPF_SIZE(insn->code), BPF_READ, -1, true); + if (!err && load_reg >= 0) + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, load_reg, + true); if (err) return err; - /* check whether we can write into the same memory */ + /* Check whether we can write into the same memory. */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true); if (err) -- cgit From 180486b430f4e22cc00a478163d942804baae4b5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 10:07:04 +0000 Subject: bpf, selftests: Add test case for atomic fetch on spilled pointer Test whether unprivileged would be able to leak the spilled pointer either by exporting the returned value from the atomic{32,64} operation or by reading and exporting the value from the stack after the atomic operation took place. Note that for unprivileged, the below atomic cmpxchg test case named "Dest pointer in r0 - succeed" is failing. The reason is that in the dst memory location (r10 -8) there is the spilled register r10: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (bf) r0 = r10 1: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (7b) *(u64 *)(r10 -8) = r0 2: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 fp-8_w=fp 2: (b7) r1 = 0 3: R0_w=fp0 R1_w=invP0 R10=fp0 fp-8_w=fp 3: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r1) 4: R0_w=fp0 R1_w=invP0 R10=fp0 fp-8_w=mmmmmmmm 4: (79) r1 = *(u64 *)(r0 -8) 5: R0_w=fp0 R1_w=invP(id=0) R10=fp0 fp-8_w=mmmmmmmm 5: (b7) r0 = 0 6: R0_w=invP0 R1_w=invP(id=0) R10=fp0 fp-8_w=mmmmmmmm 6: (95) exit However, allowing this case for unprivileged is a bit useless given an update with a new pointer will fail anyway: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (bf) r0 = r10 1: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (7b) *(u64 *)(r10 -8) = r0 2: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 fp-8_w=fp 2: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r10) R10 leaks addr into mem Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/atomic_cmpxchg.c | 23 ++++++ .../testing/selftests/bpf/verifier/atomic_fetch.c | 94 ++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index c22dc83a41fd..0ffc69f602af 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -156,4 +156,27 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "Dest pointer in r0 - succeed, check 2", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R5 leaks addr into mem", }, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c index 3bc9ff7a860b..5bf03fb4fa2b 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_fetch.c +++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c @@ -1,3 +1,97 @@ +{ + "atomic dw/fetch and address leakage of (map ptr & -1) via stack slot", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "atomic dw/fetch and address leakage of (map ptr & -1) via returned value", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "atomic w/fetch and address leakage of (map ptr & -1) via stack slot", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = REJECT, + .errstr = "invalid size of register fill", +}, +{ + "atomic w/fetch and address leakage of (map ptr & -1) via returned value", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = REJECT, + .errstr = "invalid size of register fill", +}, #define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \ { \ "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \ -- cgit From a82fe085f344ef20b452cd5f481010ff96b5c4cd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 11:02:02 +0000 Subject: bpf: Fix kernel address leakage in atomic cmpxchg's r0 aux reg The implementation of BPF_CMPXCHG on a high level has the following parameters: .-[old-val] .-[new-val] BPF_R0 = cmpxchg{32,64}(DST_REG + insn->off, BPF_R0, SRC_REG) `-[mem-loc] `-[old-val] Given a BPF insn can only have two registers (dst, src), the R0 is fixed and used as an auxilliary register for input (old value) as well as output (returning old value from memory location). While the verifier performs a number of safety checks, it misses to reject unprivileged programs where R0 contains a pointer as old value. Through brute-forcing it takes about ~16sec on my machine to leak a kernel pointer with BPF_CMPXCHG. The PoC is basically probing for kernel addresses by storing the guessed address into the map slot as a scalar, and using the map value pointer as R0 while SRC_REG has a canary value to detect a matching address. Fix it by checking R0 for pointers, and reject if that's the case for unprivileged programs. Fixes: 5ffa25502b5a ("bpf: Add instructions for atomic_[cmp]xchg") Reported-by: Ryota Shiga (Flatt Security) Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 53d39db3b0fa..2d48159b58bd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4547,9 +4547,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (insn->imm == BPF_CMPXCHG) { /* Check comparison of R0 with memory location */ - err = check_reg_arg(env, BPF_REG_0, SRC_OP); + const u32 aux_reg = BPF_REG_0; + + err = check_reg_arg(env, aux_reg, SRC_OP); if (err) return err; + + if (is_pointer_value(env, aux_reg)) { + verbose(env, "R%d leaks addr into mem\n", aux_reg); + return -EACCES; + } } if (is_pointer_value(env, insn->src_reg)) { -- cgit From e523102cb719cbad1673b6aa2a4d5c1fa6f13799 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 13 Dec 2021 22:25:23 +0000 Subject: bpf, selftests: Update test case for atomic cmpxchg on r0 with pointer Fix up unprivileged test case results for 'Dest pointer in r0' verifier tests given they now need to reject R0 containing a pointer value, and add a couple of new related ones with 32bit cmpxchg as well. root@foo:~/bpf/tools/testing/selftests/bpf# ./test_verifier #0/u invalid and of negative number OK #0/p invalid and of negative number OK [...] #1268/p XDP pkt read, pkt_meta' <= pkt_data, bad access 1 OK #1269/p XDP pkt read, pkt_meta' <= pkt_data, bad access 2 OK #1270/p XDP pkt read, pkt_data <= pkt_meta', good access OK #1271/p XDP pkt read, pkt_data <= pkt_meta', bad access 1 OK #1272/p XDP pkt read, pkt_data <= pkt_meta', bad access 2 OK Summary: 1900 PASSED, 0 SKIPPED, 0 FAILED Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/atomic_cmpxchg.c | 67 +++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 0ffc69f602af..b39665f33524 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -138,6 +138,8 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr into mem", }, { "Dest pointer in r0 - succeed", @@ -157,7 +159,7 @@ }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "leaking pointer from stack off -8", + .errstr_unpriv = "R0 leaks addr into mem", }, { "Dest pointer in r0 - succeed, check 2", @@ -178,5 +180,66 @@ }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R5 leaks addr into mem", + .errstr_unpriv = "R0 leaks addr into mem", +}, +{ + "Dest pointer in r0 - succeed, check 3", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid size of register fill", + .errstr_unpriv = "R0 leaks addr into mem", +}, +{ + "Dest pointer in r0 - succeed, check 4", + .insns = { + /* r0 = &val */ + BPF_MOV32_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV32_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r10 */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R10 partial copy of pointer", +}, +{ + "Dest pointer in r0 - succeed, check 5", + .insns = { + /* r0 = &val */ + BPF_MOV32_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV32_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 invalid mem access", + .errstr_unpriv = "R10 partial copy of pointer", }, -- cgit From 584af82154f56e6b2740160fcc84a2966d969e15 Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Tue, 31 Aug 2021 13:16:35 +0200 Subject: igb: Fix removal of unicast MAC filters of VFs Move checking condition of VF MAC filter before clearing or adding MAC filter to VF to prevent potential blackout caused by removal of necessary and working VF's MAC filter. Fixes: 1b8b062a99dc ("igb: add VF trust infrastructure") Signed-off-by: Karen Sornek Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd54d3ef890b..b597b8bfb910 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7648,6 +7648,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, struct vf_mac_filter *entry = NULL; int ret = 0; + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { + dev_warn(&pdev->dev, + "VF %d requested MAC filter but is administratively denied\n", + vf); + return -EINVAL; + } + if (!is_valid_ether_addr(addr)) { + dev_warn(&pdev->dev, + "VF %d attempted to set invalid MAC filter\n", + vf); + return -EINVAL; + } + switch (info) { case E1000_VF_MAC_FILTER_CLR: /* remove all unicast MAC filters related to the current VF */ @@ -7661,20 +7675,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, } break; case E1000_VF_MAC_FILTER_ADD: - if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && - !vf_data->trusted) { - dev_warn(&pdev->dev, - "VF %d requested MAC filter but is administratively denied\n", - vf); - return -EINVAL; - } - if (!is_valid_ether_addr(addr)) { - dev_warn(&pdev->dev, - "VF %d attempted to set invalid MAC filter\n", - vf); - return -EINVAL; - } - /* try to find empty slot in the list */ list_for_each(pos, &adapter->vf_macs.l) { entry = list_entry(pos, struct vf_mac_filter, l); -- cgit From b6d335a60dc624c0d279333b22c737faa765b028 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Sat, 13 Nov 2021 11:42:34 +0800 Subject: igbvf: fix double free in `igbvf_probe` In `igbvf_probe`, if register_netdev() fails, the program will go to label err_hw_init, and then to label err_ioremap. In free_netdev() which is just below label err_ioremap, there is `list_for_each_entry_safe` and `netif_napi_del` which aims to delete all entries in `dev->napi_list`. The program has added an entry `adapter->rx_ring->napi` which is added by `netif_napi_add` in igbvf_alloc_queues(). However, adapter->rx_ring has been freed below label err_hw_init. So this a UAF. In terms of how to patch the problem, we can refer to igbvf_remove() and delete the entry before `adapter->rx_ring`. The KASAN logs are as follows: [ 35.126075] BUG: KASAN: use-after-free in free_netdev+0x1fd/0x450 [ 35.127170] Read of size 8 at addr ffff88810126d990 by task modprobe/366 [ 35.128360] [ 35.128643] CPU: 1 PID: 366 Comm: modprobe Not tainted 5.15.0-rc2+ #14 [ 35.129789] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 35.131749] Call Trace: [ 35.132199] dump_stack_lvl+0x59/0x7b [ 35.132865] print_address_description+0x7c/0x3b0 [ 35.133707] ? free_netdev+0x1fd/0x450 [ 35.134378] __kasan_report+0x160/0x1c0 [ 35.135063] ? free_netdev+0x1fd/0x450 [ 35.135738] kasan_report+0x4b/0x70 [ 35.136367] free_netdev+0x1fd/0x450 [ 35.137006] igbvf_probe+0x121d/0x1a10 [igbvf] [ 35.137808] ? igbvf_vlan_rx_add_vid+0x100/0x100 [igbvf] [ 35.138751] local_pci_probe+0x13c/0x1f0 [ 35.139461] pci_device_probe+0x37e/0x6c0 [ 35.165526] [ 35.165806] Allocated by task 366: [ 35.166414] ____kasan_kmalloc+0xc4/0xf0 [ 35.167117] foo_kmem_cache_alloc_trace+0x3c/0x50 [igbvf] [ 35.168078] igbvf_probe+0x9c5/0x1a10 [igbvf] [ 35.168866] local_pci_probe+0x13c/0x1f0 [ 35.169565] pci_device_probe+0x37e/0x6c0 [ 35.179713] [ 35.179993] Freed by task 366: [ 35.180539] kasan_set_track+0x4c/0x80 [ 35.181211] kasan_set_free_info+0x1f/0x40 [ 35.181942] ____kasan_slab_free+0x103/0x140 [ 35.182703] kfree+0xe3/0x250 [ 35.183239] igbvf_probe+0x1173/0x1a10 [igbvf] [ 35.184040] local_pci_probe+0x13c/0x1f0 Fixes: d4e0fe01a38a0 (igbvf: add new driver to support 82576 virtual functions) Reported-by: Zheyu Ma Signed-off-by: Letu Ren Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 74ccd622251a..4d988da68394 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2859,6 +2859,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_hw_init: + netif_napi_del(&adapter->rx_ring->napi); kfree(adapter->tx_ring); kfree(adapter->rx_ring); err_sw_init: -- cgit From 0182d1f3fa640888a2ed7e3f6df2fdb10adee7c8 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 2 Nov 2021 09:20:06 +0200 Subject: igc: Fix typo in i225 LTR functions The LTR maximum value was incorrectly written using the scale from the LTR minimum value. This would cause incorrect values to be sent, in cases where the initial calculation lead to different min/max scales. Fixes: 707abf069548 ("igc: Add initial LTR support") Suggested-by: Dima Ruinskiy Signed-off-by: Sasha Neftin Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_i225.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index b2ef9fde97b3..b6807e16eea9 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -636,7 +636,7 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) ltrv = rd32(IGC_LTRMAXV); if (ltr_max != (ltrv & IGC_LTRMAXV_LTRV_MASK)) { ltrv = IGC_LTRMAXV_LSNP_REQ | ltr_max | - (scale_min << IGC_LTRMAXV_SCALE_SHIFT); + (scale_max << IGC_LTRMAXV_SCALE_SHIFT); wr32(IGC_LTRMAXV, ltrv); } } -- cgit From 271225fd57c2f1e0b3f8826df51be6c634affefe Mon Sep 17 00:00:00 2001 From: Robert Schlabbach Date: Tue, 26 Oct 2021 02:24:48 +0200 Subject: ixgbe: Document how to enable NBASE-T support Commit a296d665eae1 ("ixgbe: Add ethtool support to enable 2.5 and 5.0 Gbps support") introduced suppression of the advertisement of NBASE-T speeds by default, according to Todd Fujinaka to accommodate customers with network switches which could not cope with advertised NBASE-T speeds, as posted in the E1000-devel mailing list: https://sourceforge.net/p/e1000/mailman/message/37106269/ However, the suppression was not documented at all, nor was how to enable NBASE-T support. Properly document the NBASE-T suppression and how to enable NBASE-T support. Fixes: a296d665eae1 ("ixgbe: Add ethtool support to enable 2.5 and 5.0 Gbps support") Reported-by: Robert Schlabbach Signed-off-by: Robert Schlabbach Signed-off-by: Tony Nguyen --- .../networking/device_drivers/ethernet/intel/ixgbe.rst | 16 ++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst index f1d5233e5e51..0a233b17c664 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst @@ -440,6 +440,22 @@ NOTE: For 82599-based network connections, if you are enabling jumbo frames in a virtual function (VF), jumbo frames must first be enabled in the physical function (PF). The VF MTU setting cannot be larger than the PF MTU. +NBASE-T Support +--------------- +The ixgbe driver supports NBASE-T on some devices. However, the advertisement +of NBASE-T speeds is suppressed by default, to accommodate broken network +switches which cannot cope with advertised NBASE-T speeds. Use the ethtool +command to enable advertising NBASE-T speeds on devices which support it:: + + ethtool -s eth? advertise 0x1800000001028 + +On Linux systems with INTERFACES(5), this can be specified as a pre-up command +in /etc/network/interfaces so that the interface is always brought up with +NBASE-T support, e.g.:: + + iface eth? inet dhcp + pre-up ethtool -s eth? advertise 0x1800000001028 || true + Generic Receive Offload, aka GRO -------------------------------- The driver supports the in-kernel software implementation of GRO. GRO has diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0f9f022260d7..45e2ec4d264d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5531,6 +5531,10 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) if (!speed && hw->mac.ops.get_link_capabilities) { ret = hw->mac.ops.get_link_capabilities(hw, &speed, &autoneg); + /* remove NBASE-T speeds from default autonegotiation + * to accommodate broken network switches in the field + * which cannot cope with advertised NBASE-T speeds + */ speed &= ~(IXGBE_LINK_SPEED_5GB_FULL | IXGBE_LINK_SPEED_2_5GB_FULL); } -- cgit From bf0a375055bd1afbbf02a0ef45f7655da7b71317 Mon Sep 17 00:00:00 2001 From: Cyril Novikov Date: Mon, 1 Nov 2021 18:39:36 -0700 Subject: ixgbe: set X550 MDIO speed before talking to PHY The MDIO bus speed must be initialized before talking to the PHY the first time in order to avoid talking to it using a speed that the PHY doesn't support. This fixes HW initialization error -17 (IXGBE_ERR_PHY_ADDR_INVALID) on Denverton CPUs (a.k.a. the Atom C3000 family) on ports with a 10Gb network plugged in. On those devices, HLREG0[MDCSPD] resets to 1, which combined with the 10Gb network results in a 24MHz MDIO speed, which is apparently too fast for the connected PHY. PHY register reads over MDIO bus return garbage, leading to initialization failure. Reproduced with Linux kernel 4.19 and 5.15-rc7. Can be reproduced using the following setup: * Use an Atom C3000 family system with at least one X552 LAN on the SoC * Disable PXE or other BIOS network initialization if possible (the interface must not be initialized before Linux boots) * Connect a live 10Gb Ethernet cable to an X550 port * Power cycle (not reset, doesn't always work) the system and boot Linux * Observe: ixgbe interfaces w/ 10GbE cables plugged in fail with error -17 Fixes: e84db7272798 ("ixgbe: Introduce function to control MDIO speed") Signed-off-by: Cyril Novikov Reviewed-by: Andrew Lunn Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 9724ffb16518..e4b50c7781ff 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3405,6 +3405,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); + /* set MDIO speed before talking to the PHY in case it's the 1st time */ + ixgbe_set_mdio_speed(hw); + /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || -- cgit From 972ce7e3801e790bb348bfe98be1ab65af15bacd Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Wed, 15 Dec 2021 12:58:31 +0200 Subject: dpaa2-eth: fix ethtool statistics Unfortunately, with the blamed commit I also added a side effect in the ethtool stats shown. Because I added two more fields in the per channel structure without verifying if its size is used in any way, part of the ethtool statistics were off by 2. Fix this by not looking up the size of the structure but instead on a fixed value kept in a macro. Fixes: fc398bec0387 ("net: dpaa2: add adaptive interrupt coalescing") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20211215105831.290070-1-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 2 ++ drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 2085844227fe..e54e70ebdd05 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -388,6 +388,8 @@ struct dpaa2_eth_ch_stats { __u64 bytes_per_cdan; }; +#define DPAA2_ETH_CH_STATS 7 + /* Maximum number of queues associated with a DPNI */ #define DPAA2_ETH_MAX_TCS 8 #define DPAA2_ETH_MAX_RX_QUEUES_PER_TC 16 diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index adb8ce5306ee..3fdbf87dccb1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -278,7 +278,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, /* Per-channel stats */ for (k = 0; k < priv->num_channels; k++) { ch_stats = &priv->channel[k]->stats; - for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64) - 1; j++) + for (j = 0; j < DPAA2_ETH_CH_STATS; j++) *((__u64 *)data + i + j) += *((__u64 *)ch_stats + j); } i += j; -- cgit From 481221775d53d6215a6e5e9ce1cce6d2b4ab9a46 Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Wed, 15 Dec 2021 19:15:30 +0800 Subject: netdevsim: Zero-initialize memory for new map's value in function nsim_bpf_map_alloc Zero-initialize memory for new map's value in function nsim_bpf_map_alloc since it may cause a potential kernel information leak issue, as follows: 1. nsim_bpf_map_alloc calls nsim_map_alloc_elem to allocate elements for a new map. 2. nsim_map_alloc_elem uses kmalloc to allocate map's value, but doesn't zero it. 3. A user application can use IOCTL BPF_MAP_LOOKUP_ELEM to get specific element's information in the map. 4. The kernel function map_lookup_elem will call bpf_map_copy_value to get the information allocated at step-2, then use copy_to_user to copy to the user buffer. This can only leak information for an array map. Fixes: 395cacb5f1a0 ("netdevsim: bpf: support fake map offload") Suggested-by: Jakub Kicinski Acked-by: Jakub Kicinski Signed-off-by: Haimin Zhang Link: https://lore.kernel.org/r/20211215111530.72103-1-tcs.kernel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 90aafb56f140..a43820212932 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -514,6 +514,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) goto err_free; key = nmap->entry[i].key; *key = i; + memset(nmap->entry[i].value, 0, offmap->map.value_size); } } -- cgit From ec6af094ea28f0f2dda1a6a33b14cd57e36a9755 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 15 Dec 2021 09:39:37 -0500 Subject: net/packet: rx_owner_map depends on pg_vec Packet sockets may switch ring versions. Avoid misinterpreting state between versions, whose fields share a union. rx_owner_map is only allocated with a packet ring (pg_vec) and both are swapped together. If pg_vec is NULL, meaning no packet ring was allocated, then neither was rx_owner_map. And the field may be old state from a tpacket_v3. Fixes: 61fad6816fc1 ("net/packet: tpacket_rcv: avoid a producer race condition") Reported-by: Syzbot Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20211215143937.106178-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 46943a18a10d..76c2dca7f0a5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4492,9 +4492,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: - bitmap_free(rx_owner_map); - if (pg_vec) + if (pg_vec) { + bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); + } out: return err; } -- cgit From ef8a0f6eab1ca5d1a75c242c5c7b9d386735fa0a Mon Sep 17 00:00:00 2001 From: Greg Jesionowski Date: Tue, 14 Dec 2021 15:10:27 -0700 Subject: net: usb: lan78xx: add Allied Telesis AT29M2-AF This adds the vendor and product IDs for the AT29M2-AF which is a lan7801-based device. Signed-off-by: Greg Jesionowski Link: https://lore.kernel.org/r/20211214221027.305784-1-jesionowskigreg@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 8cd265fc1fd9..075f8abde5cd 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -76,6 +76,8 @@ #define LAN7801_USB_PRODUCT_ID (0x7801) #define LAN78XX_EEPROM_MAGIC (0x78A5) #define LAN78XX_OTP_MAGIC (0x78F3) +#define AT29M2AF_USB_VENDOR_ID (0x07C9) +#define AT29M2AF_USB_PRODUCT_ID (0x0012) #define MII_READ 1 #define MII_WRITE 0 @@ -4734,6 +4736,10 @@ static const struct usb_device_id products[] = { /* LAN7801 USB Gigabit Ethernet Device */ USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7801_USB_PRODUCT_ID), }, + { + /* ATM2-AF USB Gigabit Ethernet Device */ + USB_DEVICE(AT29M2AF_USB_VENDOR_ID, AT29M2AF_USB_PRODUCT_ID), + }, {}, }; MODULE_DEVICE_TABLE(usb, products); -- cgit From 0546b224cc7717cc8a2db076b0bb069a9c430794 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 14 Dec 2021 19:10:09 +0000 Subject: net: stmmac: dwmac-rk: fix oob read in rk_gmac_setup KASAN reports an out-of-bounds read in rk_gmac_setup on the line: while (ops->regs[i]) { This happens for most platforms since the regs flexible array member is empty, so the memory after the ops structure is being read here. It seems that mostly this happens to contain zero anyway, so we get lucky and everything still works. To avoid adding redundant data to nearly all the ops structures, add a new flag to indicate whether the regs field is valid and avoid this loop when it is not. Fixes: 3bb3d6b1c195 ("net: stmmac: Add RK3566/RK3568 SoC support") Signed-off-by: John Keeping Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6924a6aacbd5..c469abc91fa1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -33,6 +33,7 @@ struct rk_gmac_ops { void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed); void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed); void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv); + bool regs_valid; u32 regs[]; }; @@ -1092,6 +1093,7 @@ static const struct rk_gmac_ops rk3568_ops = { .set_to_rmii = rk3568_set_to_rmii, .set_rgmii_speed = rk3568_set_gmac_speed, .set_rmii_speed = rk3568_set_gmac_speed, + .regs_valid = true, .regs = { 0xfe2a0000, /* gmac0 */ 0xfe010000, /* gmac1 */ @@ -1383,7 +1385,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, * to be distinguished. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res) { + if (res && ops->regs_valid) { int i = 0; while (ops->regs[i]) { -- cgit From 407ecd1bd726f240123f704620d46e285ff30dd9 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 15 Dec 2021 22:37:31 +0800 Subject: sfc_ef100: potential dereference of null pointer The return value of kmalloc() needs to be checked. To avoid use in efx_nic_update_stats() in case of the failure of alloc. Fixes: b593b6f1b492 ("sfc_ef100: statistics gathering") Signed-off-by: Jiasheng Jiang Reported-by: kernel test robot Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 6aa81229b68a..e77a5cb4e40d 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -609,6 +609,9 @@ static size_t ef100_update_stats(struct efx_nic *efx, ef100_common_stat_mask(mask); ef100_ethtool_stat_mask(mask); + if (!mc_stats) + return 0; + efx_nic_copy_stats(efx, mc_stats); efx_nic_update_stats(ef100_stat_desc, EF100_STAT_COUNT, mask, stats, mc_stats, false); -- cgit From e08cdf63049b711099efff0811273449083bb958 Mon Sep 17 00:00:00 2001 From: Andrey Eremeev Date: Wed, 15 Dec 2021 20:30:32 +0300 Subject: dsa: mv88e6xxx: fix debug print for SPEED_UNFORCED Debug print uses invalid check to detect if speed is unforced: (speed != SPEED_UNFORCED) should be used instead of (!speed). Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Andrey Eremeev Fixes: 96a2b40c7bd3 ("net: dsa: mv88e6xxx: add port's MAC speed setter") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index d9817b20ea64..ab41619a809b 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -283,7 +283,7 @@ static int mv88e6xxx_port_set_speed_duplex(struct mv88e6xxx_chip *chip, if (err) return err; - if (speed) + if (speed != SPEED_UNFORCED) dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed); else dev_dbg(chip->dev, "p%d: Speed unforced\n", port); @@ -516,7 +516,7 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, if (err) return err; - if (speed) + if (speed != SPEED_UNFORCED) dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed); else dev_dbg(chip->dev, "p%d: Speed unforced\n", port); -- cgit From 053c9e18c6f9cf82242ef35ac21cae1842725714 Mon Sep 17 00:00:00 2001 From: Wenliang Wang Date: Thu, 16 Dec 2021 11:11:35 +0800 Subject: virtio_net: fix rx_drops stat for small pkts We found the stat of rx drops for small pkts does not increment when build_skb fail, it's not coherent with other mode's rx drops stat. Signed-off-by: Wenliang Wang Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 55db6a336f7e..b107835242ad 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -733,7 +733,7 @@ static struct sk_buff *receive_small(struct net_device *dev, pr_debug("%s: rx error: len %u exceeds max size %d\n", dev->name, len, GOOD_PACKET_LEN); dev->stats.rx_length_errors++; - goto err_len; + goto err; } if (likely(!vi->xdp_enabled)) { @@ -825,10 +825,8 @@ static struct sk_buff *receive_small(struct net_device *dev, skip_xdp: skb = build_skb(buf, buflen); - if (!skb) { - put_page(page); + if (!skb) goto err; - } skb_reserve(skb, headroom - delta); skb_put(skb, len); if (!xdp_prog) { @@ -839,13 +837,12 @@ skip_xdp: if (metasize) skb_metadata_set(skb, metasize); -err: return skb; err_xdp: rcu_read_unlock(); stats->xdp_drops++; -err_len: +err: stats->drops++; put_page(page); xdp_xmit: -- cgit From 8a03ef676ade55182f9b05115763aeda6dc08159 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 16 Dec 2021 11:28:25 +0200 Subject: net: Fix double 0x prefix print in SKB dump When printing netdev features %pNF already takes care of the 0x prefix, remove the explicit one. Fixes: 6413139dfc64 ("skbuff: increase verbosity when dumping skb data") Signed-off-by: Gal Pressman Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ba2f38246f07..909db87d7383 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -832,7 +832,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) - printk("%sdev name=%s feat=0x%pNF\n", + printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", -- cgit From 5c15b3123f65f8fbb1b445d9a7e8812e0e435df2 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 15 Dec 2021 20:29:21 +0800 Subject: net/smc: Prevent smc_release() from long blocking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In nginx/wrk benchmark, there's a hung problem with high probability on case likes that: (client will last several minutes to exit) server: smc_run nginx client: smc_run wrk -c 10000 -t 1 http://server Client hangs with the following backtrace: 0 [ffffa7ce8Of3bbf8] __schedule at ffffffff9f9eOd5f 1 [ffffa7ce8Of3bc88] schedule at ffffffff9f9eløe6 2 [ffffa7ce8Of3bcaO] schedule_timeout at ffffffff9f9e3f3c 3 [ffffa7ce8Of3bd2O] wait_for_common at ffffffff9f9el9de 4 [ffffa7ce8Of3bd8O] __flush_work at ffffffff9fOfeOl3 5 [ffffa7ce8øf3bdfO] smc_release at ffffffffcO697d24 [smc] 6 [ffffa7ce8Of3be2O] __sock_release at ffffffff9f8O2e2d 7 [ffffa7ce8Of3be4ø] sock_close at ffffffff9f8ø2ebl 8 [ffffa7ce8øf3be48] __fput at ffffffff9f334f93 9 [ffffa7ce8Of3be78] task_work_run at ffffffff9flOlff5 10 [ffffa7ce8Of3beaO] do_exit at ffffffff9fOe5Ol2 11 [ffffa7ce8Of3bflO] do_group_exit at ffffffff9fOe592a 12 [ffffa7ce8Of3bf38] __x64_sys_exit_group at ffffffff9fOe5994 13 [ffffa7ce8Of3bf4O] do_syscall_64 at ffffffff9f9d4373 14 [ffffa7ce8Of3bfsO] entry_SYSCALL_64_after_hwframe at ffffffff9fa0007c This issue dues to flush_work(), which is used to wait for smc_connect_work() to finish in smc_release(). Once lots of smc_connect_work() was pending or all executing work dangling, smc_release() has to block until one worker comes to free, which is equivalent to wait another smc_connnect_work() to finish. In order to fix this, There are two changes: 1. For those idle smc_connect_work(), cancel it from the workqueue; for executing smc_connect_work(), waiting for it to finish. For that purpose, replace flush_work() with cancel_work_sync(). 2. Since smc_connect() hold a reference for passive closing, if smc_connect_work() has been cancelled, release the reference. Fixes: 24ac3a08e658 ("net/smc: rebuild nonblocking connect") Reported-by: Tony Lu Tested-by: Dust Li Reviewed-by: Dust Li Reviewed-by: Tony Lu Signed-off-by: D. Wythe Acked-by: Karsten Graul Link: https://lore.kernel.org/r/1639571361-101128-1-git-send-email-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 230072f9ec48..1c9289f56dc4 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -194,7 +194,9 @@ static int smc_release(struct socket *sock) /* cleanup for a dangling non-blocking connect */ if (smc->connect_nonblock && sk->sk_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); - flush_work(&smc->connect_work); + + if (cancel_work_sync(&smc->connect_work)) + sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */ if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires -- cgit From 8b8e6e782456f1ce02a7ae914bbd5b1053f0b034 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 Dec 2021 12:24:49 -0800 Subject: net: systemport: Add global locking for descriptor lifecycle The descriptor list is a shared resource across all of the transmit queues, and the locking mechanism used today only protects concurrency across a given transmit queue between the transmit and reclaiming. This creates an opportunity for the SYSTEMPORT hardware to work on corrupted descriptors if we have multiple producers at once which is the case when using multiple transmit queues. This was particularly noticeable when using multiple flows/transmit queues and it showed up in interesting ways in that UDP packets would get a correct UDP header checksum being calculated over an incorrect packet length. Similarly TCP packets would get an equally correct checksum computed by the hardware over an incorrect packet length. The SYSTEMPORT hardware maintains an internal descriptor list that it re-arranges when the driver produces a new descriptor anytime it writes to the WRITE_PORT_{HI,LO} registers, there is however some delay in the hardware to re-organize its descriptors and it is possible that concurrent TX queues eventually break this internal allocation scheme to the point where the length/status part of the descriptor gets used for an incorrect data buffer. The fix is to impose a global serialization for all TX queues in the short section where we are writing to the WRITE_PORT_{HI,LO} registers which solves the corruption even with multiple concurrent TX queues being used. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20211215202450.4086240-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 5 ++++- drivers/net/ethernet/broadcom/bcmsysport.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 40933bf5a710..60dde29974bf 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1309,11 +1309,11 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, struct bcm_sysport_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; struct bcm_sysport_tx_ring *ring; + unsigned long flags, desc_flags; struct bcm_sysport_cb *cb; struct netdev_queue *txq; u32 len_status, addr_lo; unsigned int skb_len; - unsigned long flags; dma_addr_t mapping; u16 queue; int ret; @@ -1373,8 +1373,10 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, ring->desc_count--; /* Ports are latched, so write upper address first */ + spin_lock_irqsave(&priv->desc_lock, desc_flags); tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index)); tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index)); + spin_unlock_irqrestore(&priv->desc_lock, desc_flags); /* Check ring space and update SW control flow */ if (ring->desc_count == 0) @@ -2013,6 +2015,7 @@ static int bcm_sysport_open(struct net_device *dev) } /* Initialize both hardware and software ring */ + spin_lock_init(&priv->desc_lock); for (i = 0; i < dev->num_tx_queues; i++) { ret = bcm_sysport_init_tx_ring(priv, i); if (ret) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 984f76e74b43..16b73bb9acc7 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -711,6 +711,7 @@ struct bcm_sysport_priv { int wol_irq; /* Transmit rings */ + spinlock_t desc_lock; struct bcm_sysport_tx_ring *tx_rings; /* Receive queue */ -- cgit From e28587cc491ef0f3c51258fdc87fbc386b1d4c59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Dec 2021 03:17:41 -0800 Subject: sit: do not call ipip6_dev_free() from sit_init_net() ipip6_dev_free is sit dev->priv_destructor, already called by register_netdevice() if something goes wrong. Alternative would be to make ipip6_dev_free() robust against multiple invocations, but other drivers do not implement this strategy. syzbot reported: dst_release underflow WARNING: CPU: 0 PID: 5059 at net/core/dst.c:173 dst_release+0xd8/0xe0 net/core/dst.c:173 Modules linked in: CPU: 1 PID: 5059 Comm: syz-executor.4 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:dst_release+0xd8/0xe0 net/core/dst.c:173 Code: 4c 89 f2 89 d9 31 c0 5b 41 5e 5d e9 da d5 44 f9 e8 1d 90 5f f9 c6 05 87 48 c6 05 01 48 c7 c7 80 44 99 8b 31 c0 e8 e8 67 29 f9 <0f> 0b eb 85 0f 1f 40 00 53 48 89 fb e8 f7 8f 5f f9 48 83 c3 a8 48 RSP: 0018:ffffc9000aa5faa0 EFLAGS: 00010246 RAX: d6894a925dd15a00 RBX: 00000000ffffffff RCX: 0000000000040000 RDX: ffffc90005e19000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: 0000000000000000 R08: ffffffff816a1f42 R09: ffffed1017344f2c R10: ffffed1017344f2c R11: 0000000000000000 R12: 0000607f462b1358 R13: 1ffffffff1bfd305 R14: ffffe8ffffcb1358 R15: dffffc0000000000 FS: 00007f66c71a2700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f88aaed5058 CR3: 0000000023e0f000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dst_cache_destroy+0x107/0x1e0 net/core/dst_cache.c:160 ipip6_dev_free net/ipv6/sit.c:1414 [inline] sit_init_net+0x229/0x550 net/ipv6/sit.c:1936 ops_init+0x313/0x430 net/core/net_namespace.c:140 setup_net+0x35b/0x9d0 net/core/net_namespace.c:326 copy_net_ns+0x359/0x5c0 net/core/net_namespace.c:470 create_new_namespaces+0x4ce/0xa00 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0x11e/0x180 kernel/nsproxy.c:226 ksys_unshare+0x57d/0xb50 kernel/fork.c:3075 __do_sys_unshare kernel/fork.c:3146 [inline] __se_sys_unshare kernel/fork.c:3144 [inline] __x64_sys_unshare+0x34/0x40 kernel/fork.c:3144 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f66c882ce99 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f66c71a2168 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00007f66c893ff60 RCX: 00007f66c882ce99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000048040200 RBP: 00007f66c8886ff1 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff6634832f R14: 00007f66c71a2300 R15: 0000000000022000 Fixes: cf124db566e6 ("net: Fix inconsistent teardown and release of private netdev state.") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211216111741.1387540-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b57ee36d668..8a3618a30632 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1933,7 +1933,6 @@ static int __net_init sit_init_net(struct net *net) return 0; err_reg_dev: - ipip6_dev_free(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; -- cgit From 3cf2b61eb06765e27fec6799292d9fb46d0b7e60 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 22:02:19 +0000 Subject: bpf: Fix signed bounds propagation after mov32 For the case where both s32_{min,max}_value bounds are positive, the __reg_assign_32_into_64() directly propagates them to their 64 bit counterparts, otherwise it pessimises them into [0,u32_max] universe and tries to refine them later on by learning through the tnum as per comment in mentioned function. However, that does not always happen, for example, in mov32 operation we call zext_32_to_64(dst_reg) which invokes the __reg_assign_32_into_64() as is without subsequent bounds update as elsewhere thus no refinement based on tnum takes place. Thus, not calling into the __update_reg_bounds() / __reg_deduce_bounds() / __reg_bound_offset() triplet as we do, for example, in case of ALU ops via adjust_scalar_min_max_vals(), will lead to more pessimistic bounds when dumping the full register state: Before fix: 0: (b4) w0 = -1 1: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) 1: (bc) w0 = w0 2: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=0,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) Technically, the smin_value=0 and smax_value=4294967295 bounds are not incorrect, but given the register is still a constant, they break assumptions about const scalars that smin_value == smax_value and umin_value == umax_value. After fix: 0: (b4) w0 = -1 1: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) 1: (bc) w0 = w0 2: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) Without the smin_value == smax_value and umin_value == umax_value invariant being intact for const scalars, it is possible to leak out kernel pointers from unprivileged user space if the latter is enabled. For example, when such registers are involved in pointer arithmtics, then adjust_ptr_min_max_vals() will taint the destination register into an unknown scalar, and the latter can be exported and stored e.g. into a BPF map value. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Kuee K1r0a Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2d48159b58bd..0872b6c9fb33 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8317,6 +8317,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); + + __update_reg_bounds(dst_reg); + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); } } else { /* case: R = imm -- cgit From e572ff80f05c33cd0cb4860f864f5c9c044280b6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 22:28:48 +0000 Subject: bpf: Make 32->64 bounds propagation slightly more robust Make the bounds propagation in __reg_assign_32_into_64() slightly more robust and readable by aligning it similarly as we did back in the __reg_combine_64_into_32() counterpart. Meaning, only propagate or pessimize them as a smin/smax pair. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0872b6c9fb33..b532f1058d35 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1366,22 +1366,28 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static bool __reg32_bound_s64(s32 a) +{ + return a >= 0 && a <= S32_MAX; +} + static void __reg_assign_32_into_64(struct bpf_reg_state *reg) { reg->umin_value = reg->u32_min_value; reg->umax_value = reg->u32_max_value; - /* Attempt to pull 32-bit signed bounds into 64-bit bounds - * but must be positive otherwise set to worse case bounds - * and refine later from tnum. + + /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must + * be positive otherwise set to worse case bounds and refine later + * from tnum. */ - if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) - reg->smax_value = reg->s32_max_value; - else - reg->smax_value = U32_MAX; - if (reg->s32_min_value >= 0) + if (__reg32_bound_s64(reg->s32_min_value) && + __reg32_bound_s64(reg->s32_max_value)) { reg->smin_value = reg->s32_min_value; - else + reg->smax_value = reg->s32_max_value; + } else { reg->smin_value = 0; + reg->smax_value = U32_MAX; + } } static void __reg_combine_32_into_64(struct bpf_reg_state *reg) -- cgit From b1a7288dedc6caf9023f2676b4f5ed34cf0d4029 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 23:48:54 +0000 Subject: bpf, selftests: Add test case trying to taint map value pointer Add a test case which tries to taint map value pointer arithmetic into a unknown scalar with subsequent export through the map. Before fix: # ./test_verifier 1186 #1186/u map access: trying to leak tained dst reg FAIL Unexpected success to load! verification time 24 usec stack depth 8 processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 #1186/p map access: trying to leak tained dst reg FAIL Unexpected success to load! verification time 8 usec stack depth 8 processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 Summary: 0 PASSED, 0 SKIPPED, 2 FAILED After fix: # ./test_verifier 1186 #1186/u map access: trying to leak tained dst reg OK #1186/p map access: trying to leak tained dst reg OK Summary: 2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- .../selftests/bpf/verifier/value_ptr_arith.c | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 2debba4e8a3a..4d347bc53aa2 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1077,6 +1077,29 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "map access: trying to leak tained dst reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_1, 0xFFFFFFFF), + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 4 }, + .result = REJECT, + .errstr = "math between map_value pointer and 4294967295 is not allowed", +}, { "32bit pkt_ptr -= scalar", .insns = { -- cgit From 433956e91200734d09958673a56df02d00a917c2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Dec 2021 18:38:30 -0800 Subject: bpf: Fix extable fixup offset. The prog - start_of_ldx is the offset before the faulting ldx to the location after it, so this will be used to adjust pt_regs->ip for jumping over it and continuing, and with old temp it would have been fixed up to the wrong offset, causing crash. Fixes: 4c5de127598e ("bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.") Signed-off-by: Alexei Starovoitov Reviewed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 726700fabca6..fa58681db45e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1305,7 +1305,7 @@ st: if (is_imm8(insn->off)) * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" * of 4 bytes will be ignored and rbx will be zero inited. */ - ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); + ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8); } break; -- cgit From 588a25e92458c6efeb7a261d5ca5726f5de89184 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Dec 2021 19:25:13 -0800 Subject: bpf: Fix extable address check. The verifier checks that PTR_TO_BTF_ID pointer is either valid or NULL, but it cannot distinguish IS_ERR pointer from valid one. When offset is added to IS_ERR pointer it may become small positive value which is a user address that is not handled by extable logic and has to be checked for at the runtime. Tighten BPF_PROBE_MEM pointer check code to prevent this case. Fixes: 4c5de127598e ("bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.") Reported-by: Lorenzo Fontana Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp.c | 49 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fa58681db45e..bafe36e69227 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1252,19 +1252,54 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { - /* test src_reg, src_reg */ - maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */ - EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg)); - /* jne start_of_ldx */ - EMIT2(X86_JNE, 0); + /* Though the verifier prevents negative insn->off in BPF_PROBE_MEM + * add abs(insn->off) to the limit to make sure that negative + * offset won't be an issue. + * insn->off is s16, so it won't affect valid pointers. + */ + u64 limit = TASK_SIZE_MAX + PAGE_SIZE + abs(insn->off); + u8 *end_of_jmp1, *end_of_jmp2; + + /* Conservatively check that src_reg + insn->off is a kernel address: + * 1. src_reg + insn->off >= limit + * 2. src_reg + insn->off doesn't become small positive. + * Cannot do src_reg + insn->off >= limit in one branch, + * since it needs two spare registers, but JIT has only one. + */ + + /* movabsq r11, limit */ + EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); + EMIT((u32)limit, 4); + EMIT(limit >> 32, 4); + /* cmp src_reg, r11 */ + maybe_emit_mod(&prog, src_reg, AUX_REG, true); + EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); + /* if unsigned '<' goto end_of_jmp2 */ + EMIT2(X86_JB, 0); + end_of_jmp1 = prog; + + /* mov r11, src_reg */ + emit_mov_reg(&prog, true, AUX_REG, src_reg); + /* add r11, insn->off */ + maybe_emit_1mod(&prog, AUX_REG, true); + EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off); + /* jmp if not carry to start_of_ldx + * Otherwise ERR_PTR(-EINVAL) + 128 will be the user addr + * that has to be rejected. + */ + EMIT2(0x73 /* JNC */, 0); + end_of_jmp2 = prog; + /* xor dst_reg, dst_reg */ emit_mov_imm32(&prog, false, dst_reg, 0); /* jmp byte_after_ldx */ EMIT2(0xEB, 0); - /* populate jmp_offset for JNE above */ - temp[4] = prog - temp - 5 /* sizeof(test + jne) */; + /* populate jmp_offset for JB above to jump to xor dst_reg */ + end_of_jmp1[-1] = end_of_jmp2 - end_of_jmp1; + /* populate jmp_offset for JNC above to jump to start_of_ldx */ start_of_ldx = prog; + end_of_jmp2[-1] = start_of_ldx - end_of_jmp2; } emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { -- cgit From 7edc3fcbf9a2b2e3df53c9656a9f85bf807affac Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Dec 2021 12:35:34 -0800 Subject: selftest/bpf: Add a test that reads various addresses. Add a function to bpf_testmod that returns invalid kernel and user addresses. Then attach an fexit program to that function that tries to read memory through these addresses. This logic checks that bpf_probe_read_kernel and BPF_PROBE_MEM logic is sane. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- .../testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 20 ++++++++++++++++++++ .../testing/selftests/bpf/progs/test_module_attach.c | 12 ++++++++++++ 2 files changed, 32 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 5d52ea2768df..df3b292a8ffe 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -33,6 +33,22 @@ noinline int bpf_testmod_loop_test(int n) return sum; } +__weak noinline struct file *bpf_testmod_return_ptr(int arg) +{ + static struct file f = {}; + + switch (arg) { + case 1: return (void *)EINVAL; /* user addr */ + case 2: return (void *)0xcafe4a11; /* user addr */ + case 3: return (void *)-EINVAL; /* canonical, but invalid */ + case 4: return (void *)(1ull << 60); /* non-canonical and invalid */ + case 5: return (void *)~(1ull << 30); /* trigger extable */ + case 6: return &f; /* valid addr */ + case 7: return (void *)((long)&f | 1); /* kernel tricks */ + default: return NULL; + } +} + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -43,6 +59,10 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, .off = off, .len = len, }; + int i = 1; + + while (bpf_testmod_return_ptr(i)) + i++; /* This is always true. Use the check to make sure the compiler * doesn't remove bpf_testmod_loop_test. diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index b36857093f71..50ce16d02da7 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -87,6 +87,18 @@ int BPF_PROG(handle_fexit, return 0; } +SEC("fexit/bpf_testmod_return_ptr") +int BPF_PROG(handle_fexit_ret, int arg, struct file *ret) +{ + long buf = 0; + + bpf_probe_read_kernel(&buf, 8, ret); + bpf_probe_read_kernel(&buf, 8, (char *)ret + 256); + *(volatile long long *)ret; + *(volatile int *)&ret->f_mode; + return 0; +} + __u32 fmod_ret_read_sz = 0; SEC("fmod_ret/bpf_testmod_test_read") -- cgit From c2fcbf81c332b42382a0c439bfe2414a241e4f5b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 16 Dec 2021 11:16:30 -0800 Subject: bpf, selftests: Fix racing issue in btf_skc_cls_ingress test The libbpf CI reported occasional failure in btf_skc_cls_ingress: test_syncookie:FAIL:Unexpected syncookie states gen_cookie:80326634 recv_cookie:0 bpf prog error at line 97 "error at line 97" means the bpf prog cannot find the listening socket when the final ack is received. It then skipped processing the syncookie in the final ack which then led to "recv_cookie:0". The problem is the userspace program did not do accept() and went ahead to close(listen_fd) before the kernel (and the bpf prog) had a chance to process the final ack. The fix is to add accept() call so that the userspace will wait for the kernel to finish processing the final ack first before close()-ing everything. Fixes: 9a856cae2217 ("bpf: selftest: Add test_btf_skc_cls_ingress") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211216191630.466151-1-kafai@fb.com --- .../selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 762f6a9da8b5..664ffc0364f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -90,7 +90,7 @@ static void print_err_line(void) static void test_conn(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -112,6 +112,10 @@ static void test_conn(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port || skel->bss->req_sk_sport != srv_port, "Unexpected sk src port", @@ -134,11 +138,13 @@ done: close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } static void test_syncookie(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -161,6 +167,10 @@ static void test_syncookie(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port, "Unexpected tp src port", "listen_tp_sport:%u expected:%u\n", @@ -188,6 +198,8 @@ done: close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } struct test { -- cgit