From d68f4e43a46ff1f772ff73085f96d44eb4163e9d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Dec 2019 14:18:42 +0100 Subject: mt76: fix LED link time failure The mt76_led_cleanup() function is called unconditionally, which leads to a link error when CONFIG_LEDS is a loadable module or disabled but mt76 is built-in: drivers/net/wireless/mediatek/mt76/mac80211.o: In function `mt76_unregister_device': mac80211.c:(.text+0x2ac): undefined reference to `led_classdev_unregister' Use the same trick that is guarding the registration, using an IS_ENABLED() check for the CONFIG_MT76_LEDS symbol that indicates whether LEDs can be used or not. Fixes: 36f7e2b2bb1d ("mt76: do not use devm API for led classdev") Signed-off-by: Arnd Bergmann Acked-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mac80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index b9f2a401041a..96018fd65779 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -378,7 +378,8 @@ void mt76_unregister_device(struct mt76_dev *dev) { struct ieee80211_hw *hw = dev->hw; - mt76_led_cleanup(dev); + if (IS_ENABLED(CONFIG_MT76_LEDS)) + mt76_led_cleanup(dev); mt76_tx_status_check(dev, NULL, true); ieee80211_unregister_hw(hw); } -- cgit From b43e36d75e8727f78892652a25967a1ffa03d1d1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Nov 2019 07:49:56 +0300 Subject: mt76: Off by one in mt76_calc_rx_airtime() The sband->bitrates[] array has "sband->n_bitrates" elements so this check needs to be >= instead of > or we could read beyond the end of the array. These values come from when we call mt76_register_device(): ret = mt76_register_device(&dev->mt76, true, mt7603_rates, ARRAY_SIZE(mt7603_rates)); Here sband->bitrates[] is mt7603_rates[] and ->n_bitrates is the ARRAY_SIZE() Fixes: 5ce09c1a7907 ("mt76: track rx airtime for airtime fairness and survey") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/airtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/airtime.c b/drivers/net/wireless/mediatek/mt76/airtime.c index 55116f395f9a..a4a785467748 100644 --- a/drivers/net/wireless/mediatek/mt76/airtime.c +++ b/drivers/net/wireless/mediatek/mt76/airtime.c @@ -242,7 +242,7 @@ u32 mt76_calc_rx_airtime(struct mt76_dev *dev, struct mt76_rx_status *status, return 0; sband = dev->hw->wiphy->bands[status->band]; - if (!sband || status->rate_idx > sband->n_bitrates) + if (!sband || status->rate_idx >= sband->n_bitrates) return 0; rate = &sband->bitrates[status->rate_idx]; -- cgit From e5e884b42639c74b5b57dc277909915c0aefc8bb Mon Sep 17 00:00:00 2001 From: Wen Huang Date: Thu, 28 Nov 2019 18:51:04 +0800 Subject: libertas: Fix two buffer overflows at parsing bss descriptor add_ie_rates() copys rates without checking the length in bss descriptor from remote AP.when victim connects to remote attacker, this may trigger buffer overflow. lbs_ibss_join_existing() copys rates without checking the length in bss descriptor from remote IBSS node.when victim connects to remote attacker, this may trigger buffer overflow. Fix them by putting the length check before performing copy. This fix addresses CVE-2019-14896 and CVE-2019-14897. This also fix build warning of mixed declarations and code. Reported-by: kbuild test robot Signed-off-by: Wen Huang Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/libertas/cfg.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 57edfada0665..c9401c121a14 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -273,6 +273,10 @@ add_ie_rates(u8 *tlv, const u8 *ie, int *nrates) int hw, ap, ap_max = ie[1]; u8 hw_rate; + if (ap_max > MAX_RATES) { + lbs_deb_assoc("invalid rates\n"); + return tlv; + } /* Advance past IE header */ ie += 2; @@ -1717,6 +1721,9 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, struct cmd_ds_802_11_ad_hoc_join cmd; u8 preamble = RADIO_PREAMBLE_SHORT; int ret = 0; + int hw, i; + u8 rates_max; + u8 *rates; /* TODO: set preamble based on scan result */ ret = lbs_set_radio(priv, preamble, 1); @@ -1775,9 +1782,12 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, if (!rates_eid) { lbs_add_rates(cmd.bss.rates); } else { - int hw, i; - u8 rates_max = rates_eid[1]; - u8 *rates = cmd.bss.rates; + rates_max = rates_eid[1]; + if (rates_max > MAX_RATES) { + lbs_deb_join("invalid rates"); + goto out; + } + rates = cmd.bss.rates; for (hw = 0; hw < ARRAY_SIZE(lbs_rates); hw++) { u8 hw_rate = lbs_rates[hw].bitrate / 5; for (i = 0; i < rates_max; i++) { -- cgit From 33328bfab892d676920abb440d41fbf4b16c2717 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 19 Dec 2019 10:35:59 +0100 Subject: MAINTAINERS: change Gruszka's email address My RedHat email address does not work any longer. Change to my private one. Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e34488f7baae..096a982c860c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8563,7 +8563,7 @@ S: Maintained F: drivers/platform/x86/intel-vbtn.c INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy) -M: Stanislaw Gruszka +M: Stanislaw Gruszka L: linux-wireless@vger.kernel.org S: Supported F: drivers/net/wireless/intel/iwlegacy/ @@ -13817,7 +13817,7 @@ S: Maintained F: arch/mips/ralink RALINK RT2X00 WIRELESS LAN DRIVER -M: Stanislaw Gruszka +M: Stanislaw Gruszka M: Helmut Schaa L: linux-wireless@vger.kernel.org S: Maintained -- cgit From 160bab43419ebca9ee57219b2ccf02f0fa2c59e8 Mon Sep 17 00:00:00 2001 From: Gil Adam Date: Thu, 7 Nov 2019 21:23:21 +0200 Subject: iwlwifi: don't send PPAG command if disabled we should not send the PPAG (Per-Platform Antenna Gain) command to FW unless the platform has this ACPI table and it was read and validated during the init flow. also no need to send the command if the feature is disabled, so check if enabled before sending, as if there is no valid table the feature is disabled. Signed-off-by: Gil Adam Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index dd685f7eb410..c09624d8d7ee 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -841,9 +841,13 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm) return 0; } + if (!mvm->fwrt.ppag_table.enabled) { + IWL_DEBUG_RADIO(mvm, + "PPAG not enabled, command not sent.\n"); + return 0; + } + IWL_DEBUG_RADIO(mvm, "Sending PER_PLATFORM_ANT_GAIN_CMD\n"); - IWL_DEBUG_RADIO(mvm, "PPAG is %s\n", - mvm->fwrt.ppag_table.enabled ? "enabled" : "disabled"); for (i = 0; i < ACPI_PPAG_NUM_CHAINS; i++) { for (j = 0; j < ACPI_PPAG_NUM_SUB_BANDS; j++) { -- cgit From 7b02bf6194887eab2f8912f7284a9e407329a255 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 12 Nov 2019 15:18:16 +0100 Subject: iwlwifi: pcie: move page tracking into get_page_hdr() Move the tracking that records the page in the SKB for later free (refcount decrement) into the get_page_hdr() function for better code reuse. While at it, also add an assertion that this doesn't overwrite any existing page pointer in the skb. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 3 ++- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 7 +------ drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 20 +++++++++++++------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index a091690f6c79..3688911ce3df 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1082,7 +1082,8 @@ void iwl_pcie_apply_destination(struct iwl_trans *trans); void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie, struct sk_buff *skb); #ifdef CONFIG_INET -struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len); +struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len, + struct sk_buff *skb); #endif /* common functions that are used by gen3 transport */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 8ca0250de99e..494a8864368d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -246,7 +246,6 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, u8 hdr_len, struct iwl_device_cmd *dev_cmd) { #ifdef CONFIG_INET - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_tx_cmd_gen2 *tx_cmd = (void *)dev_cmd->payload; struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; @@ -254,7 +253,6 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, u16 length, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; - struct page **page_ptr; struct tso_t tso; trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), @@ -270,14 +268,11 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)); /* Our device supports 9 segments at most, it will fit in 1 page */ - hdr_page = get_page_hdr(trans, hdr_room); + hdr_page = get_page_hdr(trans, hdr_room, skb); if (!hdr_page) return -ENOMEM; - get_page(hdr_page->page); start_hdr = hdr_page->pos; - page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); - *page_ptr = hdr_page->page; /* * Pull the ieee80211 header to be able to use TSO core, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index f21f16ab2a97..2d1758031a0a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -2052,17 +2052,24 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, } #ifdef CONFIG_INET -struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len) +struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len, + struct sk_buff *skb) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->tso_hdr_page); + struct page **page_ptr; + + page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); + + if (WARN_ON(*page_ptr)) + return NULL; if (!p->page) goto alloc; /* enough room on this page */ if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE) - return p; + goto out; /* We don't have enough room on this page, get a new one. */ __free_page(p->page); @@ -2072,6 +2079,9 @@ alloc: if (!p->page) return NULL; p->pos = page_address(p->page); +out: + *page_ptr = p->page; + get_page(p->page); return p; } @@ -2107,7 +2117,6 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, u16 length, iv_len, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; - struct page **page_ptr; struct tso_t tso; /* if the packet is protected, then it must be CCMP or GCMP */ @@ -2130,14 +2139,11 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len; /* Our device supports 9 segments at most, it will fit in 1 page */ - hdr_page = get_page_hdr(trans, hdr_room); + hdr_page = get_page_hdr(trans, hdr_room, skb); if (!hdr_page) return -ENOMEM; - get_page(hdr_page->page); start_hdr = hdr_page->pos; - page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); - *page_ptr = hdr_page->page; memcpy(hdr_page->pos, skb->data + hdr_len, iv_len); hdr_page->pos += iv_len; -- cgit From c4a786b32621850775dedd1a329de0c060f9c904 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 12 Nov 2019 10:32:42 +0100 Subject: iwlwifi: pcie: work around DMA hardware bug There's a hardware bug in the flow handler (DMA engine), if the address + len of some TB wraps around a 2^32 boundary, the carry bit is then carried over into the next TB. Work around this by copying the data to a new page when we find this situation, and then copy it in a way that we cannot hit the very end of the page. To be able to free the new page again later we need to chain it to the TSO page, use the last pointer there to make sure we can never use the page fully for DMA, and thus cannot cause the same overflow situation on this page. This leaves a few potential places (where we didn't observe the problem) unaddressed: * The second TB could reach or cross the end of a page (and thus 2^32) due to the way we allocate the dev_cmd for the header * For host commands, a similar thing could happen since they're just kmalloc(). We'll address these in further commits. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 179 ++++++++++++++++++---- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 28 +++- 2 files changed, 176 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 494a8864368d..8abadfbc793a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -213,6 +213,16 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) } } +/* + * We need this inline in case dma_addr_t is only 32-bits - since the + * hardware is always 64-bit, the issue can still occur in that case, + * so use u64 for 'phys' here to force the addition in 64-bit. + */ +static inline bool crosses_4g_boundary(u64 phys, u16 len) +{ + return upper_32_bits(phys) != upper_32_bits(phys + len); +} + static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans, struct iwl_tfh_tfd *tfd, dma_addr_t addr, u16 len) @@ -240,6 +250,107 @@ static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans, return idx; } +static struct page *get_workaround_page(struct iwl_trans *trans, + struct sk_buff *skb) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct page **page_ptr; + struct page *ret; + + page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); + + ret = alloc_page(GFP_ATOMIC); + if (!ret) + return NULL; + + /* set the chaining pointer to the previous page if there */ + *(void **)(page_address(ret) + PAGE_SIZE - sizeof(void *)) = *page_ptr; + *page_ptr = ret; + + return ret; +} + +/* + * Add a TB and if needed apply the FH HW bug workaround; + * meta != NULL indicates that it's a page mapping and we + * need to dma_unmap_page() and set the meta->tbs bit in + * this case. + */ +static int iwl_pcie_gen2_set_tb_with_wa(struct iwl_trans *trans, + struct sk_buff *skb, + struct iwl_tfh_tfd *tfd, + dma_addr_t phys, void *virt, + u16 len, struct iwl_cmd_meta *meta) +{ + dma_addr_t oldphys = phys; + struct page *page; + int ret; + + if (unlikely(dma_mapping_error(trans->dev, phys))) + return -ENOMEM; + + if (likely(!crosses_4g_boundary(phys, len))) { + ret = iwl_pcie_gen2_set_tb(trans, tfd, phys, len); + + if (ret < 0) + goto unmap; + + if (meta) + meta->tbs |= BIT(ret); + + ret = 0; + goto trace; + } + + /* + * Work around a hardware bug. If (as expressed in the + * condition above) the TB ends on a 32-bit boundary, + * then the next TB may be accessed with the wrong + * address. + * To work around it, copy the data elsewhere and make + * a new mapping for it so the device will not fail. + */ + + if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) { + ret = -ENOBUFS; + goto unmap; + } + + page = get_workaround_page(trans, skb); + if (!page) { + ret = -ENOMEM; + goto unmap; + } + + memcpy(page_address(page), virt, len); + + phys = dma_map_single(trans->dev, page_address(page), len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(trans->dev, phys))) + return -ENOMEM; + ret = iwl_pcie_gen2_set_tb(trans, tfd, phys, len); + if (ret < 0) { + /* unmap the new allocation as single */ + oldphys = phys; + meta = NULL; + goto unmap; + } + IWL_WARN(trans, + "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n", + len, (unsigned long long)oldphys, (unsigned long long)phys); + + ret = 0; +unmap: + if (meta) + dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE); + else + dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE); +trace: + trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len); + + return ret; +} + static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_tfh_tfd *tfd, int start_len, @@ -327,6 +438,11 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, dev_kfree_skb(csum_skb); goto out_err; } + /* + * No need for _with_wa, this is from the TSO page and + * we leave some space at the end of it so can't hit + * the buggy scenario. + */ iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, tb_len); trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr, tb_phys, tb_len); @@ -338,16 +454,18 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, /* put the payload */ while (data_left) { + int ret; + tb_len = min_t(unsigned int, tso.size, data_left); tb_phys = dma_map_single(trans->dev, tso.data, tb_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(trans->dev, tb_phys))) { + ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd, + tb_phys, tso.data, + tb_len, NULL); + if (ret) { dev_kfree_skb(csum_skb); goto out_err; } - iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, tb_len); - trace_iwlwifi_dev_tx_tb(trans->dev, skb, tso.data, - tb_phys, tb_len); data_left -= tb_len; tso_build_data(skb, &tso, tb_len); @@ -381,6 +499,11 @@ iwl_tfh_tfd *iwl_pcie_gen2_build_tx_amsdu(struct iwl_trans *trans, tb_phys = iwl_pcie_get_first_tb_dma(txq, idx); + /* + * No need for _with_wa, the first TB allocation is aligned up + * to a 64-byte boundary and thus can't be at the end or cross + * a page boundary (much less a 2^32 boundary). + */ iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE); /* @@ -425,24 +548,19 @@ static int iwl_pcie_gen2_tx_add_frags(struct iwl_trans *trans, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr_t tb_phys; - int tb_idx; + unsigned int fragsz = skb_frag_size(frag); + int ret; - if (!skb_frag_size(frag)) + if (!fragsz) continue; tb_phys = skb_frag_dma_map(trans->dev, frag, 0, - skb_frag_size(frag), DMA_TO_DEVICE); - - if (unlikely(dma_mapping_error(trans->dev, tb_phys))) - return -ENOMEM; - tb_idx = iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, - skb_frag_size(frag)); - trace_iwlwifi_dev_tx_tb(trans->dev, skb, skb_frag_address(frag), - tb_phys, skb_frag_size(frag)); - if (tb_idx < 0) - return tb_idx; - - out_meta->tbs |= BIT(tb_idx); + fragsz, DMA_TO_DEVICE); + ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys, + skb_frag_address(frag), + fragsz, out_meta); + if (ret) + return ret; } return 0; @@ -470,6 +588,11 @@ iwl_tfh_tfd *iwl_pcie_gen2_build_tx(struct iwl_trans *trans, /* The first TB points to bi-directional DMA data */ memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE); + /* + * No need for _with_wa, the first TB allocation is aligned up + * to a 64-byte boundary and thus can't be at the end or cross + * a page boundary (much less a 2^32 boundary). + */ iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE); /* @@ -499,26 +622,30 @@ iwl_tfh_tfd *iwl_pcie_gen2_build_tx(struct iwl_trans *trans, tb2_len = skb_headlen(skb) - hdr_len; if (tb2_len > 0) { + int ret; + tb_phys = dma_map_single(trans->dev, skb->data + hdr_len, tb2_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(trans->dev, tb_phys))) + ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys, + skb->data + hdr_len, tb2_len, + NULL); + if (ret) goto out_err; - iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, tb2_len); - trace_iwlwifi_dev_tx_tb(trans->dev, skb, skb->data + hdr_len, - tb_phys, tb2_len); } if (iwl_pcie_gen2_tx_add_frags(trans, skb, tfd, out_meta)) goto out_err; skb_walk_frags(skb, frag) { + int ret; + tb_phys = dma_map_single(trans->dev, frag->data, skb_headlen(frag), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(trans->dev, tb_phys))) + ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys, + frag->data, + skb_headlen(frag), NULL); + if (ret) goto out_err; - iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, skb_headlen(frag)); - trace_iwlwifi_dev_tx_tb(trans->dev, skb, frag->data, - tb_phys, skb_headlen(frag)); if (iwl_pcie_gen2_tx_add_frags(trans, frag, tfd, out_meta)) goto out_err; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 2d1758031a0a..ba37b780dec4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -624,12 +624,18 @@ void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie, struct sk_buff *skb) { struct page **page_ptr; + struct page *next; page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); + next = *page_ptr; + *page_ptr = NULL; - if (*page_ptr) { - __free_page(*page_ptr); - *page_ptr = NULL; + while (next) { + struct page *tmp = next; + + next = *(void **)(page_address(next) + PAGE_SIZE - + sizeof(void *)); + __free_page(tmp); } } @@ -2067,8 +2073,18 @@ struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len, if (!p->page) goto alloc; - /* enough room on this page */ - if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE) + /* + * Check if there's enough room on this page + * + * Note that we put a page chaining pointer *last* in the + * page - we need it somewhere, and if it's there then we + * avoid DMA mapping the last bits of the page which may + * trigger the 32-bit boundary hardware bug. + * + * (see also get_workaround_page() in tx-gen2.c) + */ + if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE - + sizeof(void *)) goto out; /* We don't have enough room on this page, get a new one. */ @@ -2079,6 +2095,8 @@ alloc: if (!p->page) return NULL; p->pos = page_address(p->page); + /* set the chaining pointer to NULL */ + *(void **)(page_address(p->page) + PAGE_SIZE - sizeof(void *)) = NULL; out: *page_ptr = p->page; get_page(p->page); -- cgit From c5a4e8eb683c6a80c6907c12acf7b66665ef4b0b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Nov 2019 09:26:20 +0100 Subject: iwlwifi: pcie: detect the DMA bug and warn if it happens Warn if the DMA bug is going to happen. We don't have a good way of actually aborting in this case and we have workarounds in place for the cases where it happens, but in order to not be surprised add a safety-check and warn. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 8abadfbc793a..a2dc380ac17c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -231,6 +231,17 @@ static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans, int idx = iwl_pcie_gen2_get_num_tbs(trans, tfd); struct iwl_tfh_tb *tb; + /* + * Only WARN here so we know about the issue, but we mess up our + * unmap path because not every place currently checks for errors + * returned from this function - it can only return an error if + * there's no more space, and so when we know there is enough we + * don't always check ... + */ + WARN(crosses_4g_boundary(addr, len), + "possible DMA problem with iova:0x%llx, len:%d\n", + (unsigned long long)addr, len); + if (WARN_ON(idx >= IWL_TFH_NUM_TBS)) return -EINVAL; tb = &tfd->tbs[idx]; -- cgit From a89c72ffd07369f5ccc74f0332d2785a7077241d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Nov 2019 09:09:34 +0100 Subject: iwlwifi: pcie: allocate smaller dev_cmd for TX headers As noted in the previous commit, due to the way we allocate the dev_cmd headers with 324 byte size, and 4/8 byte alignment, the part we use of them (bytes 20..40-68) could still cross a page and thus 2^32 boundary. Address this by using alignment to ensure that the allocation cannot cross a page boundary, on hardware that's affected. To make that not cause more memory consumption, reduce the size of the allocations to the necessary size - we go from 324 bytes in each allocation to 60/68 on gen2 depending on family, and ~120 or so on gen1 (so on gen1 it's a pure reduction in size, since we don't need alignment there). To avoid size and clearing issues, add a new structure that's just the header, and use kmem_cache_zalloc(). Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/dvm/tx.c | 3 +- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 10 +++---- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 26 ++++++++++++++---- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 15 ++++------ drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 6 ++-- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 32 ++++++++++++++++------ drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 21 ++++++++++---- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 20 ++++++++------ 8 files changed, 84 insertions(+), 49 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c index cd73fc5cfcbb..fd454836adbe 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c @@ -267,7 +267,7 @@ int iwlagn_tx_skb(struct iwl_priv *priv, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct iwl_station_priv *sta_priv = NULL; struct iwl_rxon_context *ctx = &priv->contexts[IWL_RXON_CTX_BSS]; - struct iwl_device_cmd *dev_cmd; + struct iwl_device_tx_cmd *dev_cmd; struct iwl_tx_cmd *tx_cmd; __le16 fc; u8 hdr_len; @@ -348,7 +348,6 @@ int iwlagn_tx_skb(struct iwl_priv *priv, if (unlikely(!dev_cmd)) goto drop_unlock_priv; - memset(dev_cmd, 0, sizeof(*dev_cmd)); dev_cmd->hdr.cmd = REPLY_TX; tx_cmd = (struct iwl_tx_cmd *) dev_cmd->payload; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 28bdc9a9617e..f91197e4ae40 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -66,7 +66,9 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, struct device *dev, - const struct iwl_trans_ops *ops) + const struct iwl_trans_ops *ops, + unsigned int cmd_pool_size, + unsigned int cmd_pool_align) { struct iwl_trans *trans; #ifdef CONFIG_LOCKDEP @@ -90,10 +92,8 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, "iwl_cmd_pool:%s", dev_name(trans->dev)); trans->dev_cmd_pool = kmem_cache_create(trans->dev_cmd_pool_name, - sizeof(struct iwl_device_cmd), - sizeof(void *), - SLAB_HWCACHE_ALIGN, - NULL); + cmd_pool_size, cmd_pool_align, + SLAB_HWCACHE_ALIGN, NULL); if (!trans->dev_cmd_pool) return NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 8cadad7364ac..e33df5ad00e0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -193,6 +193,18 @@ struct iwl_device_cmd { }; } __packed; +/** + * struct iwl_device_tx_cmd - buffer for TX command + * @hdr: the header + * @payload: the payload placeholder + * + * The actual structure is sized dynamically according to need. + */ +struct iwl_device_tx_cmd { + struct iwl_cmd_header hdr; + u8 payload[]; +} __packed; + #define TFD_MAX_PAYLOAD_SIZE (sizeof(struct iwl_device_cmd)) /* @@ -544,7 +556,7 @@ struct iwl_trans_ops { int (*send_cmd)(struct iwl_trans *trans, struct iwl_host_cmd *cmd); int (*tx)(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_device_cmd *dev_cmd, int queue); + struct iwl_device_tx_cmd *dev_cmd, int queue); void (*reclaim)(struct iwl_trans *trans, int queue, int ssn, struct sk_buff_head *skbs); @@ -948,22 +960,22 @@ iwl_trans_dump_data(struct iwl_trans *trans, u32 dump_mask) return trans->ops->dump_data(trans, dump_mask); } -static inline struct iwl_device_cmd * +static inline struct iwl_device_tx_cmd * iwl_trans_alloc_tx_cmd(struct iwl_trans *trans) { - return kmem_cache_alloc(trans->dev_cmd_pool, GFP_ATOMIC); + return kmem_cache_zalloc(trans->dev_cmd_pool, GFP_ATOMIC); } int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd); static inline void iwl_trans_free_tx_cmd(struct iwl_trans *trans, - struct iwl_device_cmd *dev_cmd) + struct iwl_device_tx_cmd *dev_cmd) { kmem_cache_free(trans->dev_cmd_pool, dev_cmd); } static inline int iwl_trans_tx(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_device_cmd *dev_cmd, int queue) + struct iwl_device_tx_cmd *dev_cmd, int queue) { if (unlikely(test_bit(STATUS_FW_ERROR, &trans->status))) return -EIO; @@ -1271,7 +1283,9 @@ static inline bool iwl_trans_dbg_ini_valid(struct iwl_trans *trans) *****************************************************/ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, struct device *dev, - const struct iwl_trans_ops *ops); + const struct iwl_trans_ops *ops, + unsigned int cmd_pool_size, + unsigned int cmd_pool_align); void iwl_trans_free(struct iwl_trans *trans); /***************************************************** diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index dc5c02fbc65a..80052ad1fa6d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -490,13 +490,13 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, /* * Allocates and sets the Tx cmd the driver data pointers in the skb */ -static struct iwl_device_cmd * +static struct iwl_device_tx_cmd * iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, struct ieee80211_tx_info *info, int hdrlen, struct ieee80211_sta *sta, u8 sta_id) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct iwl_device_cmd *dev_cmd; + struct iwl_device_tx_cmd *dev_cmd; struct iwl_tx_cmd *tx_cmd; dev_cmd = iwl_trans_alloc_tx_cmd(mvm->trans); @@ -504,11 +504,6 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, if (unlikely(!dev_cmd)) return NULL; - /* Make sure we zero enough of dev_cmd */ - BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_gen2) > sizeof(*tx_cmd)); - BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_gen3) > sizeof(*tx_cmd)); - - memset(dev_cmd, 0, sizeof(dev_cmd->hdr) + sizeof(*tx_cmd)); dev_cmd->hdr.cmd = TX_CMD; if (iwl_mvm_has_new_tx_api(mvm)) { @@ -597,7 +592,7 @@ out: } static void iwl_mvm_skb_prepare_status(struct sk_buff *skb, - struct iwl_device_cmd *cmd) + struct iwl_device_tx_cmd *cmd) { struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb); @@ -716,7 +711,7 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_tx_info info; - struct iwl_device_cmd *dev_cmd; + struct iwl_device_tx_cmd *dev_cmd; u8 sta_id; int hdrlen = ieee80211_hdrlen(hdr->frame_control); __le16 fc = hdr->frame_control; @@ -1078,7 +1073,7 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct iwl_mvm_sta *mvmsta; - struct iwl_device_cmd *dev_cmd; + struct iwl_device_tx_cmd *dev_cmd; __le16 fc; u16 seq_number = 0; u8 tid = IWL_MAX_TID_COUNT; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 3688911ce3df..04361ecf31bd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -305,7 +305,7 @@ struct iwl_cmd_meta { #define IWL_FIRST_TB_SIZE_ALIGN ALIGN(IWL_FIRST_TB_SIZE, 64) struct iwl_pcie_txq_entry { - struct iwl_device_cmd *cmd; + void *cmd; struct sk_buff *skb; /* buffer to free after command completes */ const void *free_buf; @@ -688,7 +688,7 @@ void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id, void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq); int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_device_cmd *dev_cmd, int txq_id); + struct iwl_device_tx_cmd *dev_cmd, int txq_id); void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans); int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd); void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx); @@ -1107,7 +1107,7 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans, unsigned int timeout); void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue); int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_device_cmd *dev_cmd, int txq_id); + struct iwl_device_tx_cmd *dev_cmd, int txq_id); int iwl_trans_pcie_gen2_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd); void iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index a0677131634d..91fa439d1255 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -79,6 +79,7 @@ #include "iwl-agn-hw.h" #include "fw/error-dump.h" #include "fw/dbg.h" +#include "fw/api/tx.h" #include "internal.h" #include "iwl-fh.h" @@ -3460,19 +3461,34 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, { struct iwl_trans_pcie *trans_pcie; struct iwl_trans *trans; - int ret, addr_size; + int ret, addr_size, txcmd_size, txcmd_align; + const struct iwl_trans_ops *ops = &trans_ops_pcie_gen2; + + if (!cfg_trans->gen2) { + ops = &trans_ops_pcie; + txcmd_size = sizeof(struct iwl_tx_cmd); + txcmd_align = sizeof(void *); + } else if (cfg_trans->device_family < IWL_DEVICE_FAMILY_AX210) { + txcmd_size = sizeof(struct iwl_tx_cmd_gen2); + txcmd_align = 64; + } else { + txcmd_size = sizeof(struct iwl_tx_cmd_gen3); + txcmd_align = 128; + } + + txcmd_size += sizeof(struct iwl_cmd_header); + txcmd_size += 36; /* biggest possible 802.11 header */ + + /* Ensure device TX cmd cannot reach/cross a page boundary in gen2 */ + if (WARN_ON(cfg_trans->gen2 && txcmd_size >= txcmd_align)) + return ERR_PTR(-EINVAL); ret = pcim_enable_device(pdev); if (ret) return ERR_PTR(ret); - if (cfg_trans->gen2) - trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), - &pdev->dev, &trans_ops_pcie_gen2); - else - trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), - &pdev->dev, &trans_ops_pcie); - + trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, ops, + txcmd_size, txcmd_align); if (!trans) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index a2dc380ac17c..56d752beb940 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -365,7 +365,8 @@ trace: static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_tfh_tfd *tfd, int start_len, - u8 hdr_len, struct iwl_device_cmd *dev_cmd) + u8 hdr_len, + struct iwl_device_tx_cmd *dev_cmd) { #ifdef CONFIG_INET struct iwl_tx_cmd_gen2 *tx_cmd = (void *)dev_cmd->payload; @@ -496,7 +497,7 @@ out_err: static struct iwl_tfh_tfd *iwl_pcie_gen2_build_tx_amsdu(struct iwl_trans *trans, struct iwl_txq *txq, - struct iwl_device_cmd *dev_cmd, + struct iwl_device_tx_cmd *dev_cmd, struct sk_buff *skb, struct iwl_cmd_meta *out_meta, int hdr_len, @@ -533,6 +534,10 @@ iwl_tfh_tfd *iwl_pcie_gen2_build_tx_amsdu(struct iwl_trans *trans, tb_phys = dma_map_single(trans->dev, tb1_addr, len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) goto out_err; + /* + * No need for _with_wa(), we ensure (via alignment) that the data + * here can never cross or end at a page boundary. + */ iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, len); if (iwl_pcie_gen2_build_amsdu(trans, skb, tfd, @@ -580,7 +585,7 @@ static int iwl_pcie_gen2_tx_add_frags(struct iwl_trans *trans, static struct iwl_tfh_tfd *iwl_pcie_gen2_build_tx(struct iwl_trans *trans, struct iwl_txq *txq, - struct iwl_device_cmd *dev_cmd, + struct iwl_device_tx_cmd *dev_cmd, struct sk_buff *skb, struct iwl_cmd_meta *out_meta, int hdr_len, @@ -625,6 +630,10 @@ iwl_tfh_tfd *iwl_pcie_gen2_build_tx(struct iwl_trans *trans, tb_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) goto out_err; + /* + * No need for _with_wa(), we ensure (via alignment) that the data + * here can never cross or end at a page boundary. + */ iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, tb1_len); trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, hdr_len); @@ -671,7 +680,7 @@ out_err: static struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, - struct iwl_device_cmd *dev_cmd, + struct iwl_device_tx_cmd *dev_cmd, struct sk_buff *skb, struct iwl_cmd_meta *out_meta) { @@ -711,7 +720,7 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans, } int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_device_cmd *dev_cmd, int txq_id) + struct iwl_device_tx_cmd *dev_cmd, int txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_cmd_meta *out_meta; @@ -736,7 +745,7 @@ int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb, /* don't put the packet on the ring, if there is no room */ if (unlikely(iwl_queue_space(trans, txq) < 3)) { - struct iwl_device_cmd **dev_cmd_ptr; + struct iwl_device_tx_cmd **dev_cmd_ptr; dev_cmd_ptr = (void *)((u8 *)skb->cb + trans_pcie->dev_cmd_offs); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index ba37b780dec4..b0eb52b4951b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -213,8 +213,8 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, u8 sec_ctl = 0; u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE; __le16 bc_ent; - struct iwl_tx_cmd *tx_cmd = - (void *)txq->entries[txq->write_ptr].cmd->payload; + struct iwl_device_tx_cmd *dev_cmd = txq->entries[txq->write_ptr].cmd; + struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload; u8 sta_id = tx_cmd->sta_id; scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; @@ -257,8 +257,8 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans, int read_ptr = txq->read_ptr; u8 sta_id = 0; __le16 bc_ent; - struct iwl_tx_cmd *tx_cmd = - (void *)txq->entries[read_ptr].cmd->payload; + struct iwl_device_tx_cmd *dev_cmd = txq->entries[read_ptr].cmd; + struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload; WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX); @@ -1202,7 +1202,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, while (!skb_queue_empty(&overflow_skbs)) { struct sk_buff *skb = __skb_dequeue(&overflow_skbs); - struct iwl_device_cmd *dev_cmd_ptr; + struct iwl_device_tx_cmd *dev_cmd_ptr; dev_cmd_ptr = *(void **)((u8 *)skb->cb + trans_pcie->dev_cmd_offs); @@ -2125,7 +2125,8 @@ static void iwl_compute_pseudo_hdr_csum(void *iph, struct tcphdr *tcph, static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_txq *txq, u8 hdr_len, struct iwl_cmd_meta *out_meta, - struct iwl_device_cmd *dev_cmd, u16 tb1_len) + struct iwl_device_tx_cmd *dev_cmd, + u16 tb1_len) { struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload; struct iwl_trans_pcie *trans_pcie = txq->trans_pcie; @@ -2303,7 +2304,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_txq *txq, u8 hdr_len, struct iwl_cmd_meta *out_meta, - struct iwl_device_cmd *dev_cmd, u16 tb1_len) + struct iwl_device_tx_cmd *dev_cmd, + u16 tb1_len) { /* No A-MSDU without CONFIG_INET */ WARN_ON(1); @@ -2313,7 +2315,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, #endif /* CONFIG_INET */ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_device_cmd *dev_cmd, int txq_id) + struct iwl_device_tx_cmd *dev_cmd, int txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct ieee80211_hdr *hdr; @@ -2370,7 +2372,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, /* don't put the packet on the ring, if there is no room */ if (unlikely(iwl_queue_space(trans, txq) < 3)) { - struct iwl_device_cmd **dev_cmd_ptr; + struct iwl_device_tx_cmd **dev_cmd_ptr; dev_cmd_ptr = (void *)((u8 *)skb->cb + trans_pcie->dev_cmd_offs); -- cgit From 4f565ee2999960d541854389e20b78e7105e3afc Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 25 Nov 2019 11:50:58 +0200 Subject: iwlwifi: fix TLV fragment allocation loop In the allocation loop, "pages" will never become zero (because of the DIV_ROUND_UP), so if we can't allocate any size and pages becomes 1, we will keep trying to allocate 1 page until it succeeds. And in that case, as coverity reported, block will never be NULL. Reported-by: coverity-bot Addresses-Coverity-ID: 1487402 ("Control flow issues") Fixes: 14124b25780d ("iwlwifi: dbg_ini: implement monitor allocation flow") Signed-off-by: Luca Coelho Fixes: 14124b25780d ("iwlwifi: dbg_ini: implement monitor allocation flow") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index f266647dc08c..ce8f248c33ea 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -480,7 +480,14 @@ static int iwl_dbg_tlv_alloc_fragment(struct iwl_fw_runtime *fwrt, if (!frag || frag->size || !pages) return -EIO; - while (pages) { + /* + * We try to allocate as many pages as we can, starting with + * the requested amount and going down until we can allocate + * something. Because of DIV_ROUND_UP(), pages will never go + * down to 0 and stop the loop, so stop when pages reaches 1, + * which is too small anyway. + */ + while (pages > 1) { block = dma_alloc_coherent(fwrt->dev, pages * PAGE_SIZE, &physical, GFP_KERNEL | __GFP_NOWARN); -- cgit From b3f20e098293892388d6a0491d6bbb2efb46fbff Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 25 Nov 2019 13:21:58 +0200 Subject: iwlwifi: mvm: fix NVM check for 3168 devices We had a check on !NVM_EXT and then a check for NVM_SDP in the else block of this if. The else block, obviously, could only be reached if using NVM_EXT, so it would never be NVM_SDP. Fix that by checking whether the nvm_type is IWL_NVM instead of checking for !IWL_NVM_EXT to solve this issue. Reported-by: Stefan Sperling Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 945c1ea5cda8..493bcc54a848 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -281,7 +281,7 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) int regulatory_type; /* Checking for required sections */ - if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) { + if (mvm->trans->cfg->nvm_type == IWL_NVM) { if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data || !mvm->nvm_sections[mvm->cfg->nvm_hw_section_num].data) { IWL_ERR(mvm, "Can't parse empty OTP/NVM sections\n"); -- cgit From ed780545c13a21dc0276807897e2d4fc142e11ac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Nov 2019 11:02:31 +0100 Subject: iwlwifi: mvm: report TX rate to mac80211 directly for RS offload If we have offloaded rate scaling, which is always true for those devices supporting HE, then report the TX rate directly from the data the firmware gives us, instead of only passing it to mac80211 on frame status only and for it to track it. First of all, this makes us always report the last good rate that the rate scaling algorithm picked, which is better than reporting the last rate for any frame since management frames etc. are sent with very low rates and could interfere. Additionally, this allows us to properly report HE rates, though in case there's a lot of trigger-based traffic, we don't get any choice in the rates and don't report that properly right now. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 126 ++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 32dc9d6f0fb6..481f1c9d814f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4771,6 +4771,125 @@ static int iwl_mvm_mac_get_survey(struct ieee80211_hw *hw, int idx, return ret; } +static void iwl_mvm_set_sta_rate(u32 rate_n_flags, struct rate_info *rinfo) +{ + switch (rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK) { + case RATE_MCS_CHAN_WIDTH_20: + rinfo->bw = RATE_INFO_BW_20; + break; + case RATE_MCS_CHAN_WIDTH_40: + rinfo->bw = RATE_INFO_BW_40; + break; + case RATE_MCS_CHAN_WIDTH_80: + rinfo->bw = RATE_INFO_BW_80; + break; + case RATE_MCS_CHAN_WIDTH_160: + rinfo->bw = RATE_INFO_BW_160; + break; + } + + if (rate_n_flags & RATE_MCS_HT_MSK) { + rinfo->flags |= RATE_INFO_FLAGS_MCS; + rinfo->mcs = u32_get_bits(rate_n_flags, RATE_HT_MCS_INDEX_MSK); + rinfo->nss = u32_get_bits(rate_n_flags, + RATE_HT_MCS_NSS_MSK) + 1; + if (rate_n_flags & RATE_MCS_SGI_MSK) + rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; + } else if (rate_n_flags & RATE_MCS_VHT_MSK) { + rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; + rinfo->mcs = u32_get_bits(rate_n_flags, + RATE_VHT_MCS_RATE_CODE_MSK); + rinfo->nss = u32_get_bits(rate_n_flags, + RATE_VHT_MCS_NSS_MSK) + 1; + if (rate_n_flags & RATE_MCS_SGI_MSK) + rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; + } else if (rate_n_flags & RATE_MCS_HE_MSK) { + u32 gi_ltf = u32_get_bits(rate_n_flags, + RATE_MCS_HE_GI_LTF_MSK); + + rinfo->flags |= RATE_INFO_FLAGS_HE_MCS; + rinfo->mcs = u32_get_bits(rate_n_flags, + RATE_VHT_MCS_RATE_CODE_MSK); + rinfo->nss = u32_get_bits(rate_n_flags, + RATE_VHT_MCS_NSS_MSK) + 1; + + if (rate_n_flags & RATE_MCS_HE_106T_MSK) { + rinfo->bw = RATE_INFO_BW_HE_RU; + rinfo->he_ru_alloc = NL80211_RATE_INFO_HE_RU_ALLOC_106; + } + + switch (rate_n_flags & RATE_MCS_HE_TYPE_MSK) { + case RATE_MCS_HE_TYPE_SU: + case RATE_MCS_HE_TYPE_EXT_SU: + if (gi_ltf == 0 || gi_ltf == 1) + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_0_8; + else if (gi_ltf == 2) + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_1_6; + else if (rate_n_flags & RATE_MCS_SGI_MSK) + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_0_8; + else + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_3_2; + break; + case RATE_MCS_HE_TYPE_MU: + if (gi_ltf == 0 || gi_ltf == 1) + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_0_8; + else if (gi_ltf == 2) + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_1_6; + else + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_3_2; + break; + case RATE_MCS_HE_TYPE_TRIG: + if (gi_ltf == 0 || gi_ltf == 1) + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_1_6; + else + rinfo->he_gi = NL80211_RATE_INFO_HE_GI_3_2; + break; + } + + if (rate_n_flags & RATE_HE_DUAL_CARRIER_MODE_MSK) + rinfo->he_dcm = 1; + } else { + switch (u32_get_bits(rate_n_flags, RATE_LEGACY_RATE_MSK)) { + case IWL_RATE_1M_PLCP: + rinfo->legacy = 10; + break; + case IWL_RATE_2M_PLCP: + rinfo->legacy = 20; + break; + case IWL_RATE_5M_PLCP: + rinfo->legacy = 55; + break; + case IWL_RATE_11M_PLCP: + rinfo->legacy = 110; + break; + case IWL_RATE_6M_PLCP: + rinfo->legacy = 60; + break; + case IWL_RATE_9M_PLCP: + rinfo->legacy = 90; + break; + case IWL_RATE_12M_PLCP: + rinfo->legacy = 120; + break; + case IWL_RATE_18M_PLCP: + rinfo->legacy = 180; + break; + case IWL_RATE_24M_PLCP: + rinfo->legacy = 240; + break; + case IWL_RATE_36M_PLCP: + rinfo->legacy = 360; + break; + case IWL_RATE_48M_PLCP: + rinfo->legacy = 480; + break; + case IWL_RATE_54M_PLCP: + rinfo->legacy = 540; + break; + } + } +} + static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -4785,6 +4904,13 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); } + if (iwl_mvm_has_tlc_offload(mvm)) { + struct iwl_lq_sta_rs_fw *lq_sta = &mvmsta->lq_sta.rs_fw; + + iwl_mvm_set_sta_rate(lq_sta->last_rate_n_flags, &sinfo->txrate); + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); + } + /* if beacon filtering isn't on mac80211 does it anyway */ if (!(vif->driver_flags & IEEE80211_VIF_BEACON_FILTER)) return; -- cgit From 2763bba6328c53c455d8f7f5302b80030551c31b Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Wed, 27 Nov 2019 14:55:58 -0500 Subject: iwlwifi: Don't ignore the cap field upon mcc update When receiving a new MCC driver get all the data about the new country code and its regulatory information. Mistakenly, we ignored the cap field, which includes global regulatory information which should be applies to every channel. Fix it. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 48 +++++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h | 6 +-- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 1e240a2a8329..068e4924c04e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -224,6 +224,34 @@ enum iwl_nvm_channel_flags { NVM_CHANNEL_DC_HIGH = BIT(12), }; +/** + * enum iwl_reg_capa_flags - global flags applied for the whole regulatory + * domain. + * @REG_CAPA_BF_CCD_LOW_BAND: Beam-forming or Cyclic Delay Diversity in the + * 2.4Ghz band is allowed. + * @REG_CAPA_BF_CCD_HIGH_BAND: Beam-forming or Cyclic Delay Diversity in the + * 5Ghz band is allowed. + * @REG_CAPA_160MHZ_ALLOWED: 11ac channel with a width of 160Mhz is allowed + * for this regulatory domain (valid only in 5Ghz). + * @REG_CAPA_80MHZ_ALLOWED: 11ac channel with a width of 80Mhz is allowed + * for this regulatory domain (valid only in 5Ghz). + * @REG_CAPA_MCS_8_ALLOWED: 11ac with MCS 8 is allowed. + * @REG_CAPA_MCS_9_ALLOWED: 11ac with MCS 9 is allowed. + * @REG_CAPA_40MHZ_FORBIDDEN: 11n channel with a width of 40Mhz is forbidden + * for this regulatory domain (valid only in 5Ghz). + * @REG_CAPA_DC_HIGH_ENABLED: DC HIGH allowed. + */ +enum iwl_reg_capa_flags { + REG_CAPA_BF_CCD_LOW_BAND = BIT(0), + REG_CAPA_BF_CCD_HIGH_BAND = BIT(1), + REG_CAPA_160MHZ_ALLOWED = BIT(2), + REG_CAPA_80MHZ_ALLOWED = BIT(3), + REG_CAPA_MCS_8_ALLOWED = BIT(4), + REG_CAPA_MCS_9_ALLOWED = BIT(5), + REG_CAPA_40MHZ_FORBIDDEN = BIT(7), + REG_CAPA_DC_HIGH_ENABLED = BIT(9), +}; + static inline void iwl_nvm_print_channel_flags(struct device *dev, u32 level, int chan, u32 flags) { @@ -1038,6 +1066,7 @@ IWL_EXPORT_SYMBOL(iwl_parse_nvm_data); static u32 iwl_nvm_get_regdom_bw_flags(const u16 *nvm_chan, int ch_idx, u16 nvm_flags, + u16 cap_flags, const struct iwl_cfg *cfg) { u32 flags = NL80211_RRF_NO_HT40; @@ -1076,13 +1105,27 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u16 *nvm_chan, (flags & NL80211_RRF_NO_IR)) flags |= NL80211_RRF_GO_CONCURRENT; + /* + * cap_flags is per regulatory domain so apply it for every channel + */ + if (ch_idx >= NUM_2GHZ_CHANNELS) { + if (cap_flags & REG_CAPA_40MHZ_FORBIDDEN) + flags |= NL80211_RRF_NO_HT40; + + if (!(cap_flags & REG_CAPA_80MHZ_ALLOWED)) + flags |= NL80211_RRF_NO_80MHZ; + + if (!(cap_flags & REG_CAPA_160MHZ_ALLOWED)) + flags |= NL80211_RRF_NO_160MHZ; + } + return flags; } struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc, - u16 geo_info) + u16 geo_info, u16 cap) { int ch_idx; u16 ch_flags; @@ -1140,7 +1183,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, } reg_rule_flags = iwl_nvm_get_regdom_bw_flags(nvm_chan, ch_idx, - ch_flags, cfg); + ch_flags, cap, + cfg); /* we can't continue the same rule */ if (ch_idx == 0 || prev_reg_rule_flags != reg_rule_flags || diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index b7e1ddf8f177..4eeedb41e9ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -7,7 +7,7 @@ * * Copyright(c) 2008 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -29,7 +29,7 @@ * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,7 +103,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc, - u16 geo_info); + u16 geo_info, u16 cap); /** * struct iwl_nvm_section - describes an NVM section in memory. diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 481f1c9d814f..a46204b905d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -256,7 +256,8 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy, __le32_to_cpu(resp->n_channels), resp->channels, __le16_to_cpu(resp->mcc), - __le16_to_cpu(resp->geo_info)); + __le16_to_cpu(resp->geo_info), + __le16_to_cpu(resp->cap)); /* Store the return source id */ src_id = resp->source_id; kfree(resp); -- cgit From d84a7a654a66eead599cfd4f436d1f921e01074f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Dec 2019 09:31:07 +0100 Subject: iwlwifi: pcie: extend hardware workaround to context-info After more investigation on the hardware side, it appears that the hardware bug regarding 2^32 boundary reaching/crossing also affects other uses of the DMA engine, in particular the ones triggered by the context-info (image loader) mechanism. It also turns out that the bug only affects devices with gen2 TX hardware engine, so we don't need to change context info for gen3. The TX path workarounds are simpler to still keep for both though. Add the workaround to that code as well; this is a lot simpler as we have just a single way to allocate DMA memory there. I made the algorithm recursive (with a small limit) since it's actually (almost) impossible to hit this today - dma_alloc_coherent is currently documented to always return 32-bit addressable memory regardless of the DMA mask for it, and so we could only get REALLY unlucky to get the very last page in that area. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/pcie/ctxt-info.c | 45 ++++++++++++++++++++-- drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 10 +++++ drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 14 +------ 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c index d38cefbb779e..e249e3fd14c6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c @@ -57,6 +57,42 @@ #include "internal.h" #include "iwl-prph.h" +static void *_iwl_pcie_ctxt_info_dma_alloc_coherent(struct iwl_trans *trans, + size_t size, + dma_addr_t *phys, + int depth) +{ + void *result; + + if (WARN(depth > 2, + "failed to allocate DMA memory not crossing 2^32 boundary")) + return NULL; + + result = dma_alloc_coherent(trans->dev, size, phys, GFP_KERNEL); + + if (!result) + return NULL; + + if (unlikely(iwl_pcie_crosses_4g_boundary(*phys, size))) { + void *old = result; + dma_addr_t oldphys = *phys; + + result = _iwl_pcie_ctxt_info_dma_alloc_coherent(trans, size, + phys, + depth + 1); + dma_free_coherent(trans->dev, size, old, oldphys); + } + + return result; +} + +static void *iwl_pcie_ctxt_info_dma_alloc_coherent(struct iwl_trans *trans, + size_t size, + dma_addr_t *phys) +{ + return _iwl_pcie_ctxt_info_dma_alloc_coherent(trans, size, phys, 0); +} + void iwl_pcie_ctxt_info_free_paging(struct iwl_trans *trans) { struct iwl_self_init_dram *dram = &trans->init_dram; @@ -161,14 +197,17 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, struct iwl_context_info *ctxt_info; struct iwl_context_info_rbd_cfg *rx_cfg; u32 control_flags = 0, rb_size; + dma_addr_t phys; int ret; - ctxt_info = dma_alloc_coherent(trans->dev, sizeof(*ctxt_info), - &trans_pcie->ctxt_info_dma_addr, - GFP_KERNEL); + ctxt_info = iwl_pcie_ctxt_info_dma_alloc_coherent(trans, + sizeof(*ctxt_info), + &phys); if (!ctxt_info) return -ENOMEM; + trans_pcie->ctxt_info_dma_addr = phys; + ctxt_info->version.version = 0; ctxt_info->version.mac_id = cpu_to_le16((u16)iwl_read32(trans, CSR_HW_REV)); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 04361ecf31bd..f14bcef3495e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -672,6 +672,16 @@ void iwl_pcie_disable_ict(struct iwl_trans *trans); /***************************************************** * TX / HCMD ******************************************************/ +/* + * We need this inline in case dma_addr_t is only 32-bits - since the + * hardware is always 64-bit, the issue can still occur in that case, + * so use u64 for 'phys' here to force the addition in 64-bit. + */ +static inline bool iwl_pcie_crosses_4g_boundary(u64 phys, u16 len) +{ + return upper_32_bits(phys) != upper_32_bits(phys + len); +} + int iwl_pcie_tx_init(struct iwl_trans *trans); int iwl_pcie_gen2_tx_init(struct iwl_trans *trans, int txq_id, int queue_size); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 56d752beb940..bfb984b2e00c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -213,16 +213,6 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) } } -/* - * We need this inline in case dma_addr_t is only 32-bits - since the - * hardware is always 64-bit, the issue can still occur in that case, - * so use u64 for 'phys' here to force the addition in 64-bit. - */ -static inline bool crosses_4g_boundary(u64 phys, u16 len) -{ - return upper_32_bits(phys) != upper_32_bits(phys + len); -} - static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans, struct iwl_tfh_tfd *tfd, dma_addr_t addr, u16 len) @@ -238,7 +228,7 @@ static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans, * there's no more space, and so when we know there is enough we * don't always check ... */ - WARN(crosses_4g_boundary(addr, len), + WARN(iwl_pcie_crosses_4g_boundary(addr, len), "possible DMA problem with iova:0x%llx, len:%d\n", (unsigned long long)addr, len); @@ -300,7 +290,7 @@ static int iwl_pcie_gen2_set_tb_with_wa(struct iwl_trans *trans, if (unlikely(dma_mapping_error(trans->dev, phys))) return -ENOMEM; - if (likely(!crosses_4g_boundary(phys, len))) { + if (likely(!iwl_pcie_crosses_4g_boundary(phys, len))) { ret = iwl_pcie_gen2_set_tb(trans, tfd, phys, len); if (ret < 0) -- cgit From b9f726c94224e863d4d3458dfec2e7e1284a39ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Dec 2019 10:09:56 +0100 Subject: iwlwifi: mvm: fix SKB leak on invalid queue It used to be the case that if we got here, we wouldn't warn but instead allocate the queue (DQA). With using the mac80211 TXQs model this changed, and we really have nothing to do with the frame here anymore, hence the warning now. However, clearly we missed in coding & review that this is now a pure error path and leaks the SKB if we return 0 instead of an indication that the SKB needs to be freed. Fix this. Signed-off-by: Johannes Berg Fixes: cfbc6c4c5b91 ("iwlwifi: mvm: support mac80211 TXQs model") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 80052ad1fa6d..eaeb8501c9b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1149,7 +1149,7 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, if (WARN_ONCE(txq_id == IWL_MVM_INVALID_QUEUE, "Invalid TXQ id")) { iwl_trans_free_tx_cmd(mvm->trans, dev_cmd); spin_unlock(&mvmsta->lock); - return 0; + return -1; } if (!iwl_mvm_has_new_tx_api(mvm)) { -- cgit From df2378ab0f2a9dd4cf4501268af1902cc4ebacd8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Dec 2019 10:15:52 +0100 Subject: iwlwifi: mvm: fix potential SKB leak on TXQ TX When we transmit after TXQ dequeue, we aren't paying attention to the return value of the transmit functions, leading to a potential SKB leak. Refactor the code a bit (and rename ..._tx to ..._tx_sta) to check for this happening. Signed-off-by: Johannes Berg Fixes: cfbc6c4c5b91 ("iwlwifi: mvm: support mac80211 TXQs model") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 28 +++++++++++++---------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 4 ++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index a46204b905d2..6717f25c46b1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -755,6 +755,20 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) return ret; } +static void iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_sta *sta) +{ + if (likely(sta)) { + if (likely(iwl_mvm_tx_skb_sta(mvm, skb, sta) == 0)) + return; + } else { + if (likely(iwl_mvm_tx_skb_non_sta(mvm, skb) == 0)) + return; + } + + ieee80211_free_txskb(mvm->hw, skb); +} + static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) @@ -798,14 +812,7 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, } } - if (sta) { - if (iwl_mvm_tx_skb(mvm, skb, sta)) - goto drop; - return; - } - - if (iwl_mvm_tx_skb_non_sta(mvm, skb)) - goto drop; + iwl_mvm_tx_skb(mvm, skb, sta); return; drop: ieee80211_free_txskb(hw, skb); @@ -855,10 +862,7 @@ void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) break; } - if (!txq->sta) - iwl_mvm_tx_skb_non_sta(mvm, skb); - else - iwl_mvm_tx_skb(mvm, skb, txq->sta); + iwl_mvm_tx_skb(mvm, skb, txq->sta); } } while (atomic_dec_return(&mvmtxq->tx_request)); rcu_read_unlock(); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 3ec8de00f3aa..e5b21f8b02e7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1508,8 +1508,8 @@ int __must_check iwl_mvm_send_cmd_status(struct iwl_mvm *mvm, int __must_check iwl_mvm_send_cmd_pdu_status(struct iwl_mvm *mvm, u32 id, u16 len, const void *data, u32 *status); -int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, - struct ieee80211_sta *sta); +int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_sta *sta); int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb); void iwl_mvm_set_tx_cmd(struct iwl_mvm *mvm, struct sk_buff *skb, struct iwl_tx_cmd *tx_cmd, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index eaeb8501c9b5..ddfc9a668036 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1201,8 +1201,8 @@ drop: return -1; } -int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, - struct ieee80211_sta *sta) +int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_sta *sta) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct ieee80211_tx_info info; -- cgit From 3d1b28fd30ab8b87c0935584aff6f9b433939d2c Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 10 Dec 2019 15:11:36 +0200 Subject: iwlwifi: pcie: rename L0S_ENABLED bit to L0S_DISABLED This bit has been misnamed since the initial implementation of the driver. The correct semantics is that setting this bit disables L0S states, and we already clearly use it as such in the code. Rename it to avoid confusion. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index 92d9898ab7c2..c2f7252ae4e7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -379,7 +379,7 @@ enum { /* CSR GIO */ -#define CSR_GIO_REG_VAL_L0S_ENABLED (0x00000002) +#define CSR_GIO_REG_VAL_L0S_DISABLED (0x00000002) /* * UCODE-DRIVER GP (general purpose) mailbox register 1 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 91fa439d1255..2e599ba2f2ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -311,9 +311,9 @@ void iwl_pcie_apm_config(struct iwl_trans *trans) */ pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl); if (lctl & PCI_EXP_LNKCTL_ASPM_L1) - iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); + iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_DISABLED); else - iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); + iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_DISABLED); trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S); pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap); -- cgit From cc894b85abf70d40e9920976c7fadd6ded757c60 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 10 Dec 2019 15:18:16 +0200 Subject: iwlwifi: pcie: always disable L0S states L0S states have been found to be unstable with our devices and in newer hardware they are not supported at all, so we must always set the L0S_DISABLED bit. Previously we were only disabling L0S states if L1 was supported, because the assumption was that transitions from L0S to L1 state was the problematic case. But now we should never use L0S, so do it regardless of whether L1 is supported or not. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2e599ba2f2ad..f60d66f1e55b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -302,18 +302,13 @@ void iwl_pcie_apm_config(struct iwl_trans *trans) u16 cap; /* - * HW bug W/A for instability in PCIe bus L0S->L1 transition. - * Check if BIOS (or OS) enabled L1-ASPM on this device. - * If so (likely), disable L0S, so device moves directly L0->L1; - * costs negligible amount of power savings. - * If not (unlikely), enable L0S, so there is at least some - * power savings, even without L1. + * L0S states have been found to be unstable with our devices + * and in newer hardware they are not officially supported at + * all, so we must always set the L0S_DISABLED bit. */ + iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_DISABLED); + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl); - if (lctl & PCI_EXP_LNKCTL_ASPM_L1) - iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_DISABLED); - else - iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_DISABLED); trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S); pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap); -- cgit From 990aba28f5001f6e90fdd84e13612b560a75deda Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 15 Dec 2019 20:06:22 +0200 Subject: iwlwifi: dbg: force stop the debug monitor HW The driver is required to stop the debug monitor HW recording regardless of the debug configuration since the driver is responsible to halt the FW DBGC. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index ed90dd104366..4c60f9959f7b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2669,12 +2669,7 @@ int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt, { int ret = 0; - /* if the FW crashed or not debug monitor cfg was given, there is - * no point in changing the recording state - */ - if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status) || - (!fwrt->trans->dbg.dest_tlv && - fwrt->trans->dbg.ini_dest == IWL_FW_INI_LOCATION_INVALID)) + if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status)) return 0; if (fw_has_capa(&fwrt->fw->ucode_capa, -- cgit From f06021a18fcf8d8a1e79c5e0a8ec4eb2b038e153 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 16 Dec 2019 22:23:15 +0200 Subject: iwlwifi: remove lar_disable module parameter This is an old parameter that was used supposed to be used only when LAR was still under development. It should not be used anymore, but, since it's available, end-users have been mangling with it unnecessarily. In some cases it can cause problems because when LAR is supported the driver and the firmware do not expect it to be disabled. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 3 --- drivers/net/wireless/intel/iwlwifi/iwl-modparams.h | 2 -- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 13 +++++++------ drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 --- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 10 ++-------- 6 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 4096ccf58b07..bc8c959588ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1817,9 +1817,6 @@ MODULE_PARM_DESC(antenna_coupling, module_param_named(nvm_file, iwlwifi_mod_params.nvm_file, charp, 0444); MODULE_PARM_DESC(nvm_file, "NVM file name"); -module_param_named(lar_disable, iwlwifi_mod_params.lar_disable, bool, 0444); -MODULE_PARM_DESC(lar_disable, "disable LAR functionality (default: N)"); - module_param_named(uapsd_disable, iwlwifi_mod_params.uapsd_disable, uint, 0644); MODULE_PARM_DESC(uapsd_disable, "disable U-APSD functionality bitmap 1: BSS 2: P2P Client (default: 3)"); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h index ebea3f308b5d..82e5cac23d8d 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h @@ -115,7 +115,6 @@ enum iwl_uapsd_disable { * @nvm_file: specifies a external NVM file * @uapsd_disable: disable U-APSD, see &enum iwl_uapsd_disable, default = * IWL_DISABLE_UAPSD_BSS | IWL_DISABLE_UAPSD_P2P_CLIENT - * @lar_disable: disable LAR (regulatory), default = 0 * @fw_monitor: allow to use firmware monitor * @disable_11ac: disable VHT capabilities, default = false. * @remove_when_gone: remove an inaccessible device from the PCIe bus. @@ -136,7 +135,6 @@ struct iwl_mod_params { int antenna_coupling; char *nvm_file; u32 uapsd_disable; - bool lar_disable; bool fw_monitor; bool disable_11ac; /** diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 068e4924c04e..d4f834b52f50 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -967,10 +967,11 @@ iwl_nvm_no_wide_in_5ghz(struct iwl_trans *trans, const struct iwl_cfg *cfg, struct iwl_nvm_data * iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, + const struct iwl_fw *fw, const __be16 *nvm_hw, const __le16 *nvm_sw, const __le16 *nvm_calib, const __le16 *regulatory, const __le16 *mac_override, const __le16 *phy_sku, - u8 tx_chains, u8 rx_chains, bool lar_fw_supported) + u8 tx_chains, u8 rx_chains) { struct iwl_nvm_data *data; bool lar_enabled; @@ -1050,7 +1051,8 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, return NULL; } - if (lar_fw_supported && lar_enabled) + if (lar_enabled && + fw_has_capa(&fw->ucode_capa, IWL_UCODE_TLV_CAPA_LAR_SUPPORT)) sbands_flags |= IWL_NVM_SBANDS_FLAGS_LAR; if (iwl_nvm_no_wide_in_5ghz(trans, cfg, nvm_hw)) @@ -1449,9 +1451,6 @@ struct iwl_nvm_data *iwl_get_nvm(struct iwl_trans *trans, .id = WIDE_ID(REGULATORY_AND_NVM_GROUP, NVM_GET_INFO) }; int ret; - bool lar_fw_supported = !iwlwifi_mod_params.lar_disable && - fw_has_capa(&fw->ucode_capa, - IWL_UCODE_TLV_CAPA_LAR_SUPPORT); bool empty_otp; u32 mac_flags; u32 sbands_flags = 0; @@ -1529,7 +1528,9 @@ struct iwl_nvm_data *iwl_get_nvm(struct iwl_trans *trans, nvm->valid_tx_ant = (u8)le32_to_cpu(rsp->phy_sku.tx_chains); nvm->valid_rx_ant = (u8)le32_to_cpu(rsp->phy_sku.rx_chains); - if (le32_to_cpu(rsp->regulatory.lar_enabled) && lar_fw_supported) { + if (le32_to_cpu(rsp->regulatory.lar_enabled) && + fw_has_capa(&fw->ucode_capa, + IWL_UCODE_TLV_CAPA_LAR_SUPPORT)) { nvm->lar_enabled = true; sbands_flags |= IWL_NVM_SBANDS_FLAGS_LAR; } diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index 4eeedb41e9ac..fb0b385d10fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -85,10 +85,11 @@ enum iwl_nvm_sbands_flags { */ struct iwl_nvm_data * iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, + const struct iwl_fw *fw, const __be16 *nvm_hw, const __le16 *nvm_sw, const __le16 *nvm_calib, const __le16 *regulatory, const __le16 *mac_override, const __le16 *phy_sku, - u8 tx_chains, u8 rx_chains, bool lar_fw_supported); + u8 tx_chains, u8 rx_chains); /** * iwl_parse_mcc_info - parse MCC (mobile country code) info coming from FW diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index e5b21f8b02e7..67ab7e7e9c9d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1298,9 +1298,6 @@ static inline bool iwl_mvm_is_lar_supported(struct iwl_mvm *mvm) bool tlv_lar = fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_LAR_SUPPORT); - if (iwlwifi_mod_params.lar_disable) - return false; - /* * Enable LAR only if it is supported by the FW (TLV) && * enabled in the NVM diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 493bcc54a848..46128a2a9c6e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -277,7 +277,6 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) struct iwl_nvm_section *sections = mvm->nvm_sections; const __be16 *hw; const __le16 *sw, *calib, *regulatory, *mac_override, *phy_sku; - bool lar_enabled; int regulatory_type; /* Checking for required sections */ @@ -327,14 +326,9 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY_SDP].data : (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY].data; - lar_enabled = !iwlwifi_mod_params.lar_disable && - fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_LAR_SUPPORT); - - return iwl_parse_nvm_data(mvm->trans, mvm->cfg, hw, sw, calib, + return iwl_parse_nvm_data(mvm->trans, mvm->cfg, mvm->fw, hw, sw, calib, regulatory, mac_override, phy_sku, - mvm->fw->valid_tx_ant, mvm->fw->valid_rx_ant, - lar_enabled); + mvm->fw->valid_tx_ant, mvm->fw->valid_rx_ant); } /* Loads the NVM data stored in mvm->nvm_sections into the NIC */ -- cgit From fb3c06cfda0db68f6082f05c43d63c1fb1761af0 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 23 Dec 2019 13:00:59 +0200 Subject: iwlwifi: fw: make pos static in iwl_sar_get_ewrd_table() loop In the for loop where we are supposed to go through the entire table, we are using a non-static local to keep the pos index. This makes each iteration start with 3, so we always access the first item on the table. Fix this by moving the variable outside of the loo so it doesn't lose its value at every iteration. Reported-by: Colin Ian King Signed-off-by: Luca Coelho Fixes: ba3224db7803 ("iwlwifi: mvm: fix an out-of-bound access") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 40fe2d667622..48d375a86d86 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -357,8 +357,8 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) { union acpi_object *wifi_pkg, *data; bool enabled; - int i, n_profiles, tbl_rev; - int ret = 0; + int i, n_profiles, tbl_rev, pos; + int ret = 0; data = iwl_acpi_get_object(fwrt->dev, ACPI_EWRD_METHOD); if (IS_ERR(data)) @@ -390,10 +390,10 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) goto out_free; } - for (i = 0; i < n_profiles; i++) { - /* the tables start at element 3 */ - int pos = 3; + /* the tables start at element 3 */ + pos = 3; + for (i = 0; i < n_profiles; i++) { /* The EWRD profiles officially go from 2 to 4, but we * save them in sar_profiles[1-3] (because we don't * have profile 0). So in the array we start from 1. -- cgit From 205608749e1ef394f513888091e613c5bfccbcca Mon Sep 17 00:00:00 2001 From: Mehmet Akif Tasova Date: Fri, 13 Dec 2019 23:35:10 +0300 Subject: Revert "iwlwifi: mvm: fix scan config command size" Since v5.4-rc1 was released, iwlwifi started throwing errors when scan commands were sent to the firmware with certain devices (depending on the OTP burned in the device, which contains the list of available channels). For instance: iwlwifi 0000:00:14.3: FW error in SYNC CMD SCAN_CFG_CMD This bug was reported in the ArchLinux bug tracker: https://bugs.archlinux.org/task/64703 And also in a specific case in bugzilla, when the lar_disabled option was set: https://bugzilla.kernel.org/show_bug.cgi?id=205193 Revert the commit that introduced this error, by using the number of channels from the OTP instead of the number of channels that is specified in the FW TLV that tells us how many channels it supports. This reverts commit 06eb547c4ae4382e70d556ba213d13c95ca1801b. Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Mehmet Akif Tasova [ Luca: reworded the commit message a bit. ] Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index a046ac9fa852..a5af8f4128b1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1213,7 +1213,7 @@ static int iwl_mvm_legacy_config_scan(struct iwl_mvm *mvm) cmd_size = sizeof(struct iwl_scan_config_v2); else cmd_size = sizeof(struct iwl_scan_config_v1); - cmd_size += num_channels; + cmd_size += mvm->fw->ucode_capa.n_scan_channels; cfg = kzalloc(cmd_size, GFP_KERNEL); if (!cfg) -- cgit From 95224166a9032ff5d08fca633d37113078ce7d01 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jan 2020 09:32:46 +0100 Subject: vti[6]: fix packet tx through bpf_redirect() With an ebpf program that redirects packets through a vti[6] interface, the packets are dropped because no dst is attached. This could also be reproduced with an AF_PACKET socket, with the following python script (vti1 is an ip_vti interface): import socket send_s = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, 0) # scapy # p = IP(src='10.100.0.2', dst='10.200.0.1')/ICMP(type='echo-request') # raw(p) req = b'E\x00\x00\x1c\x00\x01\x00\x00@\x01e\xb2\nd\x00\x02\n\xc8\x00\x01\x08\x00\xf7\xff\x00\x00\x00\x00' send_s.sendto(req, ('vti1', 0x800, 0, 0)) Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 13 +++++++++++-- net/ipv6/ip6_vti.c | 13 +++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index e90b600c7a25..37cddd18f282 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -187,8 +187,17 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); } dst_hold(dst); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 6f08b760c2a7..524006aa0d78 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -449,8 +449,17 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) int err = -1; int mtu; - if (!dst) - goto tx_err_link_failure; + if (!dst) { + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + goto tx_err_link_failure; + } + skb_dst_set(skb, dst); + } dst_hold(dst); dst = xfrm_lookup(t->net, dst, fl, NULL, 0); -- cgit From f042365dbffea98fb8148c98c700402e8d099f02 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jan 2020 09:32:47 +0100 Subject: xfrm interface: fix packet tx through bpf_redirect() With an ebpf program that redirects packets through a xfrm interface, packets are dropped because no dst is attached to skb. This could also be reproduced with an AF_PACKET socket, with the following python script (xfrm1 is a xfrm interface): import socket send_s = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, 0) # scapy # p = IP(src='10.100.0.2', dst='10.200.0.1')/ICMP(type='echo-request') # raw(p) req = b'E\x00\x00\x1c\x00\x01\x00\x00@\x01e\xb2\nd\x00\x02\n\xc8\x00\x01\x08\x00\xf7\xff\x00\x00\x00\x00' send_s.sendto(req, ('xfrm1', 0x800, 0, 0)) It was also not possible to send an ip packet through an AF_PACKET socket because a LL header was expected. Let's remove those LL header constraints. Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 7ac1542feaf8..00393179f185 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -268,9 +268,6 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) int err = -1; int mtu; - if (!dst) - goto tx_err_link_failure; - dst_hold(dst); dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id); if (IS_ERR(dst)) { @@ -343,6 +340,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); struct net_device_stats *stats = &xi->dev->stats; + struct dst_entry *dst = skb_dst(skb); struct flowi fl; int ret; @@ -352,10 +350,33 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) case htons(ETH_P_IPV6): xfrm_decode_session(skb, &fl, AF_INET6); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + if (!dst) { + fl.u.ip6.flowi6_oif = dev->ifindex; + fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6); + if (dst->error) { + dst_release(dst); + stats->tx_carrier_errors++; + goto tx_err; + } + skb_dst_set(skb, dst); + } break; case htons(ETH_P_IP): xfrm_decode_session(skb, &fl, AF_INET); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + if (!dst) { + struct rtable *rt; + + fl.u.ip4.flowi4_oif = dev->ifindex; + fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4); + if (IS_ERR(rt)) { + stats->tx_carrier_errors++; + goto tx_err; + } + skb_dst_set(skb, &rt->dst); + } break; default: goto tx_err; @@ -563,12 +584,9 @@ static void xfrmi_dev_setup(struct net_device *dev) { dev->netdev_ops = &xfrmi_netdev_ops; dev->type = ARPHRD_NONE; - dev->hard_header_len = ETH_HLEN; - dev->min_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = ETH_DATA_LEN; - dev->addr_len = ETH_ALEN; + dev->max_mtu = IP_MAX_MTU; dev->flags = IFF_NOARP; dev->needs_free_netdev = true; dev->priv_destructor = xfrmi_dev_free; -- cgit From 8aaea2b0428b6aad7c7e22d3fddc31a78bb1d724 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 13 Jan 2020 09:00:36 +0000 Subject: xfrm: interface: do not confirm neighbor when do pmtu update When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, we should not call dst_confirm_neigh() as there is no two-way communication. Signed-off-by: Xu Wang Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 00393179f185..dc651a628dcf 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -294,7 +294,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) mtu = dst_mtu(dst); if (!skb->ignore_df && skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) -- cgit From 4e4362d2bf2a49ff44dbbc9585207977ca3d71d0 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Wed, 15 Jan 2020 12:11:29 +0100 Subject: xfrm: support output_mark for offload ESP packets Commit 9b42c1f179a6 ("xfrm: Extend the output_mark") added output_mark support but missed ESP offload support. xfrm_smark_get() is not called within xfrm_input() for packets coming from esp4_gro_receive() or esp6_gro_receive(). Therefore call xfrm_smark_get() directly within these functions. Fixes: 9b42c1f179a6 ("xfrm: Extend the output_mark to support input direction and masking.") Signed-off-by: Ulrich Weber Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 2 ++ net/ipv6/esp6_offload.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 0e4a7cf6bc87..e2e219c7854a 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -57,6 +57,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, if (!x) goto out_reset; + skb->mark = xfrm_smark_get(skb->mark, x); + sp->xvec[sp->len++] = x; sp->olen++; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index e31626ffccd1..fd535053245b 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -79,6 +79,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, if (!x) goto out_reset; + skb->mark = xfrm_smark_get(skb->mark, x); + sp->xvec[sp->len++] = x; sp->olen++; -- cgit From 7eaecf7963c1c8f62d62c6a8e7c439b0e7f2d365 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 18 Jan 2020 11:27:25 +0100 Subject: netfilter: nft_osf: add missing check for DREG attribute syzbot reports just another NULL deref crash because of missing test for presence of the attribute. Reported-by: syzbot+cf23983d697c26c34f60@syzkaller.appspotmail.com Fixes: b96af92d6eaf9fadd ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_osf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index f54d6ae15bb1..b42247aa48a9 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -61,6 +61,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, int err; u8 ttl; + if (!tb[NFTA_OSF_DREG]) + return -EINVAL; + if (tb[NFTA_OSF_TTL]) { ttl = nla_get_u8(tb[NFTA_OSF_TTL]); if (ttl > 2) -- cgit From 690afc165bb314354667f67157c1a1aea7dc797a Mon Sep 17 00:00:00 2001 From: Niko Kortstrom Date: Thu, 16 Jan 2020 11:43:27 +0200 Subject: net: ip6_gre: fix moving ip6gre between namespaces Support for moving IPv4 GRE tunnels between namespaces was added in commit b57708add314 ("gre: add x-netns support"). The respective change for IPv6 tunnels, commit 22f08069e8b4 ("ip6gre: add x-netns support") did not drop NETIF_F_NETNS_LOCAL flag so moving them from one netns to another is still denied in IPv6 case. Drop NETIF_F_NETNS_LOCAL flag from ip6gre tunnels to allow moving ip6gre tunnel endpoints between network namespaces. Signed-off-by: Niko Kortstrom Acked-by: Nicolas Dichtel Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ee968d980746..55bfc5149d0c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1466,7 +1466,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->mtu -= 8; if (tunnel->parms.collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; netif_keep_dst(dev); } ip6gre_tnl_init_features(dev); @@ -1894,7 +1893,6 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev); @@ -2197,7 +2195,6 @@ static void ip6erspan_tap_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev); -- cgit From 80892772c4edac88c538165d26a0105f19b61c1c Mon Sep 17 00:00:00 2001 From: "xiaofeng.yan" Date: Mon, 20 Jan 2020 14:26:39 +0800 Subject: hsr: Fix a compilation error A compliation error happen when building branch 5.5-rc7 In file included from net/hsr/hsr_main.c:12:0: net/hsr/hsr_main.h:194:20: error: two or more data types in declaration specifiers static inline void void hsr_debugfs_rename(struct net_device *dev) So Removed one void. Fixes: 4c2d5e33dcd3 ("hsr: rename debugfs file when interface name is changed") Signed-off-by: xiaofeng.yan Acked-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index d40de84a637f..754d84b217f0 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -191,7 +191,7 @@ void hsr_debugfs_term(struct hsr_priv *priv); void hsr_debugfs_create_root(void); void hsr_debugfs_remove_root(void); #else -static inline void void hsr_debugfs_rename(struct net_device *dev) +static inline void hsr_debugfs_rename(struct net_device *dev) { } static inline void hsr_debugfs_init(struct hsr_priv *priv, -- cgit From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001 From: Kadlecsik József Date: Sun, 19 Jan 2020 22:06:49 +0100 Subject: netfilter: ipset: use bitmap infrastructure completely The bitmap allocation did not use full unsigned long sizes when calculating the required size and that was triggered by KASAN as slab-out-of-bounds read in several places. The patch fixes all of them. Reported-by: syzbot+fabca5cbf5e54f3fe2de@syzkaller.appspotmail.com Reported-by: syzbot+827ced406c9a1d9570ed@syzkaller.appspotmail.com Reported-by: syzbot+190d63957b22ef673ea5@syzkaller.appspotmail.com Reported-by: syzbot+dfccdb2bdb4a12ad425e@syzkaller.appspotmail.com Reported-by: syzbot+df0d0f5895ef1f41a65b@syzkaller.appspotmail.com Reported-by: syzbot+b08bd19bb37513357fd4@syzkaller.appspotmail.com Reported-by: syzbot+53cdd0ec0bbabd53370a@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 7 ------- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 6 +++--- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 6 +++--- net/netfilter/ipset/ip_set_bitmap_port.c | 6 +++--- 5 files changed, 10 insertions(+), 17 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 4d8b1eaf7708..908d38dbcb91 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) sizeof(*addr)); } -/* Calculate the bytes required to store the inclusive range of a-b */ -static inline int -bitmap_bytes(u32 a, u32 b) -{ - return 4 * ((((b - a + 8) / 8) + 3) / 4); -} - /* How often should the gc be run by default */ #define IPSET_GC_TIME (3 * 60) diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 077a2cb65fcb..26ab0e9612d8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set) if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); - memset(map->members, 0, map->memsize); + bitmap_zero(map->members, map->elements); set->elements = 0; set->ext_size = 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index abe8f77d7d23..0a2196f59106 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip"); /* Type structure */ struct bitmap_ip { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, u32 first_ip, u32 last_ip, u32 elements, u32 hosts, u8 netmask) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ip; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index b618713297da..739e343efaf6 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -42,7 +42,7 @@ enum { /* Type structure */ struct bitmap_ipmac { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -299,7 +299,7 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 23d6095cb196..b49978dd810d 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port"); /* Type structure */ struct bitmap_port { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u16 first_port; /* host byte order, included in range */ u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -231,7 +231,7 @@ static bool init_map_port(struct ip_set *set, struct bitmap_port *map, u16 first_port, u16 last_port) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_port = first_port; @@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -ENOMEM; map->elements = elements; - map->memsize = bitmap_bytes(0, map->elements); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { kfree(map); -- cgit From 62ebaeaedee7591c257543d040677a60e35c7aec Mon Sep 17 00:00:00 2001 From: Yuki Taguchi Date: Mon, 20 Jan 2020 13:48:37 +0900 Subject: ipv6: sr: remove SKB_GSO_IPXIP6 on End.D* actions After LRO/GRO is applied, SRv6 encapsulated packets have SKB_GSO_IPXIP6 feature flag, and this flag must be removed right after decapulation procedure. Currently, SKB_GSO_IPXIP6 flag is not removed on End.D* actions, which creates inconsistent packet state, that is, a normal TCP/IP packets have the SKB_GSO_IPXIP6 flag. This behavior can cause unexpected fallback to GSO on routing to netdevices that do not support SKB_GSO_IPXIP6. For example, on inter-VRF forwarding, decapsulated packets separated into small packets by GSO because VRF devices do not support TSO for packets with SKB_GSO_IPXIP6 flag, and this degrades forwarding performance. This patch removes encapsulation related GSO flags from the skb right after the End.D* action is applied. Fixes: d7a669dd2f8b ("ipv6: sr: add helper functions for seg6local") Signed-off-by: Yuki Taguchi Signed-off-by: David S. Miller --- net/ipv6/seg6_local.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 85a5447a3e8d..7cbc19731997 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_SEG6_HMAC #include #endif @@ -135,7 +136,8 @@ static bool decap_and_validate(struct sk_buff *skb, int proto) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->encapsulation = 0; + if (iptunnel_pull_offloads(skb)) + return false; return true; } -- cgit From cb626bf566eb4433318d35681286c494f04fedcc Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Mon, 20 Jan 2020 09:51:03 +0200 Subject: net-sysfs: Fix reference count leak Netdev_register_kobject is calling device_initialize. In case of error reference taken by device_initialize is not given up. Drivers are supposed to call free_netdev in case of error. In non-error case the last reference is given up there and device release sequence is triggered. In error case this reference is kept and the release sequence is never started. Fix this by setting reg_state as NETREG_UNREGISTERED if registering fails. This is the rootcause for couple of memory leaks reported by Syzkaller: BUG: memory leak unreferenced object 0xffff8880675ca008 (size 256): comm "netdev_register", pid 281, jiffies 4294696663 (age 6.808s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000058ca4711>] kmem_cache_alloc_trace+0x167/0x280 [<000000002340019b>] device_add+0x882/0x1750 [<000000001d588c3a>] netdev_register_kobject+0x128/0x380 [<0000000011ef5535>] register_netdevice+0xa1b/0xf00 [<000000007fcf1c99>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000006a5b7b2b>] tun_chr_ioctl+0x2f/0x40 [<00000000f30f834a>] do_vfs_ioctl+0x1c7/0x1510 [<00000000fba062ea>] ksys_ioctl+0x99/0xb0 [<00000000b1c1b8d2>] __x64_sys_ioctl+0x78/0xb0 [<00000000984cabb9>] do_syscall_64+0x16f/0x580 [<000000000bde033d>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000e6ca2d9f>] 0xffffffffffffffff BUG: memory leak unreferenced object 0xffff8880668ba588 (size 8): comm "kobject_set_nam", pid 286, jiffies 4294725297 (age 9.871s) hex dump (first 8 bytes): 6e 72 30 00 cc be df 2b nr0....+ backtrace: [<00000000a322332a>] __kmalloc_track_caller+0x16e/0x290 [<00000000236fd26b>] kstrdup+0x3e/0x70 [<00000000dd4a2815>] kstrdup_const+0x3e/0x50 [<0000000049a377fc>] kvasprintf_const+0x10e/0x160 [<00000000627fc711>] kobject_set_name_vargs+0x5b/0x140 [<0000000019eeab06>] dev_set_name+0xc0/0xf0 [<0000000069cb12bc>] netdev_register_kobject+0xc8/0x320 [<00000000f2e83732>] register_netdevice+0xa1b/0xf00 [<000000009e1f57cc>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000009c560784>] tun_chr_ioctl+0x2f/0x40 [<000000000d759e02>] do_vfs_ioctl+0x1c7/0x1510 [<00000000351d7c31>] ksys_ioctl+0x99/0xb0 [<000000008390040a>] __x64_sys_ioctl+0x78/0xb0 [<0000000052d196b7>] do_syscall_64+0x16f/0x580 [<0000000019af9236>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000bc384531>] 0xffffffffffffffff v3 -> v4: Set reg_state to NETREG_UNREGISTERED if registering fails v2 -> v3: * Replaced BUG_ON with WARN_ON in free_netdev and netdev_release v1 -> v2: * Relying on driver calling free_netdev rather than calling put_device directly in error path Reported-by: syzbot+ad8ca40ecd77896d51e2@syzkaller.appspotmail.com Cc: David Miller Cc: Greg Kroah-Hartman Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7e885d069707..e82e9b82dfd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9302,8 +9302,10 @@ int register_netdevice(struct net_device *dev) goto err_uninit; ret = netdev_register_kobject(dev); - if (ret) + if (ret) { + dev->reg_state = NETREG_UNREGISTERED; goto err_uninit; + } dev->reg_state = NETREG_REGISTERED; __netdev_update_features(dev); -- cgit From 5b2f1f3070b6447b76174ea8bfb7390dc6253ebd Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 20 Jan 2020 18:04:56 +0800 Subject: tcp_bbr: improve arithmetic division in bbr_update_bw() do_div() does a 64-by-32 division. Use div64_long() instead of it if the divisor is long, to avoid truncation to 32-bit. And as a nice side effect also cleans up the function a bit. Signed-off-by: Wen Yang Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index a6545ef0d27b..6c4d79baff26 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -779,8 +779,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. */ - bw = (u64)rs->delivered * BW_UNIT; - do_div(bw, rs->interval_us); + bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); /* If this sample is application-limited, it is likely to have a very * low delivered count that represents application behavior rather than -- cgit From ce896476c65d72b4b99fa09c2f33436b4198f034 Mon Sep 17 00:00:00 2001 From: James Hughes Date: Mon, 20 Jan 2020 11:12:40 +0000 Subject: net: usb: lan78xx: Add .ndo_features_check As reported by Eric Dumazet, there are still some outstanding cases where the driver does not handle TSO correctly when skb's are over a certain size. Most cases have been fixed, this patch should ensure that forwarded SKB's that are greater than MAX_SINGLE_PACKET_SIZE - TX_OVERHEAD are software segmented and handled correctly. Signed-off-by: James Hughes Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 75bdfae5f3e2..c2a58f05b9a1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -3668,6 +3669,19 @@ static void lan78xx_tx_timeout(struct net_device *net) tasklet_schedule(&dev->bh); } +static netdev_features_t lan78xx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + if (skb->len + TX_OVERHEAD > MAX_SINGLE_PACKET_SIZE) + features &= ~NETIF_F_GSO_MASK; + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + return features; +} + static const struct net_device_ops lan78xx_netdev_ops = { .ndo_open = lan78xx_open, .ndo_stop = lan78xx_stop, @@ -3681,6 +3695,7 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, + .ndo_features_check = lan78xx_features_check, }; static void lan78xx_stat_monitor(struct timer_list *t) -- cgit From bfe02b9f9476bc8784ebb0f78fa244ad15bb15ea Mon Sep 17 00:00:00 2001 From: Theodore Dubois Date: Mon, 20 Jan 2020 14:10:53 -0800 Subject: tcp: remove redundant assigment to snd_cwnd Not sure how this got in here. git blame says the second assignment was added in 3a9a57f6, but that commit also removed the first assignment. Signed-off-by: Theodore Dubois Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d885ba868822..04273d6aa36b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2614,7 +2614,6 @@ int tcp_disconnect(struct sock *sk, int flags) WRITE_ONCE(tp->write_seq, seq); icsk->icsk_backoff = 0; - tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -- cgit From d0f418516022c32ecceaf4275423e5bd3f8743a9 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Tue, 21 Jan 2020 15:26:24 +0100 Subject: net, ip_tunnel: fix namespaces move in the same manner as commit 690afc165bb3 ("net: ip6_gre: fix moving ip6gre between namespaces"), fix namespace moving as it was broken since commit 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata."). Indeed, the ip6_gre commit removed the local flag for collect_md condition, so there is no reason to keep it for ip_gre/ip_tunnel. this patch will fix both ip_tunnel and ip_gre modules. Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: William Dauchy Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0fe2a5d3e258..74e1d964a615 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1236,10 +1236,8 @@ int ip_tunnel_init(struct net_device *dev) iph->version = 4; iph->ihl = 5; - if (tunnel->collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (tunnel->collect_md) netif_keep_dst(dev); - } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); -- cgit From d829229e35f302fd49c052b5c5906c90ecf9911d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 3 Dec 2019 10:08:49 +0200 Subject: iwlwifi: mvm: don't send the IWL_MVM_RXQ_NSSN_SYNC notif to Rx queues The purpose of this was to keep all the queues updated with the Rx sequence numbers because unlikely yet possible situations where queues can't understand if a specific packet needs to be dropped or not. Unfortunately, it was reported that this caused issues in our DMA engine. We don't fully understand how this is related, but this is being currently debugged. For now, just don't send this notification to the Rx queues. This de-facto reverts my commit 3c514bf831ac12356b695ff054bef641b9e99593: iwlwifi: mvm: add a loose synchronization of the NSSN across Rx queues This issue was reported here: https://bugzilla.kernel.org/show_bug.cgi?id=204873 https://bugzilla.kernel.org/show_bug.cgi?id=205001 and others maybe. Fixes: 3c514bf831ac ("iwlwifi: mvm: add a loose synchronization of the NSSN across Rx queues") CC: # 5.3+ Signed-off-by: Emmanuel Grumbach Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/constants.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h index 60aff2ecec12..58df25e2fb32 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h @@ -154,5 +154,6 @@ #define IWL_MVM_D3_DEBUG false #define IWL_MVM_USE_TWT false #define IWL_MVM_AMPDU_CONSEC_DROPS_DELBA 10 +#define IWL_MVM_USE_NSSN_SYNC 0 #endif /* __MVM_CONSTANTS_H */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index ef99c49247b7..c15f7dbc9516 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -514,14 +514,17 @@ static bool iwl_mvm_is_sn_less(u16 sn1, u16 sn2, u16 buffer_size) static void iwl_mvm_sync_nssn(struct iwl_mvm *mvm, u8 baid, u16 nssn) { - struct iwl_mvm_rss_sync_notif notif = { - .metadata.type = IWL_MVM_RXQ_NSSN_SYNC, - .metadata.sync = 0, - .nssn_sync.baid = baid, - .nssn_sync.nssn = nssn, - }; - - iwl_mvm_sync_rx_queues_internal(mvm, (void *)¬if, sizeof(notif)); + if (IWL_MVM_USE_NSSN_SYNC) { + struct iwl_mvm_rss_sync_notif notif = { + .metadata.type = IWL_MVM_RXQ_NSSN_SYNC, + .metadata.sync = 0, + .nssn_sync.baid = baid, + .nssn_sync.nssn = nssn, + }; + + iwl_mvm_sync_rx_queues_internal(mvm, (void *)¬if, + sizeof(notif)); + } } #define RX_REORDER_BUF_TIMEOUT_MQ (HZ / 10) -- cgit From 58c8db929db1c1d785a6f5d8f8692e5dbcc35e84 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 21 Jan 2020 13:31:47 +0100 Subject: net, sk_msg: Don't check if sock is locked when tearing down psock As John Fastabend reports [0], psock state tear-down can happen on receive path *after* unlocking the socket, if the only other psock user, that is sockmap or sockhash, releases its psock reference before tcp_bpf_recvmsg does so: tcp_bpf_recvmsg() psock = sk_psock_get(sk) <- refcnt 2 lock_sock(sk); ... sock_map_free() <- refcnt 1 release_sock(sk) sk_psock_put() <- refcnt 0 Remove the lockdep check for socket lock in psock tear-down that got introduced in 7e81a3530206 ("bpf: Sockmap, ensure sock lock held during tear down"). [0] https://lore.kernel.org/netdev/5e25dc995d7d_74082aaee6e465b441@john-XPS-13-9370.notmuch/ Fixes: 7e81a3530206 ("bpf: Sockmap, ensure sock lock held during tear down") Reported-by: syzbot+d73682fcf7fee6982fe3@syzkaller.appspotmail.com Suggested-by: John Fastabend Signed-off-by: Jakub Sitnicki Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/skmsg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 3866d7e20c07..ded2d5227678 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -594,8 +594,6 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sock_owned_by_me(sk); - sk_psock_cork_free(psock); sk_psock_zap_ingress(psock); -- cgit From 0ace17d56824165c7f4c68785d6b58971db954dd Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Tue, 21 Jan 2020 14:42:58 +0100 Subject: can, slip: Protect tty->disc_data in write_wakeup and close with RCU write_wakeup can happen in parallel with close/hangup where tty->disc_data is set to NULL and the netdevice is freed thus also freeing disc_data. write_wakeup accesses disc_data so we must prevent close from freeing the netdev while write_wakeup has a non-NULL view of tty->disc_data. We also need to make sure that accesses to disc_data are atomic. Which can all be done with RCU. This problem was found by Syzkaller on SLCAN, but the same issue is reproducible with the SLIP line discipline using an LTP test based on the Syzkaller reproducer. A fix which didn't use RCU was posted by Hillf Danton. Fixes: 661f7fda21b1 ("slip: Fix deadlock in write_wakeup") Fixes: a8e83b17536a ("slcan: Port write_wakeup deadlock fix from slip") Reported-by: syzbot+017e491ae13c0068598a@syzkaller.appspotmail.com Signed-off-by: Richard Palethorpe Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: "David S. Miller" Cc: Tyler Hall Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: syzkaller@googlegroups.com Signed-off-by: David S. Miller --- drivers/net/can/slcan.c | 12 ++++++++++-- drivers/net/slip/slip.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 2e57122f02fb..2f5c287eac95 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -344,9 +344,16 @@ static void slcan_transmit(struct work_struct *work) */ static void slcan_write_wakeup(struct tty_struct *tty) { - struct slcan *sl = tty->disc_data; + struct slcan *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out; schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); } /* Send a can_frame to a TTY queue. */ @@ -644,10 +651,11 @@ static void slcan_close(struct tty_struct *tty) return; spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); + synchronize_rcu(); flush_work(&sl->tx_work); /* Flush network side */ diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 2a91c192659f..61d7e0d1d77d 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -452,9 +452,16 @@ static void slip_transmit(struct work_struct *work) */ static void slip_write_wakeup(struct tty_struct *tty) { - struct slip *sl = tty->disc_data; + struct slip *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out; schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); } static void sl_tx_timeout(struct net_device *dev) @@ -882,10 +889,11 @@ static void slip_close(struct tty_struct *tty) return; spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); + synchronize_rcu(); flush_work(&sl->tx_work); /* VSV = very important to remove timers */ -- cgit From c80794323e82ac6ab45052ebba5757ce47b4b588 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 21 Jan 2020 15:09:40 +0000 Subject: net: Fix packet reordering caused by GRO and listified RX cooperation Commit 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL skbs") introduces batching of GRO_NORMAL packets in napi_frags_finish, and commit 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()") adds the same to napi_skb_finish. However, dev_gro_receive (that is called just before napi_{frags,skb}_finish) can also pass skbs to the networking stack: e.g., when the GRO session is flushed, napi_gro_complete is called, which passes pp directly to netif_receive_skb_internal, skipping napi->rx_list. It means that the packet stored in pp will be handled by the stack earlier than the packets that arrived before, but are still waiting in napi->rx_list. It leads to TCP reorderings that can be observed in the TCPOFOQueue counter in netstat. This commit fixes the reordering issue by making napi_gro_complete also use napi->rx_list, so that all packets going through GRO will keep their order. In order to keep napi_gro_flush working properly, gro_normal_list calls are moved after the flush to clear napi->rx_list. iwlwifi calls napi_gro_flush directly and does the same thing that is done by gro_normal_list, so the same change is applied there: napi_gro_flush is moved to be before the flush of napi->rx_list. A few other drivers also use napi_gro_flush (brocade/bna/bnad.c, cortina/gemini.c, hisilicon/hns3/hns3_enet.c). The first two also use napi_complete_done afterwards, which performs the gro_normal_list flush, so they are fine. The latter calls napi_gro_receive right after napi_gro_flush, so it can end up with non-empty napi->rx_list anyway. Fixes: 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL skbs") Signed-off-by: Maxim Mikityanskiy Cc: Alexander Lobakin Cc: Edward Cree Acked-by: Alexander Lobakin Acked-by: Saeed Mahameed Acked-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 4 +- net/core/dev.c | 64 ++++++++++++++-------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 452da44a21e0..f0b8ff67a1bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1529,13 +1529,13 @@ out: napi = &rxq->napi; if (napi->poll) { + napi_gro_flush(napi, false); + if (napi->rx_count) { netif_receive_skb_list(&napi->rx_list); INIT_LIST_HEAD(&napi->rx_list); napi->rx_count = 0; } - - napi_gro_flush(napi, false); } iwl_pcie_rxq_restock(trans, rxq); diff --git a/net/core/dev.c b/net/core/dev.c index e82e9b82dfd9..cca03914108a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5491,9 +5491,29 @@ static void flush_all_backlogs(void) put_online_cpus(); } +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. + */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +{ + list_add_tail(&skb->list, &napi->rx_list); + if (++napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); -static int napi_gro_complete(struct sk_buff *skb) +static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { struct packet_offload *ptype; __be16 type = skb->protocol; @@ -5526,7 +5546,8 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - return netif_receive_skb_internal(skb); + gro_normal_one(napi, skb); + return NET_RX_SUCCESS; } static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, @@ -5539,7 +5560,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; skb_list_del_init(skb); - napi_gro_complete(skb); + napi_gro_complete(napi, skb); napi->gro_hash[index].count--; } @@ -5641,7 +5662,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) } } -static void gro_flush_oldest(struct list_head *head) +static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) { struct sk_buff *oldest; @@ -5657,7 +5678,7 @@ static void gro_flush_oldest(struct list_head *head) * SKB to the chain. */ skb_list_del_init(oldest); - napi_gro_complete(oldest); + napi_gro_complete(napi, oldest); } INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, @@ -5733,7 +5754,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (pp) { skb_list_del_init(pp); - napi_gro_complete(pp); + napi_gro_complete(napi, pp); napi->gro_hash[hash].count--; } @@ -5744,7 +5765,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { - gro_flush_oldest(gro_head); + gro_flush_oldest(napi, gro_head); } else { napi->gro_hash[hash].count++; } @@ -5802,26 +5823,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); -/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ -static void gro_normal_list(struct napi_struct *napi) -{ - if (!napi->rx_count) - return; - netif_receive_skb_list_internal(&napi->rx_list); - INIT_LIST_HEAD(&napi->rx_list); - napi->rx_count = 0; -} - -/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, - * pass the whole batch up to the stack. - */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) -{ - list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) - gro_normal_list(napi); -} - static void napi_skb_free_stolen_head(struct sk_buff *skb) { skb_dst_drop(skb); @@ -6200,8 +6201,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; - gro_normal_list(n); - if (n->gro_bitmask) { unsigned long timeout = 0; @@ -6217,6 +6216,9 @@ bool napi_complete_done(struct napi_struct *n, int work_done) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); } + + gro_normal_list(n); + if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ local_irq_save(flags); @@ -6548,8 +6550,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } - gro_normal_list(n); - if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. @@ -6557,6 +6557,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) napi_gro_flush(n, HZ >= 1000); } + gro_normal_list(n); + /* Some drivers may have called napi_schedule * prior to exhausting their budget. */ -- cgit From bda6a35505e28b70d48096c933d5949c9b6caf9a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 22 Jan 2020 20:38:39 +0100 Subject: net: Add Jakub to MAINTAINERS for networking general. Signed-off-by: David S. Miller Acked-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index cf6ccca6e61c..7b96ebff3511 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11499,6 +11499,7 @@ F: drivers/net/dsa/ NETWORKING [GENERAL] M: "David S. Miller" +M: Jakub Kicinski L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net Q: http://patchwork.ozlabs.org/project/netdev/list/ -- cgit From d39ca2590d10712f412add7a88e1dd467a7246f4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 21 Jan 2020 16:50:49 +0100 Subject: Revert "udp: do rmem bulk free even if the rx sk queue is empty" This reverts commit 0d4a6608f68c7532dcbfec2ea1150c9761767d03. Williem reported that after commit 0d4a6608f68c ("udp: do rmem bulk free even if the rx sk queue is empty") the memory allocated by an almost idle system with many UDP sockets can grow a lot. For stable kernel keep the solution as simple as possible and revert the offending commit. Reported-by: Willem de Bruijn Diagnosed-by: Eric Dumazet Fixes: 0d4a6608f68c ("udp: do rmem bulk free even if the rx sk queue is empty") Signed-off-by: Paolo Abeni Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 93a355b6b092..030d43c7c957 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1368,7 +1368,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2)) + if (size < (sk->sk_rcvbuf >> 2) && + !skb_queue_empty(&up->reader_queue)) return; } else { size += up->forward_deficit; -- cgit From 36d79af7fb59d6d9106feb9c1855eb93d6d53fe6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jan 2020 11:02:20 -0800 Subject: net_sched: use validated TCA_KIND attribute in tc_new_tfilter() sysbot found another issue in tc_new_tfilter(). We probably should use @name which contains the sanitized version of TCA_KIND. BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:608 [inline] BUG: KMSAN: uninit-value in string+0x522/0x690 lib/vsprintf.c:689 CPU: 1 PID: 10753 Comm: syz-executor.1 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 string_nocheck lib/vsprintf.c:608 [inline] string+0x522/0x690 lib/vsprintf.c:689 vsnprintf+0x207d/0x31b0 lib/vsprintf.c:2574 __request_module+0x2ad/0x11c0 kernel/kmod.c:143 tcf_proto_lookup_ops+0x241/0x720 net/sched/cls_api.c:139 tcf_proto_create net/sched/cls_api.c:262 [inline] tc_new_tfilter+0x2a4e/0x5010 net/sched/cls_api.c:2058 rtnetlink_rcv_msg+0xcb7/0x1570 net/core/rtnetlink.c:5415 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45b349 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f88b3948c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f88b39496d4 RCX: 000000000045b349 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 000000000075bfc8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000099f R14: 00000000004cb163 R15: 000000000075bfd4 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2774 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4382 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline] netlink_sendmsg+0x7d3/0x14d0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 6f96c3c6904c ("net_sched: fix backward compatibility for TCA_KIND") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Cong Wang Cc: Marcelo Ricardo Leitner Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 76e0d122616a..c2cdd0fc2e70 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2055,9 +2055,8 @@ replay: &chain_info)); mutex_unlock(&chain->filter_chain_lock); - tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain, rtnl_held, - extack); + tp_new = tcf_proto_create(name, protocol, prio, chain, + rtnl_held, extack); if (IS_ERR(tp_new)) { err = PTR_ERR(tp_new); goto errout_tp; -- cgit From 5311a69aaca30fa849c3cc46fb25f75727fb72d0 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Tue, 21 Jan 2020 21:49:54 +0100 Subject: net, ip6_tunnel: fix namespaces move in the same manner as commit d0f418516022 ("net, ip_tunnel: fix namespaces move"), fix namespace moving as it was broken since commit 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel"), but for ipv6 this time; there is no reason to keep it for ip6_tunnel. Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel") Signed-off-by: William Dauchy Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2f376dbc37d5..b5dd20c4599b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1877,10 +1877,8 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); - if (t->parms.collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (t->parms.collect_md) netif_keep_dst(dev); - } return 0; } -- cgit From 3adb4eaa1b4cbadc7858679334d19e840e7df6db Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 22 Jan 2020 20:23:15 +0100 Subject: MAINTAINERS: Make Russell King designated reviewer of phylib phylink and phylib are interconnected. It makes sense for phylib and phy driver patches to be also reviewed by the phylink maintainer. So add Russell King as a designed reviewer of phylib. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7b96ebff3511..f2b57c4166e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6197,6 +6197,7 @@ ETHERNET PHY LIBRARY M: Andrew Lunn M: Florian Fainelli M: Heiner Kallweit +R: Russell King L: netdev@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-class-net-phydev -- cgit From d6bce2137f5d6bb1093e96d2f801479099b28094 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Jan 2020 15:07:27 +1100 Subject: airo: Fix possible info leak in AIROOLDIOCTL/SIOCDEVPRIVATE The driver for Cisco Aironet 4500 and 4800 series cards (airo.c), implements AIROOLDIOCTL/SIOCDEVPRIVATE in airo_ioctl(). The ioctl handler copies an aironet_ioctl struct from userspace, which includes a command and a length. Some of the commands are handled in readrids(), which kmalloc()'s a buffer of RIDSIZE (2048) bytes. That buffer is then passed to PC4500_readrid(), which has two cases. The else case does some setup and then reads up to RIDSIZE bytes from the hardware into the kmalloc()'ed buffer. Here len == RIDSIZE, pBuf is the kmalloc()'ed buffer: // read the rid length field bap_read(ai, pBuf, 2, BAP1); // length for remaining part of rid len = min(len, (int)le16_to_cpu(*(__le16*)pBuf)) - 2; ... // read remainder of the rid rc = bap_read(ai, ((__le16*)pBuf)+1, len, BAP1); PC4500_readrid() then returns to readrids() which does: len = comp->len; if (copy_to_user(comp->data, iobuf, min(len, (int)RIDSIZE))) { Where comp->len is the user controlled length field. So if the "rid length field" returned by the hardware is < 2048, and the user requests 2048 bytes in comp->len, we will leak the previous contents of the kmalloc()'ed buffer to userspace. Fix it by kzalloc()'ing the buffer. Found by Ilja by code inspection, not tested as I don't have the required hardware. Reported-by: Ilja Van Sprundel Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller --- drivers/net/wireless/cisco/airo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index f43c06569ea1..d69c2ee7e206 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -7813,7 +7813,7 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { return -EINVAL; } - if ((iobuf = kmalloc(RIDSIZE, GFP_KERNEL)) == NULL) + if ((iobuf = kzalloc(RIDSIZE, GFP_KERNEL)) == NULL) return -ENOMEM; PC4500_readrid(ai,ridcode,iobuf,RIDSIZE, 1); -- cgit From 78f7a7566f5eb59321e99b55a6fdb16ea05b37d1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Jan 2020 15:07:28 +1100 Subject: airo: Add missing CAP_NET_ADMIN check in AIROOLDIOCTL/SIOCDEVPRIVATE The driver for Cisco Aironet 4500 and 4800 series cards (airo.c), implements AIROOLDIOCTL/SIOCDEVPRIVATE in airo_ioctl(). The ioctl handler copies an aironet_ioctl struct from userspace, which includes a command. Some of the commands are handled in readrids(), where the user controlled command is converted into a driver-internal value called "ridcode". There are two command values, AIROGWEPKTMP and AIROGWEPKNV, which correspond to ridcode values of RID_WEP_TEMP and RID_WEP_PERM respectively. These commands both have checks that the user has CAP_NET_ADMIN, with the comment that "Only super-user can read WEP keys", otherwise they return -EPERM. However there is another command value, AIRORRID, that lets the user specify the ridcode value directly, with no other checks. This means the user can bypass the CAP_NET_ADMIN check on AIROGWEPKTMP and AIROGWEPKNV. Fix it by moving the CAP_NET_ADMIN check out of the command handling and instead do it later based on the ridcode. That way regardless of whether the ridcode is set via AIROGWEPKTMP or AIROGWEPKNV, or passed in using AIRORID, we always do the CAP_NET_ADMIN check. Found by Ilja by code inspection, not tested as I don't have the required hardware. Reported-by: Ilja Van Sprundel Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller --- drivers/net/wireless/cisco/airo.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index d69c2ee7e206..c4c8f1b62e1e 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -7790,16 +7790,8 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { case AIROGVLIST: ridcode = RID_APLIST; break; case AIROGDRVNAM: ridcode = RID_DRVNAME; break; case AIROGEHTENC: ridcode = RID_ETHERENCAP; break; - case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; - /* Only super-user can read WEP keys */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; - case AIROGWEPKNV: ridcode = RID_WEP_PERM; - /* Only super-user can read WEP keys */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; + case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; break; + case AIROGWEPKNV: ridcode = RID_WEP_PERM; break; case AIROGSTAT: ridcode = RID_STATUS; break; case AIROGSTATSD32: ridcode = RID_STATSDELTA; break; case AIROGSTATSC32: ridcode = RID_STATS; break; @@ -7813,6 +7805,12 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { return -EINVAL; } + if (ridcode == RID_WEP_TEMP || ridcode == RID_WEP_PERM) { + /* Only super-user can read WEP keys */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + } + if ((iobuf = kzalloc(RIDSIZE, GFP_KERNEL)) == NULL) return -ENOMEM; -- cgit From d836f5c69d87473ff65c06a6123e5b2cf5e56f5b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jan 2020 22:47:29 -0800 Subject: net: rtnetlink: validate IFLA_MTU attribute in rtnl_create_link() rtnl_create_link() needs to apply dev->min_mtu and dev->max_mtu checks that we apply in do_setlink() Otherwise malicious users can crash the kernel, for example after an integer overflow : BUG: KASAN: use-after-free in memset include/linux/string.h:365 [inline] BUG: KASAN: use-after-free in __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 Write of size 32 at addr ffff88819f20b9c0 by task swapper/0/0 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.5.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x134/0x1a0 mm/kasan/generic.c:192 memset+0x24/0x40 mm/kasan/common.c:108 memset include/linux/string.h:365 [inline] __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 alloc_skb include/linux/skbuff.h:1049 [inline] alloc_skb_with_frags+0x93/0x590 net/core/skbuff.c:5664 sock_alloc_send_pskb+0x7ad/0x920 net/core/sock.c:2242 sock_alloc_send_skb+0x32/0x40 net/core/sock.c:2259 mld_newpack+0x1d7/0x7f0 net/ipv6/mcast.c:1609 add_grhead.isra.0+0x299/0x370 net/ipv6/mcast.c:1713 add_grec+0x7db/0x10b0 net/ipv6/mcast.c:1844 mld_send_cr net/ipv6/mcast.c:1970 [inline] mld_ifc_timer_expire+0x3d3/0x950 net/ipv6/mcast.c:2477 call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0x6c3/0x1790 kernel/time/timer.c:1786 __do_softirq+0x262/0x98c kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x19b/0x1e0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x1a3/0x610 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829 RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61 Code: 98 6b ea f9 eb 8a cc cc cc cc cc cc e9 07 00 00 00 0f 00 2d 44 1c 60 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 34 1c 60 00 fb f4 cc 55 48 89 e5 41 57 41 56 41 55 41 54 53 e8 4e 5d 9a f9 e8 79 RSP: 0018:ffffffff89807ce8 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 RAX: 1ffffffff13266ae RBX: ffffffff8987a1c0 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000006 RDI: ffffffff8987aa54 RBP: ffffffff89807d18 R08: ffffffff8987a1c0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffffffff8a799980 R14: 0000000000000000 R15: 0000000000000000 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:690 default_idle_call+0x84/0xb0 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x3c8/0x6e0 kernel/sched/idle.c:269 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:361 rest_init+0x23b/0x371 init/main.c:451 arch_call_rest_init+0xe/0x1b start_kernel+0x904/0x943 init/main.c:784 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242 The buggy address belongs to the page: page:ffffea00067c82c0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 raw: 057ffe0000000000 ffffea00067c82c8 ffffea00067c82c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88819f20b880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88819f20b900: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff88819f20b980: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88819f20ba00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88819f20ba80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 29 +++++++++++++++++++---------- net/core/rtnetlink.c | 13 +++++++++++-- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae5e260911e2..cac56fb59af8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3698,6 +3698,8 @@ int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); +int dev_validate_mtu(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); int dev_set_mtu_ext(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); int dev_set_mtu(struct net_device *, int); diff --git a/net/core/dev.c b/net/core/dev.c index cca03914108a..81befd0c2510 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8196,6 +8196,22 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL(__dev_set_mtu); +int dev_validate_mtu(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack) +{ + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + NL_SET_ERR_MSG(extack, "mtu less than device minimum"); + return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); + return -EINVAL; + } + return 0; +} + /** * dev_set_mtu_ext - Change maximum transfer unit * @dev: device @@ -8212,16 +8228,9 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, if (new_mtu == dev->mtu) return 0; - /* MTU must be positive, and in range */ - if (new_mtu < 0 || new_mtu < dev->min_mtu) { - NL_SET_ERR_MSG(extack, "mtu less than device minimum"); - return -EINVAL; - } - - if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { - NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); - return -EINVAL; - } + err = dev_validate_mtu(dev, new_mtu, extack); + if (err) + return err; if (!netif_device_present(dev)) return -ENODEV; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02916f43bf63..d9001b5c48eb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3048,8 +3048,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + int err; + + err = dev_validate_mtu(dev, mtu, extack); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + dev->mtu = mtu; + } if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); -- cgit From 940ba14986657a50c15f694efca1beba31fa568f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jan 2020 23:17:14 -0800 Subject: gtp: make sure only SOCK_DGRAM UDP sockets are accepted A malicious user could use RAW sockets and fool GTP using them as standard SOCK_DGRAM UDP sockets. BUG: KMSAN: uninit-value in udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] BUG: KMSAN: uninit-value in setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 CPU: 0 PID: 11262 Comm: syz-executor613 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 gtp_encap_enable_socket+0x37f/0x5a0 drivers/net/gtp.c:827 gtp_encap_enable drivers/net/gtp.c:844 [inline] gtp_newlink+0xfb/0x1e50 drivers/net/gtp.c:666 __rtnl_newlink net/core/rtnetlink.c:3305 [inline] rtnl_newlink+0x2973/0x3920 net/core/rtnetlink.c:3363 rtnetlink_rcv_msg+0x1153/0x1570 net/core/rtnetlink.c:5424 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x441359 Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff1cd0ac28 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441359 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000246 R12: 00000000004020d0 R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags+0x3c/0x90 mm/kmsan/kmsan.c:144 kmsan_internal_alloc_meta_for_pages mm/kmsan/kmsan_shadow.c:307 [inline] kmsan_alloc_page+0x12a/0x310 mm/kmsan/kmsan_shadow.c:336 __alloc_pages_nodemask+0x57f2/0x5f60 mm/page_alloc.c:4800 alloc_pages_current+0x67d/0x990 mm/mempolicy.c:2207 alloc_pages include/linux/gfp.h:534 [inline] alloc_slab_page+0x111/0x12f0 mm/slub.c:1511 allocate_slab mm/slub.c:1656 [inline] new_slab+0x2bc/0x1130 mm/slub.c:1722 new_slab_objects mm/slub.c:2473 [inline] ___slab_alloc+0x1533/0x1f30 mm/slub.c:2624 __slab_alloc mm/slub.c:2664 [inline] slab_alloc_node mm/slub.c:2738 [inline] slab_alloc mm/slub.c:2783 [inline] kmem_cache_alloc+0xb23/0xd70 mm/slub.c:2788 sk_prot_alloc+0xf2/0x620 net/core/sock.c:1597 sk_alloc+0xf0/0xbe0 net/core/sock.c:1657 inet_create+0x7c7/0x1370 net/ipv4/af_inet.c:321 __sock_create+0x8eb/0xf00 net/socket.c:1420 sock_create net/socket.c:1471 [inline] __sys_socket+0x1a1/0x600 net/socket.c:1513 __do_sys_socket net/socket.c:1522 [inline] __se_sys_socket+0x8d/0xb0 net/socket.c:1520 __x64_sys_socket+0x4a/0x70 net/socket.c:1520 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Eric Dumazet Cc: Pablo Neira Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/gtp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index f6222ada6818..9b3ba98726d7 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -804,19 +804,21 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, return NULL; } - if (sock->sk->sk_protocol != IPPROTO_UDP) { + sk = sock->sk; + if (sk->sk_protocol != IPPROTO_UDP || + sk->sk_type != SOCK_DGRAM || + (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { pr_debug("socket fd=%d not UDP\n", fd); sk = ERR_PTR(-EINVAL); goto out_sock; } - lock_sock(sock->sk); - if (sock->sk->sk_user_data) { + lock_sock(sk); + if (sk->sk_user_data) { sk = ERR_PTR(-EBUSY); goto out_rel_sock; } - sk = sock->sk; sock_hold(sk); tuncfg.sk_user_data = gtp; -- cgit From a39142728d0e60a76b67db3cbc187d61fde7b46d Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:05 +0800 Subject: r8152: fix runtime resume for linking change Fix the runtime resume doesn't work normally for linking change. 1. Reset the settings and status of runtime suspend. 2. Sync the linking status. 3. Poll the linking change. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 031cb8fff909..115559707683 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -365,8 +365,10 @@ #define DEBUG_LTSSM 0x0082 /* PLA_EXTRA_STATUS */ +#define CUR_LINK_OK BIT(15) #define U3P3_CHECK_EN BIT(7) /* RTL_VER_05 only */ #define LINK_CHANGE_FLAG BIT(8) +#define POLL_LINK_CHG BIT(0) /* USB_USB2PHY */ #define USB2PHY_SUSPEND 0x0001 @@ -5387,6 +5389,16 @@ static void r8153_init(struct r8152 *tp) else ocp_data |= DYNAMIC_BURST; ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data); + + r8153_queue_wake(tp, false); + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); + if (rtl8152_get_speed(tp) & LINK_STATUS) + ocp_data |= CUR_LINK_OK; + else + ocp_data &= ~CUR_LINK_OK; + ocp_data |= POLL_LINK_CHG; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); } ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2); @@ -5416,6 +5428,7 @@ static void r8153_init(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001); r8153_power_cut_en(tp, false); + rtl_runtime_suspend_enable(tp, false); r8153_u1u2en(tp, true); r8153_mac_clk_spd(tp, false); usb_enable_lpm(tp->udev); @@ -5484,6 +5497,14 @@ static void r8153b_init(struct r8152 *tp) r8153b_ups_en(tp, false); r8153_queue_wake(tp, false); rtl_runtime_suspend_enable(tp, false); + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); + if (rtl8152_get_speed(tp) & LINK_STATUS) + ocp_data |= CUR_LINK_OK; + else + ocp_data &= ~CUR_LINK_OK; + ocp_data |= POLL_LINK_CHG; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); r8153b_u1u2en(tp, true); usb_enable_lpm(tp->udev); -- cgit From f99cd20eda12b1920ffc284a736437c016b3a5a2 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:06 +0800 Subject: r8152: reset flow control patch when linking on for RTL8153B When linking ON, the patch of flow control has to be reset. This makes sure the patch works normally. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 115559707683..504db2348a3e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2865,6 +2865,17 @@ static int rtl8153_enable(struct r8152 *tp) r8153_set_rx_early_timeout(tp); r8153_set_rx_early_size(tp); + if (tp->version == RTL_VER_09) { + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); + ocp_data &= ~FC_PATCH_TASK; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + usleep_range(1000, 2000); + ocp_data |= FC_PATCH_TASK; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + } + return rtl_enable(tp); } -- cgit From 9583a3638dc07cc1878f41265e85ed497f72efcb Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:07 +0800 Subject: r8152: get default setting of WOL before initializing Initailization would reset runtime suspend by tp->saved_wolopts, so the tp->saved_wolopts should be set before initializing. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 504db2348a3e..c3217a5c2fe1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6739,6 +6739,11 @@ static int rtl8152_probe(struct usb_interface *intf, intf->needs_remote_wakeup = 1; + if (!rtl_can_wakeup(tp)) + __rtl_set_wol(tp, 0); + else + tp->saved_wolopts = __rtl_get_wol(tp); + tp->rtl_ops.init(tp); #if IS_BUILTIN(CONFIG_USB_RTL8152) /* Retry in case request_firmware() is not ready yet. */ @@ -6756,10 +6761,6 @@ static int rtl8152_probe(struct usb_interface *intf, goto out1; } - if (!rtl_can_wakeup(tp)) - __rtl_set_wol(tp, 0); - - tp->saved_wolopts = __rtl_get_wol(tp); if (tp->saved_wolopts) device_set_wakeup_enable(&udev->dev, true); else -- cgit From 809a7fc6593f288d6f820ef6cc57b9d69b5f9474 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:08 +0800 Subject: r8152: disable U2P3 for RTL8153B Enable U2P3 may miss zero packet for bulk-in. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c3217a5c2fe1..bc6b2f8aaa7e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3389,7 +3389,6 @@ static void rtl8153b_runtime_enable(struct r8152 *tp, bool enable) r8153b_ups_en(tp, false); r8153_queue_wake(tp, false); rtl_runtime_suspend_enable(tp, false); - r8153_u2p3en(tp, true); r8153b_u1u2en(tp, true); } } @@ -4688,7 +4687,6 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) r8153_aldps_en(tp, true); r8152b_enable_fc(tp); - r8153_u2p3en(tp, true); set_bit(PHY_RESET, &tp->flags); } @@ -5018,7 +5016,6 @@ static void rtl8153b_up(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_B); r8153_aldps_en(tp, true); - r8153_u2p3en(tp, true); r8153b_u1u2en(tp, true); } -- cgit From 08997b5eec08a2c29367f19a74abdea54b299406 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:09 +0800 Subject: r8152: Disable PLA MCU clock speed down PLA MCU clock speed down could only be enabled when tx/rx are disabled. Otherwise, the packet loss may occur. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index bc6b2f8aaa7e..1fb85c79bd33 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -324,6 +324,7 @@ #define MAC_CLK_SPDWN_EN BIT(15) /* PLA_MAC_PWR_CTRL3 */ +#define PLA_MCU_SPDWN_EN BIT(14) #define PKT_AVAIL_SPDWN_EN 0x0100 #define SUSPEND_SPDWN_EN 0x0004 #define U1U2_SPDWN_EN 0x0002 @@ -5005,6 +5006,8 @@ static void rtl8153_down(struct r8152 *tp) static void rtl8153b_up(struct r8152 *tp) { + u32 ocp_data; + if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; @@ -5015,17 +5018,27 @@ static void rtl8153b_up(struct r8152 *tp) r8153_first_init(tp); ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_B); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); + ocp_data &= ~PLA_MCU_SPDWN_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + r8153_aldps_en(tp, true); r8153b_u1u2en(tp, true); } static void rtl8153b_down(struct r8152 *tp) { + u32 ocp_data; + if (test_bit(RTL8152_UNPLUG, &tp->flags)) { rtl_drop_queued_tx(tp); return; } + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); + ocp_data |= PLA_MCU_SPDWN_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + r8153b_u1u2en(tp, false); r8153_u2p3en(tp, false); r8153b_power_cut_en(tp, false); @@ -5521,6 +5534,10 @@ static void r8153b_init(struct r8152 *tp) ocp_data |= MAC_CLK_SPDWN_EN; ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); + ocp_data &= ~PLA_MCU_SPDWN_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + set_bit(GREEN_ETHERNET, &tp->flags); /* rx aggregation */ -- cgit From d7f1b59655efb5a285d227c8f9853a98eab5c2fd Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:10 +0800 Subject: r8152: disable test IO for RTL8153B For RTL8153B with QFN32, disable test IO. Otherwise, it may cause abnormal behavior for the device randomly. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 1fb85c79bd33..7efeddad1fc8 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -312,6 +312,7 @@ /* PLA_PHY_PWR */ #define TX_10M_IDLE_EN 0x0080 #define PFM_PWM_SWITCH 0x0040 +#define TEST_IO_OFF BIT(4) /* PLA_MAC_PWR_CTRL */ #define D3_CLK_GATED_EN 0x00004000 @@ -5538,6 +5539,15 @@ static void r8153b_init(struct r8152 *tp) ocp_data &= ~PLA_MCU_SPDWN_EN; ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + if (tp->version == RTL_VER_09) { + /* Disable Test IO for 32QFN */ + if (ocp_read_byte(tp, MCU_TYPE_PLA, 0xdc00) & BIT(5)) { + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR); + ocp_data |= TEST_IO_OFF; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); + } + } + set_bit(GREEN_ETHERNET, &tp->flags); /* rx aggregation */ -- cgit From a0246dafe684a6d5ad31ccd59af0334ccf0cc7b2 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:11 +0800 Subject: r8152: don't enable U1U2 with USB_SPEED_HIGH for RTL8153B For certain platforms, it causes USB reset periodically. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7efeddad1fc8..b1a00f29455b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3391,7 +3391,8 @@ static void rtl8153b_runtime_enable(struct r8152 *tp, bool enable) r8153b_ups_en(tp, false); r8153_queue_wake(tp, false); rtl_runtime_suspend_enable(tp, false); - r8153b_u1u2en(tp, true); + if (tp->udev->speed != USB_SPEED_HIGH) + r8153b_u1u2en(tp, true); } } @@ -5024,7 +5025,9 @@ static void rtl8153b_up(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); r8153_aldps_en(tp, true); - r8153b_u1u2en(tp, true); + + if (tp->udev->speed != USB_SPEED_HIGH) + r8153b_u1u2en(tp, true); } static void rtl8153b_down(struct r8152 *tp) @@ -5527,7 +5530,9 @@ static void r8153b_init(struct r8152 *tp) ocp_data &= ~CUR_LINK_OK; ocp_data |= POLL_LINK_CHG; ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); - r8153b_u1u2en(tp, true); + + if (tp->udev->speed != USB_SPEED_HIGH) + r8153b_u1u2en(tp, true); usb_enable_lpm(tp->udev); /* MAC clock speed down */ -- cgit From 19813162895a696c5814d76e5f8fb6203d70f6e0 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:12 +0800 Subject: r8152: avoid the MCU to clear the lanwake Avoid the MCU to clear the lanwake after suspending. It may cause the WOL fail. Disable LANWAKE_CLR_EN before suspending. Besides,enable it and reset the lanwake status when resuming or initializing. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b1a00f29455b..c037fc7adcea 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -68,6 +68,7 @@ #define PLA_LED_FEATURE 0xdd92 #define PLA_PHYAR 0xde00 #define PLA_BOOT_CTRL 0xe004 +#define PLA_LWAKE_CTRL_REG 0xe007 #define PLA_GPHY_INTR_IMR 0xe022 #define PLA_EEE_CR 0xe040 #define PLA_EEEP_CR 0xe080 @@ -95,6 +96,7 @@ #define PLA_TALLYCNT 0xe890 #define PLA_SFF_STS_7 0xe8de #define PLA_PHYSTATUS 0xe908 +#define PLA_CONFIG6 0xe90a /* CONFIG6 */ #define PLA_BP_BA 0xfc26 #define PLA_BP_0 0xfc28 #define PLA_BP_1 0xfc2a @@ -300,6 +302,9 @@ #define LINK_ON_WAKE_EN 0x0010 #define LINK_OFF_WAKE_EN 0x0008 +/* PLA_CONFIG6 */ +#define LANWAKE_CLR_EN BIT(0) + /* PLA_CONFIG5 */ #define BWF_EN 0x0040 #define MWF_EN 0x0020 @@ -356,6 +361,9 @@ /* PLA_BOOT_CTRL */ #define AUTOLOAD_DONE 0x0002 +/* PLA_LWAKE_CTRL_REG */ +#define LANWAKE_PIN BIT(7) + /* PLA_SUSPEND_FLAG */ #define LINK_CHG_EVENT BIT(0) @@ -4968,6 +4976,8 @@ static void rtl8152_down(struct r8152 *tp) static void rtl8153_up(struct r8152 *tp) { + u32 ocp_data; + if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; @@ -4975,6 +4985,15 @@ static void rtl8153_up(struct r8152 *tp) r8153_u2p3en(tp, false); r8153_aldps_en(tp, false); r8153_first_init(tp); + + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); + ocp_data |= LANWAKE_CLR_EN; + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data); + + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG); + ocp_data &= ~LANWAKE_PIN; + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, ocp_data); + r8153_aldps_en(tp, true); switch (tp->version) { @@ -4993,11 +5012,17 @@ static void rtl8153_up(struct r8152 *tp) static void rtl8153_down(struct r8152 *tp) { + u32 ocp_data; + if (test_bit(RTL8152_UNPLUG, &tp->flags)) { rtl_drop_queued_tx(tp); return; } + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); + ocp_data &= ~LANWAKE_CLR_EN; + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data); + r8153_u1u2en(tp, false); r8153_u2p3en(tp, false); r8153_power_cut_en(tp, false); @@ -5458,6 +5483,14 @@ static void r8153_init(struct r8152 *tp) r8153_mac_clk_spd(tp, false); usb_enable_lpm(tp->udev); + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); + ocp_data |= LANWAKE_CLR_EN; + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data); + + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG); + ocp_data &= ~LANWAKE_PIN; + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, ocp_data); + /* rx aggregation */ ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); -- cgit From aa475d935272481c9ffb1ae54eeca5c1819fbe1a Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:13 +0800 Subject: r8152: disable DelayPhyPwrChg When enabling this, the device would wait an internal signal which wouldn't be triggered. Then, the device couldn't enter P3 mode, so the power consumption is increased. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c037fc7adcea..3f425f974d03 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -31,7 +31,7 @@ #define NETNEXT_VERSION "11" /* Information for net */ -#define NET_VERSION "10" +#define NET_VERSION "11" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -109,6 +109,7 @@ #define PLA_BP_EN 0xfc38 #define USB_USB2PHY 0xb41e +#define USB_SSPHYLINK1 0xb426 #define USB_SSPHYLINK2 0xb428 #define USB_U2P3_CTRL 0xb460 #define USB_CSR_DUMMY1 0xb464 @@ -384,6 +385,9 @@ #define USB2PHY_SUSPEND 0x0001 #define USB2PHY_L1 0x0002 +/* USB_SSPHYLINK1 */ +#define DELAY_PHY_PWR_CHG BIT(1) + /* USB_SSPHYLINK2 */ #define pwd_dn_scale_mask 0x3ffe #define pwd_dn_scale(x) ((x) << 1) @@ -4994,6 +4998,10 @@ static void rtl8153_up(struct r8152 *tp) ocp_data &= ~LANWAKE_PIN; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SSPHYLINK1); + ocp_data &= ~DELAY_PHY_PWR_CHG; + ocp_write_word(tp, MCU_TYPE_USB, USB_SSPHYLINK1, ocp_data); + r8153_aldps_en(tp, true); switch (tp->version) { -- cgit From 971de2e572118c1128bff295341e37b6c8b8f108 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 22 Jan 2020 20:09:52 +0200 Subject: mlxsw: spectrum_acl: Fix use-after-free during reload During reload (or module unload), the router block is de-initialized. Among other things, this results in the removal of a default multicast route from each active virtual router (VRF). These default routes are configured during initialization to trap packets to the CPU. In Spectrum-2, unlike Spectrum-1, multicast routes are implemented using ACL rules. Since the router block is de-initialized before the ACL block, it is possible that the ACL rules corresponding to the default routes are deleted while being accessed by the ACL delayed work that queries rules' activity from the device. This can result in a rare use-after-free [1]. Fix this by protecting the rules list accessed by the delayed work with a lock. We cannot use a spinlock as the activity read operation is blocking. [1] [ 123.331662] ================================================================== [ 123.339920] BUG: KASAN: use-after-free in mlxsw_sp_acl_rule_activity_update_work+0x330/0x3b0 [ 123.349381] Read of size 8 at addr ffff8881f3bb4520 by task kworker/0:2/78 [ 123.357080] [ 123.358773] CPU: 0 PID: 78 Comm: kworker/0:2 Not tainted 5.5.0-rc5-custom-33108-gf5df95d3ef41 #2209 [ 123.368898] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 [ 123.378456] Workqueue: mlxsw_core mlxsw_sp_acl_rule_activity_update_work [ 123.385970] Call Trace: [ 123.388734] dump_stack+0xc6/0x11e [ 123.392568] print_address_description.constprop.4+0x21/0x340 [ 123.403236] __kasan_report.cold.8+0x76/0xb1 [ 123.414884] kasan_report+0xe/0x20 [ 123.418716] mlxsw_sp_acl_rule_activity_update_work+0x330/0x3b0 [ 123.444034] process_one_work+0xb06/0x19a0 [ 123.453731] worker_thread+0x91/0xe90 [ 123.467348] kthread+0x348/0x410 [ 123.476847] ret_from_fork+0x24/0x30 [ 123.480863] [ 123.482545] Allocated by task 73: [ 123.486273] save_stack+0x19/0x80 [ 123.490000] __kasan_kmalloc.constprop.6+0xc1/0xd0 [ 123.495379] mlxsw_sp_acl_rule_create+0xa7/0x230 [ 123.500566] mlxsw_sp2_mr_tcam_route_create+0xf6/0x3e0 [ 123.506334] mlxsw_sp_mr_tcam_route_create+0x5b4/0x820 [ 123.512102] mlxsw_sp_mr_table_create+0x3b5/0x690 [ 123.517389] mlxsw_sp_vr_get+0x289/0x4d0 [ 123.521797] mlxsw_sp_fib_node_get+0xa2/0x990 [ 123.526692] mlxsw_sp_router_fib4_event_work+0x54c/0x2d60 [ 123.532752] process_one_work+0xb06/0x19a0 [ 123.537352] worker_thread+0x91/0xe90 [ 123.541471] kthread+0x348/0x410 [ 123.545103] ret_from_fork+0x24/0x30 [ 123.549113] [ 123.550795] Freed by task 518: [ 123.554231] save_stack+0x19/0x80 [ 123.557958] __kasan_slab_free+0x125/0x170 [ 123.562556] kfree+0xd7/0x3a0 [ 123.565895] mlxsw_sp_acl_rule_destroy+0x63/0xd0 [ 123.571081] mlxsw_sp2_mr_tcam_route_destroy+0xd5/0x130 [ 123.576946] mlxsw_sp_mr_tcam_route_destroy+0xba/0x260 [ 123.582714] mlxsw_sp_mr_table_destroy+0x1ab/0x290 [ 123.588091] mlxsw_sp_vr_put+0x1db/0x350 [ 123.592496] mlxsw_sp_fib_node_put+0x298/0x4c0 [ 123.597486] mlxsw_sp_vr_fib_flush+0x15b/0x360 [ 123.602476] mlxsw_sp_router_fib_flush+0xba/0x470 [ 123.607756] mlxsw_sp_vrs_fini+0xaa/0x120 [ 123.612260] mlxsw_sp_router_fini+0x137/0x384 [ 123.617152] mlxsw_sp_fini+0x30a/0x4a0 [ 123.621374] mlxsw_core_bus_device_unregister+0x159/0x600 [ 123.627435] mlxsw_devlink_core_bus_device_reload_down+0x7e/0xb0 [ 123.634176] devlink_reload+0xb4/0x380 [ 123.638391] devlink_nl_cmd_reload+0x610/0x700 [ 123.643382] genl_rcv_msg+0x6a8/0xdc0 [ 123.647497] netlink_rcv_skb+0x134/0x3a0 [ 123.651904] genl_rcv+0x29/0x40 [ 123.655436] netlink_unicast+0x4d4/0x700 [ 123.659843] netlink_sendmsg+0x7c0/0xc70 [ 123.664251] __sys_sendto+0x265/0x3c0 [ 123.668367] __x64_sys_sendto+0xe2/0x1b0 [ 123.672773] do_syscall_64+0xa0/0x530 [ 123.676892] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 123.682552] [ 123.684238] The buggy address belongs to the object at ffff8881f3bb4500 [ 123.684238] which belongs to the cache kmalloc-128 of size 128 [ 123.698261] The buggy address is located 32 bytes inside of [ 123.698261] 128-byte region [ffff8881f3bb4500, ffff8881f3bb4580) [ 123.711303] The buggy address belongs to the page: [ 123.716682] page:ffffea0007ceed00 refcount:1 mapcount:0 mapping:ffff888236403500 index:0x0 [ 123.725958] raw: 0200000000000200 dead000000000100 dead000000000122 ffff888236403500 [ 123.734646] raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 [ 123.743315] page dumped because: kasan: bad access detected [ 123.749562] [ 123.751241] Memory state around the buggy address: [ 123.756620] ffff8881f3bb4400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 123.764716] ffff8881f3bb4480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 123.772812] >ffff8881f3bb4500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 123.780904] ^ [ 123.785697] ffff8881f3bb4580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 123.793793] ffff8881f3bb4600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 123.801883] ================================================================== Fixes: cf7221a4f5a5 ("mlxsw: spectrum_router: Add Multicast routing support for Spectrum-2") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 150b3a144b83..3d3cca596116 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ struct mlxsw_sp_acl { struct mlxsw_sp_fid *dummy_fid; struct rhashtable ruleset_ht; struct list_head rules; + struct mutex rules_lock; /* Protects rules list */ struct { struct delayed_work dw; unsigned long interval; /* ms */ @@ -701,7 +703,9 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, goto err_ruleset_block_bind; } + mutex_lock(&mlxsw_sp->acl->rules_lock); list_add_tail(&rule->list, &mlxsw_sp->acl->rules); + mutex_unlock(&mlxsw_sp->acl->rules_lock); block->rule_count++; block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0; @@ -723,7 +727,9 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; ruleset->ht_key.block->rule_count--; + mutex_lock(&mlxsw_sp->acl->rules_lock); list_del(&rule->list); + mutex_unlock(&mlxsw_sp->acl->rules_lock); if (!ruleset->ht_key.chain_index && mlxsw_sp_acl_ruleset_is_singular(ruleset)) mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, @@ -783,19 +789,18 @@ static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl) struct mlxsw_sp_acl_rule *rule; int err; - /* Protect internal structures from changes */ - rtnl_lock(); + mutex_lock(&acl->rules_lock); list_for_each_entry(rule, &acl->rules, list) { err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp, rule); if (err) goto err_rule_update; } - rtnl_unlock(); + mutex_unlock(&acl->rules_lock); return 0; err_rule_update: - rtnl_unlock(); + mutex_unlock(&acl->rules_lock); return err; } @@ -880,6 +885,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) acl->dummy_fid = fid; INIT_LIST_HEAD(&acl->rules); + mutex_init(&acl->rules_lock); err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam); if (err) goto err_acl_ops_init; @@ -892,6 +898,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) return 0; err_acl_ops_init: + mutex_destroy(&acl->rules_lock); mlxsw_sp_fid_put(fid); err_fid_get: rhashtable_destroy(&acl->ruleset_ht); @@ -908,6 +915,7 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam); + mutex_destroy(&acl->rules_lock); WARN_ON(!list_empty(&acl->rules)); mlxsw_sp_fid_put(acl->dummy_fid); rhashtable_destroy(&acl->ruleset_ht); -- cgit From 1efba987c48629c0c64703bb4ea76ca1a3771d17 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Jan 2020 09:07:35 -0800 Subject: tun: add mutex_unlock() call and napi.skb clearing in tun_get_user() If both IFF_NAPI_FRAGS mode and XDP are enabled, and the XDP program consumes the skb, we need to clear the napi.skb (or risk a use-after-free) and release the mutex (or risk a deadlock) WARNING: lock held when returning to user space! 5.5.0-rc6-syzkaller #0 Not tainted ------------------------------------------------ syz-executor.0/455 is leaving the kernel with locks still held! 1 lock held by syz-executor.0/455: #0: ffff888098f6e748 (&tfile->napi_mutex){+.+.}, at: tun_get_user+0x1604/0x3fc0 drivers/net/tun.c:1835 Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Petar Penkov Cc: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/tun.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 683d371e6e82..35e884a8242d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1936,6 +1936,10 @@ drop: if (ret != XDP_PASS) { rcu_read_unlock(); local_bh_enable(); + if (frags) { + tfile->napi.skb = NULL; + mutex_unlock(&tfile->napi_mutex); + } return total_len; } } -- cgit From bb48eb9b12a95db9d679025927269d4adda6dbd1 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Thu, 23 Jan 2020 13:20:18 +0100 Subject: fou: Fix IPv6 netlink policy When submitting v2 of "fou: Support binding FoU socket" (1713cb37bf67), I accidentally sent the wrong version of the patch and one fix was missing. In the initial version of the patch, as well as the version 2 that I submitted, I incorrectly used ".type" for the two V6-attributes. The correct is to use ".len". Reported-by: Dmitry Vyukov Fixes: 1713cb37bf67 ("fou: Support binding FoU socket") Signed-off-by: Kristian Evensen Signed-off-by: David S. Miller --- net/ipv4/fou.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 30fa771d382a..dcc79ff54b41 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -662,8 +662,8 @@ static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, - [FOU_ATTR_LOCAL_V6] = { .type = sizeof(struct in6_addr), }, - [FOU_ATTR_PEER_V6] = { .type = sizeof(struct in6_addr), }, + [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), }, + [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), }, [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, }, [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, }; -- cgit From 22e984493a41bf8081f13d9ed84def3ca8cfd427 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 22 Jan 2020 01:43:38 -0800 Subject: qlcnic: Fix CPU soft lockup while collecting firmware dump Driver while collecting firmware dump takes longer time to collect/process some of the firmware dump entries/memories. Bigger capture masks makes it worse as it results in larger amount of data being collected and results in CPU soft lockup. Place cond_resched() in some of the driver flows that are expectedly time consuming to relinquish the CPU to avoid CPU soft lockup panic. Signed-off-by: Shahed Shaikh Tested-by: Yonggen Xu Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index a496390b8632..07f9067affc6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2043,6 +2043,7 @@ static void qlcnic_83xx_exec_template_cmd(struct qlcnic_adapter *p_dev, break; } entry += p_hdr->size; + cond_resched(); } p_dev->ahw->reset.seq_index = index; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index afa10a163da1..f34ae8c75bc5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -703,6 +703,7 @@ static u32 qlcnic_read_memory_test_agent(struct qlcnic_adapter *adapter, addr += 16; reg_read -= 16; ret += 16; + cond_resched(); } out: mutex_unlock(&adapter->ahw->mem_lock); @@ -1383,6 +1384,7 @@ int qlcnic_dump_fw(struct qlcnic_adapter *adapter) buf_offset += entry->hdr.cap_size; entry_offset += entry->hdr.offset; buffer = fw_dump->data + buf_offset; + cond_resched(); } fw_dump->clr = 1; -- cgit From 2934d2c67838ba442fbd0b0560ba641044a5ceba Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 22 Jan 2020 15:20:27 +0200 Subject: dt-bindings: net: add fsl,erratum-a011043 Add an entry for erratum A011043: the MDIO_CFG[MDIO_RD_ER] bit may be falsely set when reading internal PCS registers. MDIO reads to internal PCS registers may result in having the MDIO_CFG[MDIO_RD_ER] bit set, even when there is no error and read data (MDIO_DATA[MDIO_DATA]) is correct. Software may get false read error when reading internal PCS registers through MDIO. As a workaround, all internal MDIO accesses should ignore the MDIO_CFG[MDIO_RD_ER] bit. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/fsl-fman.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt index 299c0dcd67db..250f8d8cdce4 100644 --- a/Documentation/devicetree/bindings/net/fsl-fman.txt +++ b/Documentation/devicetree/bindings/net/fsl-fman.txt @@ -403,6 +403,19 @@ PROPERTIES The settings and programming routines for internal/external MDIO are different. Must be included for internal MDIO. +- fsl,erratum-a011043 + Usage: optional + Value type: + Definition: Indicates the presence of the A011043 erratum + describing that the MDIO_CFG[MDIO_RD_ER] bit may be falsely + set when reading internal PCS registers. MDIO reads to + internal PCS registers may result in having the + MDIO_CFG[MDIO_RD_ER] bit set, even when there is no error and + read data (MDIO_DATA[MDIO_DATA]) is correct. + Software may get false read error when reading internal + PCS registers through MDIO. As a workaround, all internal + MDIO accesses should ignore the MDIO_CFG[MDIO_RD_ER] bit. + For internal PHY device on internal mdio bus, a PHY node should be created. See the definition of the PHY node in booting-without-of.txt for an example of how to define a PHY (Internal PHY has no interrupt line). -- cgit From 73d527aef68f7644e59f22ce7f9ac75e7b533aea Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 22 Jan 2020 15:20:28 +0200 Subject: powerpc/fsl/dts: add fsl,erratum-a011043 Add fsl,erratum-a011043 to internal MDIO buses. Software may get false read error when reading internal PCS registers through MDIO. As a workaround, all internal MDIO accesses should ignore the MDIO_CFG[MDIO_RD_ER] bit. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi | 1 + 18 files changed, 18 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi index e1a961f05dcd..baa0c503e741 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi index c288f3c6c637..93095600e808 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy6: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi index 94f3e7175012..ff4bd38f0645 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi index 94a76982d214..1fa38ed6f59e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy7: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi index b5ff5f71c6b8..a8cc9780c0c4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi index ee44182c6348..8b8bd70c9382 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi index f05f0d775039..619c880b54d8 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy2: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi index a9114ec51075..d7ebb73a400d 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy3: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi index 44dd00ac7367..b151d696a069 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy4: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi index 5b1b84b58602..adc0ae0013a3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy5: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi index 0e1daaef9e74..435047e0e250 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy14: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi index 68c5ef779266..c098657cca0a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy15: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi index 605363cc1117..9d06824815f3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy8: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi index 1955dfa13634..70e947730c4b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy9: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi index 2c1476454ee0..ad96e6529595 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy10: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi index b8b541ff5fb0..034bc4b71f7a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy11: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi index 4b2cfddd1b15..93ca23d82b39 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy12: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi index 0a52ddf7cc17..23b3117a2fd2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy13: ethernet-phy@0 { reg = <0x0>; -- cgit From 1d3ca681b9d9575ccf696ebc2840a1ebb1fd4074 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 22 Jan 2020 15:20:29 +0200 Subject: net/fsl: treat fsl,erratum-a011043 When fsl,erratum-a011043 is set, adjust for erratum A011043: MDIO reads to internal PCS registers may result in having the MDIO_CFG[MDIO_RD_ER] bit set, even when there is no error and read data (MDIO_DATA[MDIO_DATA]) is correct. Software may get false read error when reading internal PCS registers through MDIO. As a workaround, all internal MDIO accesses should ignore the MDIO_CFG[MDIO_RD_ER] bit. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/xgmac_mdio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index e03b30c60dcf..c82c85ef5fb3 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -49,6 +49,7 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; bool is_little_endian; + bool has_a011043; }; static u32 xgmac_read32(void __iomem *regs, @@ -226,7 +227,8 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) return ret; /* Return all Fs if nothing was there */ - if (xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) { + if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && + !priv->has_a011043) { dev_err(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); @@ -274,6 +276,9 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->is_little_endian = of_property_read_bool(pdev->dev.of_node, "little-endian"); + priv->has_a011043 = of_property_read_bool(pdev->dev.of_node, + "fsl,erratum-a011043"); + ret = of_mdiobus_register(bus, np); if (ret) { dev_err(&pdev->dev, "cannot register MDIO bus\n"); -- cgit From 457bfc0a4bf531487ecc3cf82ec728a5e114fb1e Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 22 Jan 2020 16:15:14 +0200 Subject: net: fsl/fman: rename IF_MODE_XGMII to IF_MODE_10G As the only 10G PHY interface type defined at the moment the code was developed was XGMII, although the PHY interface mode used was not XGMII, XGMII was used in the code to denote 10G. This patch renames the 10G interface mode to remove the ambiguity. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_memac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 41c6fa200e74..e1901874c19f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -110,7 +110,7 @@ do { \ /* Interface Mode Register (IF_MODE) */ #define IF_MODE_MASK 0x00000003 /* 30-31 Mask on i/f mode bits */ -#define IF_MODE_XGMII 0x00000000 /* 30-31 XGMII (10G) interface */ +#define IF_MODE_10G 0x00000000 /* 30-31 10G interface */ #define IF_MODE_GMII 0x00000002 /* 30-31 GMII (1G) interface */ #define IF_MODE_RGMII 0x00000004 #define IF_MODE_RGMII_AUTO 0x00008000 @@ -440,7 +440,7 @@ static int init(struct memac_regs __iomem *regs, struct memac_cfg *cfg, tmp = 0; switch (phy_if) { case PHY_INTERFACE_MODE_XGMII: - tmp |= IF_MODE_XGMII; + tmp |= IF_MODE_10G; break; default: tmp |= IF_MODE_GMII; -- cgit From 865ad2f2201dc18685ba2686f13217f8b3a9c52c Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Add mutual exclusion for accessing shared state The netif_stop_queue() call in sonic_send_packet() races with the netif_wake_queue() call in sonic_interrupt(). This causes issues like "NETDEV WATCHDOG: eth0 (macsonic): transmit queue 0 timed out". Fix this by disabling interrupts when accessing tx_skb[] and next_tx. Update a comment to clarify the synchronization properties. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 49 +++++++++++++++++++++++++----------- drivers/net/ethernet/natsemi/sonic.h | 1 + 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b339125b2f09..8a7cff516281 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -64,6 +64,8 @@ static int sonic_open(struct net_device *dev) netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__); + spin_lock_init(&lp->lock); + for (i = 0; i < SONIC_NUM_RRS; i++) { struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (skb == NULL) { @@ -206,8 +208,6 @@ static void sonic_tx_timeout(struct net_device *dev) * wake the tx queue * Concurrently with all of this, the SONIC is potentially writing to * the status flags of the TDs. - * Until some mutual exclusion is added, this code will not work with SMP. However, - * MIPS Jazz machines and m68k Macs were all uni-processor machines. */ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) @@ -215,7 +215,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); dma_addr_t laddr; int length; - int entry = lp->next_tx; + int entry; + unsigned long flags; netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb); @@ -237,6 +238,10 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + spin_lock_irqsave(&lp->lock, flags); + + entry = lp->next_tx; + sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ sonic_tda_put(dev, entry, SONIC_TD_FRAG_COUNT, 1); /* single fragment */ sonic_tda_put(dev, entry, SONIC_TD_PKTSIZE, length); /* length of packet */ @@ -246,10 +251,6 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) sonic_tda_put(dev, entry, SONIC_TD_LINK, sonic_tda_get(dev, entry, SONIC_TD_LINK) | SONIC_EOL); - /* - * Must set tx_skb[entry] only after clearing status, and - * before clearing EOL and before stopping queue - */ wmb(); lp->tx_len[entry] = length; lp->tx_laddr[entry] = laddr; @@ -272,6 +273,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); + spin_unlock_irqrestore(&lp->lock, flags); + return NETDEV_TX_OK; } @@ -284,9 +287,21 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct sonic_local *lp = netdev_priv(dev); int status; + unsigned long flags; + + /* The lock has two purposes. Firstly, it synchronizes sonic_interrupt() + * with sonic_send_packet() so that the two functions can share state. + * Secondly, it makes sonic_interrupt() re-entrant, as that is required + * by macsonic which must use two IRQs with different priority levels. + */ + spin_lock_irqsave(&lp->lock, flags); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + if (!status) { + spin_unlock_irqrestore(&lp->lock, flags); - if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)) return IRQ_NONE; + } do { if (status & SONIC_INT_PKTRX) { @@ -300,11 +315,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) int td_status; int freed_some = 0; - /* At this point, cur_tx is the index of a TD that is one of: - * unallocated/freed (status set & tx_skb[entry] clear) - * allocated and sent (status set & tx_skb[entry] set ) - * allocated and not yet sent (status clear & tx_skb[entry] set ) - * still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear) + /* The state of a Transmit Descriptor may be inferred + * from { tx_skb[entry], td_status } as follows. + * { clear, clear } => the TD has never been used + * { set, clear } => the TD was handed to SONIC + * { set, set } => the TD was handed back + * { clear, set } => the TD is available for re-use */ netif_dbg(lp, intr, dev, "%s: tx done\n", __func__); @@ -406,7 +422,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* load CAM done */ if (status & SONIC_INT_LCD) SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ - } while((status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + } while (status); + + spin_unlock_irqrestore(&lp->lock, flags); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 2b27f7049acb..f9506863e9d1 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -322,6 +322,7 @@ struct sonic_local { int msg_enable; struct device *device; /* generic device */ struct net_device_stats stats; + spinlock_t lock; }; #define TX_TIMEOUT (3 * HZ) -- cgit From 5fedabf5a70be26b19d7520f09f12a62274317c6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Clear interrupt flags immediately The chip can change a packet's descriptor status flags at any time. However, an active interrupt flag gets cleared rather late. This allows a race condition that could theoretically lose an interrupt. Fix this by clearing asserted interrupt flags immediately. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 8a7cff516281..1109070a5154 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -304,10 +304,11 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) } do { + SONIC_WRITE(SONIC_ISR, status); /* clear the interrupt(s) */ + if (status & SONIC_INT_PKTRX) { netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__); sonic_rx(dev); /* got packet(s) */ - SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */ } if (status & SONIC_INT_TXDN) { @@ -362,7 +363,6 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if (freed_some || lp->tx_skb[entry] == NULL) netif_wake_queue(dev); /* The ring is no longer full */ lp->cur_tx = entry; - SONIC_WRITE(SONIC_ISR, SONIC_INT_TXDN); /* clear the interrupt */ } /* @@ -372,42 +372,31 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n", __func__); lp->stats.rx_fifo_errors++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */ } if (status & SONIC_INT_RDE) { netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n", __func__); lp->stats.rx_dropped++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */ } if (status & SONIC_INT_RBAE) { netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n", __func__); lp->stats.rx_dropped++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */ } /* counter overruns; all counters are 16bit wide */ - if (status & SONIC_INT_FAE) { + if (status & SONIC_INT_FAE) lp->stats.rx_frame_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_FAE); /* clear the interrupt */ - } - if (status & SONIC_INT_CRC) { + if (status & SONIC_INT_CRC) lp->stats.rx_crc_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_CRC); /* clear the interrupt */ - } - if (status & SONIC_INT_MP) { + if (status & SONIC_INT_MP) lp->stats.rx_missed_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_MP); /* clear the interrupt */ - } /* transmit error */ - if (status & SONIC_INT_TXER) { + if (status & SONIC_INT_TXER) if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n", __func__); - SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */ - } /* bus retry */ if (status & SONIC_INT_BR) { @@ -416,13 +405,8 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* ... to help debug DMA problems causing endless interrupts. */ /* Bounce the eth interface to turn on the interrupt again. */ SONIC_WRITE(SONIC_IMR, 0); - SONIC_WRITE(SONIC_ISR, SONIC_INT_BR); /* clear the interrupt */ } - /* load CAM done */ - if (status & SONIC_INT_LCD) - SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ - status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; } while (status); -- cgit From e3885f576196ddfc670b3d53e745de96ffcb49ab Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Use MMIO accessors The driver accesses descriptor memory which is simultaneously accessed by the chip, so the compiler must not be allowed to re-order CPU accesses. sonic_buf_get() used 'volatile' to prevent that. sonic_buf_put() should have done so too but was overlooked. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index f9506863e9d1..fb160dfdf4ca 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -345,30 +345,30 @@ static void sonic_msg_init(struct net_device *dev); as far as we can tell. */ /* OpenBSD calls this "SWO". I'd like to think that sonic_buf_put() is a much better name. */ -static inline void sonic_buf_put(void* base, int bitmode, +static inline void sonic_buf_put(u16 *base, int bitmode, int offset, __u16 val) { if (bitmode) #ifdef __BIG_ENDIAN - ((__u16 *) base + (offset*2))[1] = val; + __raw_writew(val, base + (offset * 2) + 1); #else - ((__u16 *) base + (offset*2))[0] = val; + __raw_writew(val, base + (offset * 2) + 0); #endif else - ((__u16 *) base)[offset] = val; + __raw_writew(val, base + (offset * 1) + 0); } -static inline __u16 sonic_buf_get(void* base, int bitmode, +static inline __u16 sonic_buf_get(u16 *base, int bitmode, int offset) { if (bitmode) #ifdef __BIG_ENDIAN - return ((volatile __u16 *) base + (offset*2))[1]; + return __raw_readw(base + (offset * 2) + 1); #else - return ((volatile __u16 *) base + (offset*2))[0]; + return __raw_readw(base + (offset * 2) + 0); #endif else - return ((volatile __u16 *) base)[offset]; + return __raw_readw(base + (offset * 1) + 0); } /* Inlines that you should actually use for reading/writing DMA buffers */ -- cgit From 427db97df1ee721c20bdc9a66db8a9e1da719855 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Fix interface error stats collection The tx_aborted_errors statistic should count packets flagged with EXD, EXC, FU, or BCM bits because those bits denote an aborted transmission. That corresponds to the bitmask 0x0446, not 0x0642. Use macros for these constants to avoid mistakes. Better to leave out FIFO Underruns (FU) as there's a separate counter for that purpose. Don't lump all these errors in with the general tx_errors counter as that's used for tx timeout events. On the rx side, don't count RDE and RBAE interrupts as dropped packets. These interrupts don't indicate a lost packet, just a lack of resources. When a lack of resources results in a lost packet, this gets reported in the rx_missed_errors counter (along with RFO events). Don't double-count rx_frame_errors and rx_crc_errors. Don't use the general rx_errors counter for events that already have special counters. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 21 +++++++-------------- drivers/net/ethernet/natsemi/sonic.h | 1 + 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 1109070a5154..b31c7af32dfa 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -330,18 +330,19 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0) break; - if (td_status & 0x0001) { + if (td_status & SONIC_TCR_PTX) { lp->stats.tx_packets++; lp->stats.tx_bytes += sonic_tda_get(dev, entry, SONIC_TD_PKTSIZE); } else { - lp->stats.tx_errors++; - if (td_status & 0x0642) + if (td_status & (SONIC_TCR_EXD | + SONIC_TCR_EXC | SONIC_TCR_BCM)) lp->stats.tx_aborted_errors++; - if (td_status & 0x0180) + if (td_status & + (SONIC_TCR_NCRS | SONIC_TCR_CRLS)) lp->stats.tx_carrier_errors++; - if (td_status & 0x0020) + if (td_status & SONIC_TCR_OWC) lp->stats.tx_window_errors++; - if (td_status & 0x0004) + if (td_status & SONIC_TCR_FU) lp->stats.tx_fifo_errors++; } @@ -371,17 +372,14 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if (status & SONIC_INT_RFO) { netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n", __func__); - lp->stats.rx_fifo_errors++; } if (status & SONIC_INT_RDE) { netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n", __func__); - lp->stats.rx_dropped++; } if (status & SONIC_INT_RBAE) { netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n", __func__); - lp->stats.rx_dropped++; } /* counter overruns; all counters are 16bit wide */ @@ -473,11 +471,6 @@ static void sonic_rx(struct net_device *dev) sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); } else { /* This should only happen, if we enable accepting broken packets. */ - lp->stats.rx_errors++; - if (status & SONIC_RCR_FAER) - lp->stats.rx_frame_errors++; - if (status & SONIC_RCR_CRCR) - lp->stats.rx_crc_errors++; } if (status & SONIC_RCR_LPKT) { /* diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index fb160dfdf4ca..9e4ff8dd032d 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -175,6 +175,7 @@ #define SONIC_TCR_NCRS 0x0100 #define SONIC_TCR_CRLS 0x0080 #define SONIC_TCR_EXC 0x0040 +#define SONIC_TCR_OWC 0x0020 #define SONIC_TCR_PMB 0x0008 #define SONIC_TCR_FU 0x0004 #define SONIC_TCR_BCM 0x0002 -- cgit From 9e311820f67e740f4fb8dcb82b4c4b5b05bdd1a5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Fix receive buffer handling The SONIC can sometimes advance its rx buffer pointer (RRP register) without advancing its rx descriptor pointer (CRDA register). As a result the index of the current rx descriptor may not equal that of the current rx buffer. The driver mistakenly assumes that they are always equal. This assumption leads to incorrect packet lengths and possible packet duplication. Avoid this by calling a new function to locate the buffer corresponding to a given descriptor. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 35 ++++++++++++++++++++++++++++++----- drivers/net/ethernet/natsemi/sonic.h | 5 +++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b31c7af32dfa..c666bbf15116 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -413,6 +413,21 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +/* Return the array index corresponding to a given Receive Buffer pointer. */ +static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, + unsigned int last) +{ + unsigned int i = last; + + do { + i = (i + 1) & SONIC_RRS_MASK; + if (addr == lp->rx_laddr[i]) + return i; + } while (i != last); + + return -ENOENT; +} + /* * We have a good packet(s), pass it/them up the network stack. */ @@ -432,6 +447,16 @@ static void sonic_rx(struct net_device *dev) status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); if (status & SONIC_RCR_PRX) { + u32 addr = (sonic_rda_get(dev, entry, + SONIC_RD_PKTPTR_H) << 16) | + sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); + int i = index_from_addr(lp, addr, entry); + + if (i < 0) { + WARN_ONCE(1, "failed to find buffer!\n"); + break; + } + /* Malloc up new buffer. */ new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (new_skb == NULL) { @@ -453,7 +478,7 @@ static void sonic_rx(struct net_device *dev) /* now we have a new skb to replace it, pass the used one up the stack */ dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); - used_skb = lp->rx_skb[entry]; + used_skb = lp->rx_skb[i]; pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); skb_trim(used_skb, pkt_len); used_skb->protocol = eth_type_trans(used_skb, dev); @@ -462,13 +487,13 @@ static void sonic_rx(struct net_device *dev) lp->stats.rx_bytes += pkt_len; /* and insert the new skb */ - lp->rx_laddr[entry] = new_laddr; - lp->rx_skb[entry] = new_skb; + lp->rx_laddr[i] = new_laddr; + lp->rx_skb[i] = new_skb; bufadr_l = (unsigned long)new_laddr & 0xffff; bufadr_h = (unsigned long)new_laddr >> 16; - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, bufadr_l); - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); } else { /* This should only happen, if we enable accepting broken packets. */ } diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 9e4ff8dd032d..e6d47e45c5c2 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -275,8 +275,9 @@ #define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ #define SONIC_NUM_TDS 16 /* number of transmit descriptors */ -#define SONIC_RDS_MASK (SONIC_NUM_RDS-1) -#define SONIC_TDS_MASK (SONIC_NUM_TDS-1) +#define SONIC_RRS_MASK (SONIC_NUM_RRS - 1) +#define SONIC_RDS_MASK (SONIC_NUM_RDS - 1) +#define SONIC_TDS_MASK (SONIC_NUM_TDS - 1) #define SONIC_RBSIZE 1520 /* size of one resource buffer */ -- cgit From eaabfd19b2c787bbe88dc32424b9a43d67293422 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Avoid needless receive descriptor EOL flag updates The while loop in sonic_rx() traverses the rx descriptor ring. It stops when it reaches a descriptor that the SONIC has not used. Each iteration advances the EOL flag so the SONIC can keep using more descriptors. Therefore, the while loop has no definite termination condition. The algorithm described in the National Semiconductor literature is quite different. It consumes descriptors up to the one with its EOL flag set (which will also have its "in use" flag set). All freed descriptors are then returned to the ring at once, by adjusting the EOL flags (and link pointers). Adopt the algorithm from datasheet as it's simpler, terminates quickly and avoids a lot of pointless descriptor EOL flag changes. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index c666bbf15116..2cee702f49b8 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -436,6 +436,7 @@ static void sonic_rx(struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); int status; int entry = lp->cur_rx; + int prev_entry = lp->eol_rx; while (sonic_rda_get(dev, entry, SONIC_RD_IN_USE) == 0) { struct sk_buff *used_skb; @@ -516,13 +517,21 @@ static void sonic_rx(struct net_device *dev) /* * give back the descriptor */ - sonic_rda_put(dev, entry, SONIC_RD_LINK, - sonic_rda_get(dev, entry, SONIC_RD_LINK) | SONIC_EOL); sonic_rda_put(dev, entry, SONIC_RD_IN_USE, 1); - sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK, - sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK) & ~SONIC_EOL); - lp->eol_rx = entry; - lp->cur_rx = entry = (entry + 1) & SONIC_RDS_MASK; + + prev_entry = entry; + entry = (entry + 1) & SONIC_RDS_MASK; + } + + lp->cur_rx = entry; + + if (prev_entry != lp->eol_rx) { + /* Advance the EOL flag to put descriptors back into service */ + sonic_rda_put(dev, prev_entry, SONIC_RD_LINK, SONIC_EOL | + sonic_rda_get(dev, prev_entry, SONIC_RD_LINK)); + sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK, ~SONIC_EOL & + sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK)); + lp->eol_rx = prev_entry; } /* * If any worth-while packets have been received, netif_rx() -- cgit From 94b166349503957079ef5e7d6f667f157aea014a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Improve receive descriptor status flag check After sonic_tx_timeout() calls sonic_init(), it can happen that sonic_rx() will subsequently encounter a receive descriptor with no flags set. Remove the comment that says that this can't happen. When giving a receive descriptor to the SONIC, clear the descriptor status field. That way, any rx descriptor with flags set can only be a newly received packet. Don't process a descriptor without the LPKT bit set. The buffer is still in use by the SONIC. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 2cee702f49b8..9d2bddc4951b 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -434,7 +434,6 @@ static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, static void sonic_rx(struct net_device *dev) { struct sonic_local *lp = netdev_priv(dev); - int status; int entry = lp->cur_rx; int prev_entry = lp->eol_rx; @@ -445,9 +444,10 @@ static void sonic_rx(struct net_device *dev) u16 bufadr_l; u16 bufadr_h; int pkt_len; + u16 status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); - status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); - if (status & SONIC_RCR_PRX) { + /* If the RD has LPKT set, the chip has finished with the RB */ + if ((status & SONIC_RCR_PRX) && (status & SONIC_RCR_LPKT)) { u32 addr = (sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_H) << 16) | sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); @@ -495,10 +495,6 @@ static void sonic_rx(struct net_device *dev) bufadr_h = (unsigned long)new_laddr >> 16; sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); - } else { - /* This should only happen, if we enable accepting broken packets. */ - } - if (status & SONIC_RCR_LPKT) { /* * this was the last packet out of the current receive buffer * give the buffer back to the SONIC @@ -511,12 +507,11 @@ static void sonic_rx(struct net_device *dev) __func__); SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */ } - } else - printk(KERN_ERR "%s: rx desc without RCR_LPKT. Shouldn't happen !?\n", - dev->name); + } /* * give back the descriptor */ + sonic_rda_put(dev, entry, SONIC_RD_STATUS, 0); sonic_rda_put(dev, entry, SONIC_RD_IN_USE, 1); prev_entry = entry; -- cgit From 89ba879e95582d3bba55081e45b5409e883312ca Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Fix receive buffer replenishment As soon as the driver is finished with a receive buffer it allocs a new one and overwrites the corresponding RRA entry with a new buffer pointer. Problem is, the buffer pointer is split across two word-sized registers. It can't be updated in one atomic store. So this operation races with the chip while it stores received packets and advances its RRP register. This could result in memory corruption by a DMA write. Avoid this problem by adding buffers only at the location given by the RWP register, in accordance with the National Semiconductor datasheet. Re-factor this code into separate functions to calculate a RRA pointer and to update the RWP. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 150 +++++++++++++++++++++-------------- drivers/net/ethernet/natsemi/sonic.h | 18 ++++- 2 files changed, 105 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 9d2bddc4951b..26848acec63d 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -428,6 +428,59 @@ static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, return -ENOENT; } +/* Allocate and map a new skb to be used as a receive buffer. */ +static bool sonic_alloc_rb(struct net_device *dev, struct sonic_local *lp, + struct sk_buff **new_skb, dma_addr_t *new_addr) +{ + *new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); + if (!*new_skb) + return false; + + if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2) + skb_reserve(*new_skb, 2); + + *new_addr = dma_map_single(lp->device, skb_put(*new_skb, SONIC_RBSIZE), + SONIC_RBSIZE, DMA_FROM_DEVICE); + if (!*new_addr) { + dev_kfree_skb(*new_skb); + *new_skb = NULL; + return false; + } + + return true; +} + +/* Place a new receive resource in the Receive Resource Area and update RWP. */ +static void sonic_update_rra(struct net_device *dev, struct sonic_local *lp, + dma_addr_t old_addr, dma_addr_t new_addr) +{ + unsigned int entry = sonic_rr_entry(dev, SONIC_READ(SONIC_RWP)); + unsigned int end = sonic_rr_entry(dev, SONIC_READ(SONIC_RRP)); + u32 buf; + + /* The resources in the range [RRP, RWP) belong to the SONIC. This loop + * scans the other resources in the RRA, those in the range [RWP, RRP). + */ + do { + buf = (sonic_rra_get(dev, entry, SONIC_RR_BUFADR_H) << 16) | + sonic_rra_get(dev, entry, SONIC_RR_BUFADR_L); + + if (buf == old_addr) + break; + + entry = (entry + 1) & SONIC_RRS_MASK; + } while (entry != end); + + WARN_ONCE(buf != old_addr, "failed to find resource!\n"); + + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, new_addr >> 16); + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, new_addr & 0xffff); + + entry = (entry + 1) & SONIC_RRS_MASK; + + SONIC_WRITE(SONIC_RWP, sonic_rr_addr(dev, entry)); +} + /* * We have a good packet(s), pass it/them up the network stack. */ @@ -436,18 +489,15 @@ static void sonic_rx(struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); int entry = lp->cur_rx; int prev_entry = lp->eol_rx; + bool rbe = false; while (sonic_rda_get(dev, entry, SONIC_RD_IN_USE) == 0) { - struct sk_buff *used_skb; - struct sk_buff *new_skb; - dma_addr_t new_laddr; - u16 bufadr_l; - u16 bufadr_h; - int pkt_len; u16 status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); /* If the RD has LPKT set, the chip has finished with the RB */ if ((status & SONIC_RCR_PRX) && (status & SONIC_RCR_LPKT)) { + struct sk_buff *new_skb; + dma_addr_t new_laddr; u32 addr = (sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_H) << 16) | sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); @@ -458,55 +508,35 @@ static void sonic_rx(struct net_device *dev) break; } - /* Malloc up new buffer. */ - new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); - if (new_skb == NULL) { + if (sonic_alloc_rb(dev, lp, &new_skb, &new_laddr)) { + struct sk_buff *used_skb = lp->rx_skb[i]; + int pkt_len; + + /* Pass the used buffer up the stack */ + dma_unmap_single(lp->device, addr, SONIC_RBSIZE, + DMA_FROM_DEVICE); + + pkt_len = sonic_rda_get(dev, entry, + SONIC_RD_PKTLEN); + skb_trim(used_skb, pkt_len); + used_skb->protocol = eth_type_trans(used_skb, + dev); + netif_rx(used_skb); + lp->stats.rx_packets++; + lp->stats.rx_bytes += pkt_len; + + lp->rx_skb[i] = new_skb; + lp->rx_laddr[i] = new_laddr; + } else { + /* Failed to obtain a new buffer so re-use it */ + new_laddr = addr; lp->stats.rx_dropped++; - break; } - /* provide 16 byte IP header alignment unless DMA requires otherwise */ - if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2) - skb_reserve(new_skb, 2); - - new_laddr = dma_map_single(lp->device, skb_put(new_skb, SONIC_RBSIZE), - SONIC_RBSIZE, DMA_FROM_DEVICE); - if (!new_laddr) { - dev_kfree_skb(new_skb); - printk(KERN_ERR "%s: Failed to map rx buffer, dropping packet.\n", dev->name); - lp->stats.rx_dropped++; - break; - } - - /* now we have a new skb to replace it, pass the used one up the stack */ - dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); - used_skb = lp->rx_skb[i]; - pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); - skb_trim(used_skb, pkt_len); - used_skb->protocol = eth_type_trans(used_skb, dev); - netif_rx(used_skb); - lp->stats.rx_packets++; - lp->stats.rx_bytes += pkt_len; - - /* and insert the new skb */ - lp->rx_laddr[i] = new_laddr; - lp->rx_skb[i] = new_skb; - - bufadr_l = (unsigned long)new_laddr & 0xffff; - bufadr_h = (unsigned long)new_laddr >> 16; - sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); - sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); - /* - * this was the last packet out of the current receive buffer - * give the buffer back to the SONIC + /* If RBE is already asserted when RWP advances then + * it's safe to clear RBE after processing this packet. */ - lp->cur_rwp += SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode); - if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff; - SONIC_WRITE(SONIC_RWP, lp->cur_rwp); - if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) { - netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n", - __func__); - SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */ - } + rbe = rbe || SONIC_READ(SONIC_ISR) & SONIC_INT_RBE; + sonic_update_rra(dev, lp, addr, new_laddr); } /* * give back the descriptor @@ -528,6 +558,9 @@ static void sonic_rx(struct net_device *dev) sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK)); lp->eol_rx = prev_entry; } + + if (rbe) + SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* * If any worth-while packets have been received, netif_rx() * has done a mark_bh(NET_BH) for us and will work on them @@ -642,15 +675,10 @@ static int sonic_init(struct net_device *dev) } /* initialize all RRA registers */ - lp->rra_end = (lp->rra_laddr + SONIC_NUM_RRS * SIZEOF_SONIC_RR * - SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; - lp->cur_rwp = (lp->rra_laddr + (SONIC_NUM_RRS - 1) * SIZEOF_SONIC_RR * - SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; - - SONIC_WRITE(SONIC_RSA, lp->rra_laddr & 0xffff); - SONIC_WRITE(SONIC_REA, lp->rra_end); - SONIC_WRITE(SONIC_RRP, lp->rra_laddr & 0xffff); - SONIC_WRITE(SONIC_RWP, lp->cur_rwp); + SONIC_WRITE(SONIC_RSA, sonic_rr_addr(dev, 0)); + SONIC_WRITE(SONIC_REA, sonic_rr_addr(dev, SONIC_NUM_RRS)); + SONIC_WRITE(SONIC_RRP, sonic_rr_addr(dev, 0)); + SONIC_WRITE(SONIC_RWP, sonic_rr_addr(dev, SONIC_NUM_RRS - 1)); SONIC_WRITE(SONIC_URRA, lp->rra_laddr >> 16); SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1)); diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index e6d47e45c5c2..cc2f7b4b77e3 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -314,8 +314,6 @@ struct sonic_local { u32 rda_laddr; /* logical DMA address of RDA */ dma_addr_t rx_laddr[SONIC_NUM_RRS]; /* logical DMA addresses of rx skbuffs */ dma_addr_t tx_laddr[SONIC_NUM_TDS]; /* logical DMA addresses of tx skbuffs */ - unsigned int rra_end; - unsigned int cur_rwp; unsigned int cur_rx; unsigned int cur_tx; /* first unacked transmit packet */ unsigned int eol_rx; @@ -450,6 +448,22 @@ static inline __u16 sonic_rra_get(struct net_device* dev, int entry, (entry * SIZEOF_SONIC_RR) + offset); } +static inline u16 sonic_rr_addr(struct net_device *dev, int entry) +{ + struct sonic_local *lp = netdev_priv(dev); + + return lp->rra_laddr + + entry * SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode); +} + +static inline u16 sonic_rr_entry(struct net_device *dev, u16 addr) +{ + struct sonic_local *lp = netdev_priv(dev); + + return (addr - (u16)lp->rra_laddr) / (SIZEOF_SONIC_RR * + SONIC_BUS_SCALE(lp->dma_bitmode)); +} + static const char version[] = "sonic.c:v0.92 20.9.98 tsbogend@alpha.franken.de\n"; -- cgit From 3f4b7e6a2be982fd8820a2b54d46dd9c351db899 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Quiesce SONIC before re-initializing descriptor memory Make sure the SONIC's DMA engine is idle before altering the transmit and receive descriptors. Add a helper for this as it will be needed again. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 25 +++++++++++++++++++++++++ drivers/net/ethernet/natsemi/sonic.h | 3 +++ 2 files changed, 28 insertions(+) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 26848acec63d..42a6b87d8886 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -116,6 +116,24 @@ static int sonic_open(struct net_device *dev) return 0; } +/* Wait for the SONIC to become idle. */ +static void sonic_quiesce(struct net_device *dev, u16 mask) +{ + struct sonic_local * __maybe_unused lp = netdev_priv(dev); + int i; + u16 bits; + + for (i = 0; i < 1000; ++i) { + bits = SONIC_READ(SONIC_CMD) & mask; + if (!bits) + return; + if (irqs_disabled() || in_interrupt()) + udelay(20); + else + usleep_range(100, 200); + } + WARN_ONCE(1, "command deadline expired! 0x%04x\n", bits); +} /* * Close the SONIC device @@ -132,6 +150,9 @@ static int sonic_close(struct net_device *dev) /* * stop the SONIC, disable interrupts */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -171,6 +192,9 @@ static void sonic_tx_timeout(struct net_device *dev) * put the Sonic into software-reset mode and * disable all interrupts before releasing DMA buffers */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -658,6 +682,7 @@ static int sonic_init(struct net_device *dev) */ SONIC_WRITE(SONIC_CMD, 0); SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); /* * initialize the receive resource area diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index cc2f7b4b77e3..1df6d2f06cc4 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -110,6 +110,9 @@ #define SONIC_CR_TXP 0x0002 #define SONIC_CR_HTX 0x0001 +#define SONIC_CR_ALL (SONIC_CR_LCAM | SONIC_CR_RRRA | \ + SONIC_CR_RXEN | SONIC_CR_TXP) + /* * SONIC data configuration bits */ -- cgit From 27e0c31c5f27c1d1a1d9d135c123069f60dcf97b Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Fix command register usage There are several issues relating to command register usage during chip initialization. Firstly, the SONIC sometimes comes out of software reset with the Start Timer bit set. This gets logged as, macsonic macsonic eth0: sonic_init: status=24, i=101 Avoid this by giving the Stop Timer command earlier than later. Secondly, the loop that waits for the Read RRA command to complete has the break condition inverted. That's why the for loop iterates until its termination condition. Call the helper for this instead. Finally, give the Receiver Enable command after clearing interrupts, not before, to avoid the possibility of losing an interrupt. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 42a6b87d8886..914cec2ccc28 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -664,7 +664,6 @@ static void sonic_multicast_list(struct net_device *dev) */ static int sonic_init(struct net_device *dev) { - unsigned int cmd; struct sonic_local *lp = netdev_priv(dev); int i; @@ -681,7 +680,7 @@ static int sonic_init(struct net_device *dev) * enable interrupts, then completely initialize the SONIC */ SONIC_WRITE(SONIC_CMD, 0); - SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS | SONIC_CR_STP); sonic_quiesce(dev, SONIC_CR_ALL); /* @@ -711,14 +710,7 @@ static int sonic_init(struct net_device *dev) netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__); SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA); - i = 0; - while (i++ < 100) { - if (SONIC_READ(SONIC_CMD) & SONIC_CR_RRRA) - break; - } - - netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__, - SONIC_READ(SONIC_CMD), i); + sonic_quiesce(dev, SONIC_CR_RRRA); /* * Initialize the receive descriptors so that they @@ -806,15 +798,11 @@ static int sonic_init(struct net_device *dev) * enable receiver, disable loopback * and enable all interrupts */ - SONIC_WRITE(SONIC_CMD, SONIC_CR_RXEN | SONIC_CR_STP); SONIC_WRITE(SONIC_RCR, SONIC_RCR_DEFAULT); SONIC_WRITE(SONIC_TCR, SONIC_TCR_DEFAULT); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_IMR, SONIC_IMR_DEFAULT); - - cmd = SONIC_READ(SONIC_CMD); - if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0) - printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXEN); netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__, SONIC_READ(SONIC_CMD)); -- cgit From 772f66421d5aa0b9f256056f513bbc38ac132271 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Fix CAM initialization Section 4.3.1 of the datasheet says, This bit [TXP] must not be set if a Load CAM operation is in progress (LCAM is set). The SONIC will lock up if both bits are set simultaneously. Testing has shown that the driver sometimes attempts to set LCAM while TXP is set. Avoid this by waiting for command completion before and after giving the LCAM command. After issuing the Load CAM command, poll for !SONIC_CR_LCAM rather than SONIC_INT_LCD, because the SONIC_CR_TXP bit can't be used until !SONIC_CR_LCAM. When in reset mode, take the opportunity to reset the CAM Enable register. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 914cec2ccc28..27b6f6585527 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -634,6 +634,8 @@ static void sonic_multicast_list(struct net_device *dev) (netdev_mc_count(dev) > 15)) { rcr |= SONIC_RCR_AMC; } else { + unsigned long flags; + netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__, netdev_mc_count(dev)); sonic_set_cam_enable(dev, 1); /* always enable our own address */ @@ -647,9 +649,14 @@ static void sonic_multicast_list(struct net_device *dev) i++; } SONIC_WRITE(SONIC_CDC, 16); - /* issue Load CAM command */ SONIC_WRITE(SONIC_CDP, lp->cda_laddr & 0xffff); + + /* LCAM and TXP commands can't be used simultaneously */ + spin_lock_irqsave(&lp->lock, flags); + sonic_quiesce(dev, SONIC_CR_TXP); SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); + sonic_quiesce(dev, SONIC_CR_LCAM); + spin_unlock_irqrestore(&lp->lock, flags); } } @@ -675,6 +682,9 @@ static int sonic_init(struct net_device *dev) SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); + /* While in reset mode, clear CAM Enable register */ + SONIC_WRITE(SONIC_CE, 0); + /* * clear software reset flag, disable receiver, clear and * enable interrupts, then completely initialize the SONIC @@ -785,14 +795,7 @@ static int sonic_init(struct net_device *dev) * load the CAM */ SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); - - i = 0; - while (i++ < 100) { - if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD) - break; - } - netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__, - SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i); + sonic_quiesce(dev, SONIC_CR_LCAM); /* * enable receiver, disable loopback -- cgit From 686f85d71d095f1d26b807e23b0f0bfd22042c45 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: net/sonic: Prevent tx watchdog timeout Section 5.5.3.2 of the datasheet says, If FIFO Underrun, Byte Count Mismatch, Excessive Collision, or Excessive Deferral (if enabled) errors occur, transmission ceases. In this situation, the chip asserts a TXER interrupt rather than TXDN. But the handler for the TXDN is the only way that the transmit queue gets restarted. Hence, an aborted transmission can result in a watchdog timeout. This problem can be reproduced on congested link, as that can result in excessive transmitter collisions. Another way to reproduce this is with a FIFO Underrun, which may be caused by DMA latency. In event of a TXER interrupt, prevent a watchdog timeout by restarting transmission. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 27b6f6585527..05e760444a92 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -415,10 +415,19 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) lp->stats.rx_missed_errors += 65536; /* transmit error */ - if (status & SONIC_INT_TXER) - if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) - netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n", - __func__); + if (status & SONIC_INT_TXER) { + u16 tcr = SONIC_READ(SONIC_TCR); + + netif_dbg(lp, tx_err, dev, "%s: TXER intr, TCR %04x\n", + __func__, tcr); + + if (tcr & (SONIC_TCR_EXD | SONIC_TCR_EXC | + SONIC_TCR_FU | SONIC_TCR_BCM)) { + /* Aborted transmission. Try again. */ + netif_stop_queue(dev); + SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); + } + } /* bus retry */ if (status & SONIC_INT_BR) { -- cgit From 61678d28d4a45ef376f5d02a839cc37509ae9281 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 Jan 2020 15:42:02 -0800 Subject: net_sched: fix datalen for ematch syzbot reported an out-of-bound access in em_nbyte. As initially analyzed by Eric, this is because em_nbyte sets its own em->datalen in em_nbyte_change() other than the one specified by user, but this value gets overwritten later by its caller tcf_em_validate(). We should leave em->datalen untouched to respect their choices. I audit all the in-tree ematch users, all of those implement ->change() set em->datalen, so we can just avoid setting it twice in this case. Reported-and-tested-by: syzbot+5af9a90dad568aa9f611@syzkaller.appspotmail.com Reported-by: syzbot+2f07903a5b05e7f36410@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/ematch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 8f2ad706784d..d0140a92694a 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -263,12 +263,12 @@ static int tcf_em_validate(struct tcf_proto *tp, } em->data = (unsigned long) v; } + em->datalen = data_len; } } em->matchid = em_hdr->matchid; em->flags = em_hdr->flags; - em->datalen = data_len; em->net = net; err = 0; -- cgit From 959b1825d9ab5081f7446ed3f4329384c7818af7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jan 2020 00:14:29 +0000 Subject: i40e: fix spelling mistake "to" -> "too" There is a spelling mistake in a hw_dbg message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index d4055037af89..45b90eb11adb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1113,7 +1113,7 @@ i40e_status i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num, */ pba_size--; if (pba_num_size < (((u32)pba_size * 2) + 1)) { - hw_dbg(hw, "Buffer to small for PBA data.\n"); + hw_dbg(hw, "Buffer too small for PBA data.\n"); return I40E_ERR_PARAM; } -- cgit From 971485a0d681954677ec6a7ed990359aceabc06e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jan 2020 00:43:28 +0000 Subject: ipvs: fix spelling mistake "to" -> "too" There is a spelling mistake in a IP_VS_ERR_RL message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 8dc892a9dc91..605e0f68f8bd 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1239,7 +1239,7 @@ static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer, p = msg_end; if (p + sizeof(s->v4) > buffer+buflen) { - IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); + IP_VS_ERR_RL("BACKUP, Dropping buffer, too small\n"); return; } s = (union ip_vs_sync_conn *)p; -- cgit From 43d88774d138496c88920e2bb012f5fd098ee6ad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jan 2020 00:45:55 +0000 Subject: caif_usb: fix spelling mistake "to" -> "too" There is a spelling mistake in a pr_warn message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/caif/caif_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 76bd67891fb3..a0116b9503d9 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -62,7 +62,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt) hpad = (info->hdr_len + CFUSB_PAD_DESCR_SZ) & (CFUSB_ALIGNMENT - 1); if (skb_headroom(skb) < ETH_HLEN + CFUSB_PAD_DESCR_SZ + hpad) { - pr_warn("Headroom to small\n"); + pr_warn("Headroom too small\n"); kfree_skb(skb); return -EIO; } -- cgit From 4d299f183314aa79131abdfa2c528f8807010ea8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jan 2020 09:27:30 +0000 Subject: net/rose: fix spelling mistake "to" -> "too" There is a spelling mistake in a printk message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/rose/af_rose.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 46b8ff24020d..1e8eeb044b07 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1475,7 +1475,7 @@ static int __init rose_proto_init(void) int rc; if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { - printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n"); + printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter too large\n"); rc = -EINVAL; goto out; } -- cgit From 2bec445f9bf35e52e395b971df48d3e1e5dc704a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Jan 2020 21:03:00 -0800 Subject: tcp: do not leave dangling pointers in tp->highest_sack Latest commit 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") apparently allowed syzbot to trigger various crashes in TCP stack [1] I believe this commit only made things easier for syzbot to find its way into triggering use-after-frees. But really the bugs could lead to bad TCP behavior or even plain crashes even for non malicious peers. I have audited all calls to tcp_rtx_queue_unlink() and tcp_rtx_queue_unlink_and_free() and made sure tp->highest_sack would be updated if we are removing from rtx queue the skb that tp->highest_sack points to. These updates were missing in three locations : 1) tcp_clean_rtx_queue() [This one seems quite serious, I have no idea why this was not caught earlier] 2) tcp_rtx_queue_purge() [Probably not a big deal for normal operations] 3) tcp_send_synack() [Probably not a big deal for normal operations] [1] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1864 [inline] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1856 [inline] BUG: KASAN: use-after-free in tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 Read of size 4 at addr ffff8880a488d068 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:134 tcp_highest_sack_seq include/net/tcp.h:1864 [inline] tcp_highest_sack_seq include/net/tcp.h:1856 [inline] tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 tcp_try_undo_partial net/ipv4/tcp_input.c:2730 [inline] tcp_fastretrans_alert+0xf74/0x23f0 net/ipv4/tcp_input.c:2847 tcp_ack+0x2577/0x5bf0 net/ipv4/tcp_input.c:3710 tcp_rcv_established+0x6dd/0x1e90 net/ipv4/tcp_input.c:5706 tcp_v4_do_rcv+0x619/0x8d0 net/ipv4/tcp_ipv4.c:1619 tcp_v4_rcv+0x307f/0x3b40 net/ipv4/tcp_ipv4.c:2001 ip_protocol_deliver_rcu+0x5a/0x880 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x23b/0x380 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x1db/0x2f0 net/ipv4/ip_input.c:428 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:538 __netif_receive_skb_one_core+0x113/0x1a0 net/core/dev.c:5148 __netif_receive_skb+0x2c/0x1d0 net/core/dev.c:5262 process_backlog+0x206/0x750 net/core/dev.c:6093 napi_poll net/core/dev.c:6530 [inline] net_rx_action+0x508/0x1120 net/core/dev.c:6598 __do_softirq+0x262/0x98c kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:603 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:595 smpboot_thread_fn+0x6a3/0xa40 kernel/smpboot.c:165 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Allocated by task 10091: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc mm/kasan/common.c:513 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:521 slab_post_alloc_hook mm/slab.h:584 [inline] slab_alloc_node mm/slab.c:3263 [inline] kmem_cache_alloc_node+0x138/0x740 mm/slab.c:3575 __alloc_skb+0xd5/0x5e0 net/core/skbuff.c:198 alloc_skb_fclone include/linux/skbuff.h:1099 [inline] sk_stream_alloc_skb net/ipv4/tcp.c:875 [inline] sk_stream_alloc_skb+0x113/0xc90 net/ipv4/tcp.c:852 tcp_sendmsg_locked+0xcf9/0x3470 net/ipv4/tcp.c:1282 tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1432 inet_sendmsg+0x9e/0xe0 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:672 __sys_sendto+0x262/0x380 net/socket.c:1998 __do_sys_sendto net/socket.c:2010 [inline] __se_sys_sendto net/socket.c:2006 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:2006 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 10095: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:335 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 __cache_free mm/slab.c:3426 [inline] kmem_cache_free+0x86/0x320 mm/slab.c:3694 kfree_skbmem+0x178/0x1c0 net/core/skbuff.c:645 __kfree_skb+0x1e/0x30 net/core/skbuff.c:681 sk_eat_skb include/net/sock.h:2453 [inline] tcp_recvmsg+0x1252/0x2930 net/ipv4/tcp.c:2166 inet_recvmsg+0x136/0x610 net/ipv4/af_inet.c:838 sock_recvmsg_nosec net/socket.c:886 [inline] sock_recvmsg net/socket.c:904 [inline] sock_recvmsg+0xce/0x110 net/socket.c:900 __sys_recvfrom+0x1ff/0x350 net/socket.c:2055 __do_sys_recvfrom net/socket.c:2073 [inline] __se_sys_recvfrom net/socket.c:2069 [inline] __x64_sys_recvfrom+0xe1/0x1a0 net/socket.c:2069 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8880a488d040 which belongs to the cache skbuff_fclone_cache of size 456 The buggy address is located 40 bytes inside of 456-byte region [ffff8880a488d040, ffff8880a488d208) The buggy address belongs to the page: page:ffffea0002922340 refcount:1 mapcount:0 mapping:ffff88821b057000 index:0x0 raw: 00fffe0000000200 ffffea00022a5788 ffffea0002624a48 ffff88821b057000 raw: 0000000000000000 ffff8880a488d040 0000000100000006 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880a488cf00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880a488cf80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880a488d000: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8880a488d080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a488d100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") Fixes: 50895b9de1d3 ("tcp: highest_sack fix") Fixes: 737ff314563c ("tcp: use sequence distance to detect reordering") Signed-off-by: Eric Dumazet Cc: Cambda Zhu Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_output.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 04273d6aa36b..a7d766e6390e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2524,6 +2524,7 @@ static void tcp_rtx_queue_purge(struct sock *sk) { struct rb_node *p = rb_first(&sk->tcp_rtx_queue); + tcp_sk(sk)->highest_sack = NULL; while (p) { struct sk_buff *skb = rb_to_skb(p); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5347ab2c9c58..2a976f57f7e7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3164,6 +3164,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->retransmit_skb_hint = NULL; if (unlikely(skb == tp->lost_skb_hint)) tp->lost_skb_hint = NULL; + tcp_highest_sack_replace(sk, skb, next); tcp_rtx_queue_unlink_and_free(skb, sk); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 58c92a7d671c..b62b59b18db9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3232,6 +3232,7 @@ int tcp_send_synack(struct sock *sk) if (!nskb) return -ENOMEM; INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); + tcp_highest_sack_replace(sk, skb, nskb); tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); -- cgit From 70a87287c821e9721b62463777f55ba588ac4623 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:08 +0300 Subject: seq_tab_next() should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index aca9f7a20a2a..4144c230dc97 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -70,8 +70,7 @@ static void *seq_tab_start(struct seq_file *seq, loff_t *pos) static void *seq_tab_next(struct seq_file *seq, void *v, loff_t *pos) { v = seq_tab_get_idx(seq->private, *pos + 1); - if (v) - ++*pos; + ++(*pos); return v; } -- cgit From 66018a102f7756cf72db4d2704e1b93969d9d332 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:13 +0300 Subject: l2t_seq_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index e9e45006632d..1a16449e9deb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -678,8 +678,7 @@ static void *l2t_seq_start(struct seq_file *seq, loff_t *pos) static void *l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = l2t_get_idx(seq, *pos); - if (v) - ++*pos; + ++(*pos); return v; } -- cgit From 8bf7092021f283944f0c5f4c364853201c45c611 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:20 +0300 Subject: vcc_seq_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/atm/proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/atm/proc.c b/net/atm/proc.c index d79221fd4dae..c31896707313 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -134,8 +134,7 @@ static void vcc_seq_stop(struct seq_file *seq, void *v) static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = vcc_walk(seq, 1); - if (v) - (*pos)++; + (*pos)++; return v; } -- cgit From 1e3f9f073c47bee7c23e77316b07bc12338c5bba Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:28 +0300 Subject: neigh_stat_seq_next() should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/core/neighbour.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 920784a9b7ff..789a73aa7bd8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3290,6 +3290,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu+1; return per_cpu_ptr(tbl->stats, cpu); } + (*pos)++; return NULL; } -- cgit From a3ea86739f1bc7e121d921842f0f4a8ab1af94d9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:35 +0300 Subject: rt_cpu_seq_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 87e979f2b74a..e356ea779227 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -271,6 +271,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu+1; return &per_cpu(rt_cache_stat, cpu); } + (*pos)++; return NULL; } -- cgit From 4fc427e0515811250647d44de38d87d7b0e0790f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:12:06 +0300 Subject: ipv6_route_seq_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7bae6a91b487..cfae0a1529a1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2495,14 +2495,13 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; + ++(*pos); if (!v) goto iter_table; n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next); - if (n) { - ++*pos; + if (n) return n; - } iter_table: ipv6_route_check_sernum(iter); @@ -2510,8 +2509,6 @@ iter_table: r = fib6_walk_continue(&iter->w); spin_unlock_bh(&iter->tbl->tb6_lock); if (r > 0) { - if (v) - ++*pos; return iter->w.leaf; } else if (r < 0) { fib6_walker_unlink(net, &iter->w); -- cgit From ab658b9fa7a2c467f79eac8b53ea308b8f98113d Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Sat, 18 Jan 2020 13:10:50 +0100 Subject: netfilter: conntrack: sctp: use distinct states for new SCTP connections The netlink notifications triggered by the INIT and INIT_ACK chunks for a tracked SCTP association do not include protocol information for the corresponding connection - SCTP state and verification tags for the original and reply direction are missing. Since the connection tracking implementation allows user space programs to receive notifications about a connection and then create a new connection based on the values received in a notification, it makes sense that INIT and INIT_ACK notifications should contain the SCTP state and verification tags available at the time when a notification is sent. The missing verification tags cause a newly created netfilter connection to fail to verify the tags of SCTP packets when this connection has been created from the values previously received in an INIT or INIT_ACK notification. A PROTOINFO event is cached in sctp_packet() when the state of a connection changes. The CLOSED and COOKIE_WAIT state will be used for connections that have seen an INIT and INIT_ACK chunk, respectively. The distinct states will cause a connection state change in sctp_packet(). Signed-off-by: Jiri Wiesner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_sctp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 0399ae8f1188..4f897b14b606 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -114,7 +114,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, +/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, @@ -130,7 +130,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* REPLY */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, +/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, @@ -316,7 +316,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; } - ct->proto.sctp.state = new_state; + ct->proto.sctp.state = SCTP_CONNTRACK_NONE; } return true; -- cgit From c83de17dd6308fb74696923e5245de0e3c427206 Mon Sep 17 00:00:00 2001 From: wenxu Date: Sun, 19 Jan 2020 13:18:30 +0800 Subject: netfilter: nf_tables_offload: fix check the chain offload flag In the nft_indr_block_cb the chain should check the flag with NFT_CHAIN_HW_OFFLOAD. Fixes: 9a32669fecfb ("netfilter: nf_tables_offload: support indr block call") Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index a9ea29afb09f..2bb28483af22 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -564,7 +564,7 @@ static void nft_indr_block_cb(struct net_device *dev, mutex_lock(&net->nft.commit_mutex); chain = __nft_offload_get_chain(dev); - if (chain) { + if (chain && chain->flags & NFT_CHAIN_HW_OFFLOAD) { struct nft_base_chain *basechain; basechain = nft_base_chain(chain); -- cgit From 826035498ec14b77b62a44f0cb6b94d45530db6f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Jan 2020 16:07:00 +0100 Subject: netfilter: nf_tables: add __nft_chain_type_get() This new helper function validates that unknown family and chain type coming from userspace do not trigger an out-of-bound array access. Bail out in case __nft_chain_type_get() returns NULL from nft_chain_parse_hook(). Fixes: 9370761c56b6 ("netfilter: nf_tables: convert built-in tables/chains to chain types") Reported-by: syzbot+156a04714799b1d480bc@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 65f51a2e9c2a..4aa01c1253b1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -552,15 +552,28 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; +static const struct nft_chain_type * +__nft_chain_type_get(u8 family, enum nft_chain_types type) +{ + if (family >= NFPROTO_NUMPROTO || + type >= NFT_CHAIN_T_MAX) + return NULL; + + return chain_type[family][type]; +} + static const struct nft_chain_type * __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) { + const struct nft_chain_type *type; int i; for (i = 0; i < NFT_CHAIN_T_MAX; i++) { - if (chain_type[family][i] != NULL && - !nla_strcmp(nla, chain_type[family][i]->name)) - return chain_type[family][i]; + type = __nft_chain_type_get(family, i); + if (!type) + continue; + if (!nla_strcmp(nla, type->name)) + return type; } return NULL; } @@ -1162,11 +1175,8 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) void nft_register_chain_type(const struct nft_chain_type *ctype) { - if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO)) - return; - nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (WARN_ON(chain_type[ctype->family][ctype->type] != NULL)) { + if (WARN_ON(__nft_chain_type_get(ctype->family, ctype->type))) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); return; } @@ -1768,7 +1778,10 @@ static int nft_chain_parse_hook(struct net *net, hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - type = chain_type[family][NFT_CHAIN_T_DEFAULT]; + type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT); + if (!type) + return -EOPNOTSUPP; + if (nla[NFTA_CHAIN_TYPE]) { type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], family, autoload); -- cgit From eb014de4fd418de1a277913cba244e47274fe392 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Jan 2020 16:48:03 +0100 Subject: netfilter: nf_tables: autoload modules from the abort path This patch introduces a list of pending module requests. This new module list is composed of nft_module_request objects that contain the module name and one status field that tells if the module has been already loaded (the 'done' field). In the first pass, from the preparation phase, the netlink command finds that a module is missing on this list. Then, a module request is allocated and added to this list and nft_request_module() returns -EAGAIN. This triggers the abort path with the autoload parameter set on from nfnetlink, request_module() is called and the module request enters the 'done' state. Since the mutex is released when loading modules from the abort phase, the module list is zapped so this is iteration occurs over a local list. Therefore, the request_module() calls happen when object lists are in consistent state (after fulling aborting the transaction) and the commit list is empty. On the second pass, the netlink command will find that it already tried to load the module, so it does not request it again and nft_request_module() returns 0. Then, there is a look up to find the object that the command was missing. If the module was successfully loaded, the command proceeds normally since it finds the missing object in place, otherwise -ENOENT is reported to userspace. This patch also updates nfnetlink to include the reason to enter the abort phase, which is required for this new autoload module rationale. Fixes: ec7470b834fe ("netfilter: nf_tables: store transaction list locally while requesting module") Reported-by: syzbot+29125d208b3dae9a7019@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 2 +- include/net/netns/nftables.h | 1 + net/netfilter/nf_tables_api.c | 126 ++++++++++++++++++++++++------------ net/netfilter/nfnetlink.c | 6 +- 4 files changed, 91 insertions(+), 44 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index cf09ab37b45b..851425c3178f 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -31,7 +31,7 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ struct module *owner; int (*commit)(struct net *net, struct sk_buff *skb); - int (*abort)(struct net *net, struct sk_buff *skb); + int (*abort)(struct net *net, struct sk_buff *skb, bool autoload); void (*cleanup)(struct net *net); bool (*valid_genid)(struct net *net, u32 genid); }; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 286fd960896f..a1a8d45adb42 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,6 +7,7 @@ struct netns_nftables { struct list_head tables; struct list_head commit_list; + struct list_head module_list; struct mutex commit_mutex; unsigned int base_seq; u8 gencursor; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4aa01c1253b1..7e63b481cc86 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -578,35 +578,45 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) return NULL; } -/* - * Loading a module requires dropping mutex that guards the transaction. - * A different client might race to start a new transaction meanwhile. Zap the - * list of pending transaction and then restore it once the mutex is grabbed - * again. Users of this function return EAGAIN which implicitly triggers the - * transaction abort path to clean up the list of pending transactions. - */ +struct nft_module_request { + struct list_head list; + char module[MODULE_NAME_LEN]; + bool done; +}; + #ifdef CONFIG_MODULES -static void nft_request_module(struct net *net, const char *fmt, ...) +static int nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; - LIST_HEAD(commit_list); + struct nft_module_request *req; va_list args; int ret; - list_splice_init(&net->nft.commit_list, &commit_list); - va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); if (ret >= MODULE_NAME_LEN) - return; + return 0; - mutex_unlock(&net->nft.commit_mutex); - request_module("%s", module_name); - mutex_lock(&net->nft.commit_mutex); + list_for_each_entry(req, &net->nft.module_list, list) { + if (!strcmp(req->module, module_name)) { + if (req->done) + return 0; - WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); - list_splice(&commit_list, &net->nft.commit_list); + /* A request to load this module already exists. */ + return -EAGAIN; + } + } + + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->done = false; + strlcpy(req->module, module_name, MODULE_NAME_LEN); + list_add_tail(&req->list, &net->nft.module_list); + + return -EAGAIN; } #endif @@ -630,10 +640,9 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (autoload) { - nft_request_module(net, "nft-chain-%u-%.*s", family, - nla_len(nla), (const char *)nla_data(nla)); - type = __nf_tables_chain_type_lookup(nla, family); - if (type != NULL) + if (nft_request_module(net, "nft-chain-%u-%.*s", family, + nla_len(nla), + (const char *)nla_data(nla)) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif @@ -2341,9 +2350,8 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, static int nft_expr_type_request_module(struct net *net, u8 family, struct nlattr *nla) { - nft_request_module(net, "nft-expr-%u-%.*s", family, - nla_len(nla), (char *)nla_data(nla)); - if (__nft_expr_type_get(family, nla)) + if (nft_request_module(net, "nft-expr-%u-%.*s", family, + nla_len(nla), (char *)nla_data(nla)) == -EAGAIN) return -EAGAIN; return 0; @@ -2369,9 +2377,9 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nft_expr_type_request_module(net, family, nla) == -EAGAIN) return ERR_PTR(-EAGAIN); - nft_request_module(net, "nft-expr-%.*s", - nla_len(nla), (char *)nla_data(nla)); - if (__nft_expr_type_get(family, nla)) + if (nft_request_module(net, "nft-expr-%.*s", + nla_len(nla), + (char *)nla_data(nla)) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif @@ -2462,9 +2470,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, err = PTR_ERR(ops); #ifdef CONFIG_MODULES if (err == -EAGAIN) - nft_expr_type_request_module(ctx->net, - ctx->family, - tb[NFTA_EXPR_NAME]); + if (nft_expr_type_request_module(ctx->net, + ctx->family, + tb[NFTA_EXPR_NAME]) != -EAGAIN) + err = -ENOENT; #endif goto err1; } @@ -3301,8 +3310,7 @@ nft_select_set_ops(const struct nft_ctx *ctx, lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (list_empty(&nf_tables_set_types)) { - nft_request_module(ctx->net, "nft-set"); - if (!list_empty(&nf_tables_set_types)) + if (nft_request_module(ctx->net, "nft-set") == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif @@ -5428,8 +5436,7 @@ nft_obj_type_get(struct net *net, u32 objtype) lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (type == NULL) { - nft_request_module(net, "nft-obj-%u", objtype); - if (__nft_obj_type_get(objtype)) + if (nft_request_module(net, "nft-obj-%u", objtype) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif @@ -6002,8 +6009,7 @@ nft_flowtable_type_get(struct net *net, u8 family) lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (type == NULL) { - nft_request_module(net, "nf-flowtable-%u", family); - if (__nft_flowtable_type_get(family)) + if (nft_request_module(net, "nf-flowtable-%u", family) == -EAGAIN) return ERR_PTR(-EAGAIN); } #endif @@ -7005,6 +7011,18 @@ static void nft_chain_del(struct nft_chain *chain) list_del_rcu(&chain->list); } +static void nf_tables_module_autoload_cleanup(struct net *net) +{ + struct nft_module_request *req, *next; + + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); + list_for_each_entry_safe(req, next, &net->nft.module_list, list) { + WARN_ON_ONCE(!req->done); + list_del(&req->list); + kfree(req); + } +} + static void nf_tables_commit_release(struct net *net) { struct nft_trans *trans; @@ -7017,6 +7035,7 @@ static void nf_tables_commit_release(struct net *net) * to prevent expensive synchronize_rcu() in commit phase. */ if (list_empty(&net->nft.commit_list)) { + nf_tables_module_autoload_cleanup(net); mutex_unlock(&net->nft.commit_mutex); return; } @@ -7031,6 +7050,7 @@ static void nf_tables_commit_release(struct net *net) list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list); spin_unlock(&nf_tables_destroy_list_lock); + nf_tables_module_autoload_cleanup(net); mutex_unlock(&net->nft.commit_mutex); schedule_work(&trans_destroy_work); @@ -7222,6 +7242,26 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } +static void nf_tables_module_autoload(struct net *net) +{ + struct nft_module_request *req, *next; + LIST_HEAD(module_list); + + list_splice_init(&net->nft.module_list, &module_list); + mutex_unlock(&net->nft.commit_mutex); + list_for_each_entry_safe(req, next, &module_list, list) { + if (req->done) { + list_del(&req->list); + kfree(req); + } else { + request_module("%s", req->module); + req->done = true; + } + } + mutex_lock(&net->nft.commit_mutex); + list_splice(&module_list, &net->nft.module_list); +} + static void nf_tables_abort_release(struct nft_trans *trans) { switch (trans->msg_type) { @@ -7251,7 +7291,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } -static int __nf_tables_abort(struct net *net) +static int __nf_tables_abort(struct net *net, bool autoload) { struct nft_trans *trans, *next; struct nft_trans_elem *te; @@ -7373,6 +7413,11 @@ static int __nf_tables_abort(struct net *net) nf_tables_abort_release(trans); } + if (autoload) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + return 0; } @@ -7381,9 +7426,9 @@ static void nf_tables_cleanup(struct net *net) nft_validate_state_update(net, NFT_VALIDATE_SKIP); } -static int nf_tables_abort(struct net *net, struct sk_buff *skb) +static int nf_tables_abort(struct net *net, struct sk_buff *skb, bool autoload) { - int ret = __nf_tables_abort(net); + int ret = __nf_tables_abort(net, autoload); mutex_unlock(&net->nft.commit_mutex); @@ -7978,6 +8023,7 @@ static int __net_init nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); + INIT_LIST_HEAD(&net->nft.module_list); mutex_init(&net->nft.commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; @@ -7989,7 +8035,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) { mutex_lock(&net->nft.commit_mutex); if (!list_empty(&net->nft.commit_list)) - __nf_tables_abort(net); + __nf_tables_abort(net, false); __nft_release_tables(net); mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4abbb452cf6c..99127e2d95a8 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -476,7 +476,7 @@ ack: } done: if (status & NFNL_BATCH_REPLAY) { - ss->abort(net, oskb); + ss->abort(net, oskb, true); nfnl_err_reset(&err_list); kfree_skb(skb); module_put(ss->owner); @@ -487,11 +487,11 @@ done: status |= NFNL_BATCH_REPLAY; goto done; } else if (err) { - ss->abort(net, oskb); + ss->abort(net, oskb, false); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } } else { - ss->abort(net, oskb); + ss->abort(net, oskb, false); } if (ss->cleanup) ss->cleanup(net); -- cgit From 189c9b1e94539b11c80636bc13e9cf47529e7bba Mon Sep 17 00:00:00 2001 From: Praveen Chaudhary Date: Thu, 23 Jan 2020 12:33:28 -0800 Subject: net: Fix skb->csum update in inet_proto_csum_replace16(). skb->csum is updated incorrectly, when manipulation for NF_NAT_MANIP_SRC\DST is done on IPV6 packet. Fix: There is no need to update skb->csum in inet_proto_csum_replace16(), because update in two fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to update skb->csum, because update in 3 fields a.) IPv4 src/dst address, b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as L4 Header checksum for skb->csum calculation. [ pablo@netfilter.org: a few comestic documentation edits ] Signed-off-by: Praveen Chaudhary Signed-off-by: Zhenggen Xu Signed-off-by: Andy Stracner Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/core/utils.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/core/utils.c b/net/core/utils.c index 6b6e51db9f3b..1f31a39236d5 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -438,6 +438,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace4); +/** + * inet_proto_csum_replace16 - update layer 4 header checksum field + * @sum: Layer 4 header checksum field + * @skb: sk_buff for the packet + * @from: old IPv6 address + * @to: new IPv6 address + * @pseudohdr: True if layer 4 header checksum includes pseudoheader + * + * Update layer 4 header as per the update in IPv6 src/dst address. + * + * There is no need to update skb->csum in this function, because update in two + * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other + * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to + * update skb->csum, because update in 3 fields a.) IPv4 src/dst address, + * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as + * L4 Header checksum for skb->csum calculation. + */ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr) @@ -449,9 +466,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial(diff, sizeof(diff), - ~skb->csum); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); -- cgit From 93b8a7ecb7287cc9b0196f12a25b57c2462d11dc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 31 Dec 2019 17:04:15 +0200 Subject: net/mlx5: Fix lowest FDB pool size The pool sizes represent the pool sizes in the fw. when we request a pool size from fw, it will return the next possible group. We track how many pools the fw has left and start requesting groups from the big to the small. When we start request 4k group, which doesn't exists in fw, fw wants to allocate the next possible size, 64k, but will fail since its exhausted. The correct smallest pool size in fw is 128 and not 4k. Fixes: e52c28024008 ("net/mlx5: E-Switch, Add chains and priorities") Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 243a5440867e..b8fe44ea44c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -866,7 +866,7 @@ out: */ #define ESW_SIZE (16 * 1024 * 1024) const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, - 64 * 1024, 4 * 1024 }; + 64 * 1024, 128 }; static int get_sz_from_pool(struct mlx5_eswitch *esw) -- cgit From 505a7f5478062c6cd11e22022d9f1bf64cd8eab3 Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Thu, 12 Dec 2019 16:09:33 +0200 Subject: net/mlx5: Update the list of the PCI supported devices Add the upcoming ConnectX-7 device ID. Fixes: 85327a9c4150 ("net/mlx5: Update the list of the PCI supported devices") Signed-off-by: Meir Lichtinger Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index cf7b8da0f010..f554cfddcf4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1563,6 +1563,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ + { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ -- cgit From b850a82114df9b0ec1d191dc64eed1f20a772e0f Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 8 Jan 2020 14:17:32 +0200 Subject: net/mlx5: DR, Enable counter on non-fwd-dest objects The current code handles only counters that attached to dest, we still have the cases where we have counter on non-dest, like over drop etc. Fixes: 6a48faeeca10 ("net/mlx5: Add direct rule fs_cmd implementation") Signed-off-by: Hamdan Igbaria Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/steering/fs_dr.c | 42 +++++++++++++++------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 3d587d0bdbbe..1e32e2443f73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -352,26 +352,16 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { list_for_each_entry(dst, &fte->node.children, node.list) { enum mlx5_flow_destination_type type = dst->dest_attr.type; - u32 id; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { err = -ENOSPC; goto free_actions; } - switch (type) { - case MLX5_FLOW_DESTINATION_TYPE_COUNTER: - id = dst->dest_attr.counter_id; + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; - tmp_action = - mlx5dr_action_create_flow_counter(id); - if (!tmp_action) { - err = -ENOMEM; - goto free_actions; - } - fs_dr_actions[fs_dr_num_actions++] = tmp_action; - actions[num_actions++] = tmp_action; - break; + switch (type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: tmp_action = create_ft_action(dev, dst); if (!tmp_action) { @@ -397,6 +387,32 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } } + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + list_for_each_entry(dst, &fte->node.children, node.list) { + u32 id; + + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -ENOSPC; + goto free_actions; + } + + id = dst->dest_attr.counter_id; + tmp_action = + mlx5dr_action_create_flow_counter(id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + } + params.match_sz = match_sz; params.match_buf = (u64 *)fte->val; -- cgit From e401a1848be87123a2b2049addbf21138cb47081 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 12 Jan 2020 13:43:37 +0200 Subject: net/mlx5: E-Switch, Prevent ingress rate configuration of uplink rep Since the implementation relies on limiting the VF transmit rate to simulate ingress rate limiting, and since either uplink representor or ecpf are not associated with a VF, we limit the rate limit configuration for those ports. Fixes: fcb64c0f5640 ("net/mlx5: E-Switch, add ingress rate support") Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 024e1cddfd0e..7e32b9e3667c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4036,6 +4036,13 @@ static int apply_police_params(struct mlx5e_priv *priv, u32 rate, u32 rate_mbps; int err; + vport_num = rpriv->rep->vport; + if (vport_num >= MLX5_VPORT_ECPF) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress rate limit is supported only for Eswitch ports connected to VFs"); + return -EOPNOTSUPP; + } + esw = priv->mdev->priv.eswitch; /* rate is given in bytes/sec. * First convert to bits/sec and then round to the nearest mbit/secs. @@ -4044,8 +4051,6 @@ static int apply_police_params(struct mlx5e_priv *priv, u32 rate, * 1 mbit/sec. */ rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; - vport_num = rpriv->rep->vport; - err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); if (err) NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); -- cgit From c0702a4bd41829f05638ec2dab70f6bb8d8010ce Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 12 Jan 2020 08:57:59 +0200 Subject: net/mlx5: DR, use non preemptible call to get the current cpu number Use raw_smp_processor_id instead of smp_processor_id() otherwise we will get the following trace in debug-kernel: BUG: using smp_processor_id() in preemptible [00000000] code: devlink caller is dr_create_cq.constprop.2+0x31d/0x970 [mlx5_core] Call Trace: dump_stack+0x9a/0xf0 debug_smp_processor_id+0x1f3/0x200 dr_create_cq.constprop.2+0x31d/0x970 genl_family_rcv_msg+0x5fd/0x1170 genl_rcv_msg+0xb8/0x160 netlink_rcv_skb+0x11e/0x340 Fixes: 297cccebdc5a ("net/mlx5: DR, Expose an internal API to issue RDMA operations") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 51803eef13dd..c7f10d4f8f8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2019 Mellanox Technologies. */ +#include #include "dr_types.h" #define QUEUE_SIZE 128 @@ -729,7 +730,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, if (!in) goto err_cqwq; - vector = smp_processor_id() % mlx5_comp_vectors_count(mdev); + vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); if (err) { kvfree(in); -- cgit From 3b83b6c2e024d85b770ddb1e19a513b5d7587f6f Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Mon, 13 Jan 2020 13:46:13 +0200 Subject: net/mlx5e: Clear VF config when switching modes Currently VF in LEGACY mode are not able to go up. Also in OFFLOADS mode, when switching to it first time, VF can go up independently to his representor, which is not expected. Perform clearing of VF config when switching modes and set link state to AUTO as default value. Also, when switching to OFFLOADS mode set link state to DOWN, which allow VF link state to be controlled by its REP. Fixes: 1ab2068a4c66 ("net/mlx5: Implement vports admin state backup/restore") Fixes: 556b9d16d3f5 ("net/mlx5: Clear VF's configuration on disabling SRIOV") Signed-off-by: Dmytro Linkin Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2c965ad0d744..3df3604e8929 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1928,8 +1928,10 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } } /* Public E-Switch API */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b8fe44ea44c3..3e6412783078 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1377,7 +1377,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, false); + mlx5_eswitch_disable(esw, true); mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { @@ -2220,7 +2220,8 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type int esw_offloads_enable(struct mlx5_eswitch *esw) { - int err; + struct mlx5_vport *vport; + int err, i; if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap)) @@ -2237,6 +2238,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; + /* Representor will control the vport link state */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); if (err) goto err_vports; @@ -2266,7 +2271,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, false); + mlx5_eswitch_disable(esw, true); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); -- cgit From ffbd9ca94e2ebbfe802d4b28bab5ba19818de853 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 12 Jan 2020 16:22:14 +0200 Subject: net/mlx5e: kTLS, Fix corner-case checks in TX resync flow There are the following cases: 1. Packet ends before start marker: bypass offload. 2. Packet starts before start marker and ends after it: drop, not supported, breaks contract with kernel. 3. packet ends before tls record info starts: drop, this packet was already acknowledged and its record info was released. Add the above as comment in code. Mind possible wraparounds of the TCP seq, replace the simple comparison with a call to the TCP before() method. In addition, remove logic that handles negative sync_len values, as it became impossible. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Fixes: 46a3ea98074e ("net/mlx5e: kTLS, Enhance TX resync flow") Signed-off-by: Tariq Toukan Signed-off-by: Boris Pismenny Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 33 +++++++++++++--------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 778dab1af8fc..8dbb92176bd7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -180,7 +180,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, struct tx_sync_info { u64 rcd_sn; - s32 sync_len; + u32 sync_len; int nr_frags; skb_frag_t frags[MAX_SKB_FRAGS]; }; @@ -193,13 +193,14 @@ enum mlx5e_ktls_sync_retval { static enum mlx5e_ktls_sync_retval tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, - u32 tcp_seq, struct tx_sync_info *info) + u32 tcp_seq, int datalen, struct tx_sync_info *info) { struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE; struct tls_record_info *record; int remaining, i = 0; unsigned long flags; + bool ends_before; spin_lock_irqsave(&tx_ctx->lock, flags); record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); @@ -209,9 +210,21 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, goto out; } - if (unlikely(tcp_seq < tls_record_start_seq(record))) { - ret = tls_record_is_start_marker(record) ? - MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; + /* There are the following cases: + * 1. packet ends before start marker: bypass offload. + * 2. packet starts before start marker and ends after it: drop, + * not supported, breaks contract with kernel. + * 3. packet ends before tls record info starts: drop, + * this packet was already acknowledged and its record info + * was released. + */ + ends_before = before(tcp_seq + datalen, tls_record_start_seq(record)); + + if (unlikely(tls_record_is_start_marker(record))) { + ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; + goto out; + } else if (ends_before) { + ret = MLX5E_KTLS_SYNC_FAIL; goto out; } @@ -337,7 +350,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, u8 num_wqebbs; int i = 0; - ret = tx_sync_info_get(priv_tx, seq, &info); + ret = tx_sync_info_get(priv_tx, seq, datalen, &info); if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { stats->tls_skip_no_sync_data++; @@ -351,14 +364,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, goto err_out; } - if (unlikely(info.sync_len < 0)) { - if (likely(datalen <= -info.sync_len)) - return MLX5E_KTLS_SYNC_DONE; - - stats->tls_drop_bypass_req++; - goto err_out; - } - stats->tls_ooo++; tx_post_resync_params(sq, priv_tx, info.rcd_sn); -- cgit From 1e92899791358dba94a9db7cc3b6004636b5a2f6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 13 Jan 2020 14:46:09 +0200 Subject: net/mlx5e: kTLS, Remove redundant posts in TX resync flow The call to tx_post_resync_params() is done earlier in the flow, the post of the control WQEs is unnecessarily repeated. Remove it. Fixes: 700ec4974240 ("net/mlx5e: kTLS, Fix missing SQ edge fill") Signed-off-by: Tariq Toukan Signed-off-by: Boris Pismenny Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 8dbb92176bd7..592e921aa167 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -383,8 +383,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, if (unlikely(contig_wqebbs_room < num_wqebbs)) mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - tx_post_resync_params(sq, priv_tx, info.rcd_sn); - for (; i < info.nr_frags; i++) { unsigned int orig_fsz, frag_offset = 0, n = 0; skb_frag_t *f = &info.frags[i]; -- cgit From 342508c1c7540e281fd36151c175ba5ff954a99f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 20 Jan 2020 13:42:00 +0200 Subject: net/mlx5e: kTLS, Do not send decrypted-marked SKBs via non-accel path When TCP out-of-order is identified (unexpected tcp seq mismatch), driver analyzes the packet and decides what handling should it get: 1. go to accelerated path (to be encrypted in HW), 2. go to regular xmit path (send w/o encryption), 3. drop. Packets marked with skb->decrypted by the TLS stack in the TX flow skips SW encryption, and rely on the HW offload. Verify that such packets are never sent un-encrypted on the wire. Add a WARN to catch such bugs, and prefer dropping the packet in these cases. Fixes: 46a3ea98074e ("net/mlx5e: kTLS, Enhance TX resync flow") Signed-off-by: Tariq Toukan Signed-off-by: Boris Pismenny Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 592e921aa167..f260dd96873b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -458,12 +458,18 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, enum mlx5e_ktls_sync_retval ret = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); - if (likely(ret == MLX5E_KTLS_SYNC_DONE)) + switch (ret) { + case MLX5E_KTLS_SYNC_DONE: *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); - else if (ret == MLX5E_KTLS_SYNC_FAIL) + break; + case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + if (likely(!skb->decrypted)) + goto out; + WARN_ON_ONCE(1); + /* fall-through */ + default: /* MLX5E_KTLS_SYNC_FAIL */ goto err_out; - else /* ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA */ - goto out; + } } priv_tx->expected_seq = seq + datalen; -- cgit From b9f0b2f634c0765999bec5547bc7a4ac08fda3ff Mon Sep 17 00:00:00 2001 From: Ajay Gupta Date: Wed, 22 Jan 2020 17:16:35 -0800 Subject: net: stmmac: platform: fix probe for ACPI devices Use generic device API to get phy mode to fix probe failure with ACPI based devices. Signed-off-by: Ajay Gupta Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 4775f49d7f3b..d10ac54bf385 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -412,9 +412,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) *mac = NULL; } - rc = of_get_phy_mode(np, &plat->phy_interface); - if (rc) - return ERR_PTR(rc); + plat->phy_interface = device_get_phy_mode(&pdev->dev); + if (plat->phy_interface < 0) + return ERR_PTR(plat->phy_interface); plat->interface = stmmac_of_get_mac_mode(np); if (plat->interface < 0) -- cgit From 61b1f2aff41141cd39686285c9b290a07e1e3051 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 23 Jan 2020 10:09:39 -0500 Subject: tipc: change maintainer email address Reflecting new realities. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b6476496a135..56765f542244 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16603,7 +16603,7 @@ F: kernel/time/ntp.c F: tools/testing/selftests/timers/ TIPC NETWORK LAYER -M: Jon Maloy +M: Jon Maloy M: Ying Xue L: netdev@vger.kernel.org (core kernel code) L: tipc-discussion@lists.sourceforge.net (user apps, general discussion) -- cgit From 148965df1a990af98b2c84092c2a2274c7489284 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jan 2020 09:49:34 -0800 Subject: net: bcmgenet: Use netif_tx_napi_add() for TX NAPI Before commit 7587935cfa11 ("net: bcmgenet: move NAPI initialization to ring initialization") moved the code, this used to be netif_tx_napi_add(), but we lost that small semantic change in the process, restore that. Fixes: 7587935cfa11 ("net: bcmgenet: move NAPI initialization to ring initialization") Signed-off-by: Florian Fainelli Acked-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 120fa05a39ff..0a8624be44a9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2164,8 +2164,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, DMA_END_ADDR); /* Initialize Tx NAPI */ - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, - NAPI_POLL_WEIGHT); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, + NAPI_POLL_WEIGHT); } /* Initialize a RDMA ring */ -- cgit From 3546d8f1bbe992488ed91592cf6bf76e7114791a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 24 Jan 2020 20:41:44 +1100 Subject: net: cxgb3_main: Add CAP_NET_ADMIN check to CHELSIO_GET_MEM The cxgb3 driver for "Chelsio T3-based gigabit and 10Gb Ethernet adapters" implements a custom ioctl as SIOCCHIOCTL/SIOCDEVPRIVATE in cxgb_extension_ioctl(). One of the subcommands of the ioctl is CHELSIO_GET_MEM, which appears to read memory directly out of the adapter and return it to userspace. It's not entirely clear what the contents of the adapter memory contains, but the assumption is that it shouldn't be accessible to all users. So add a CAP_NET_ADMIN check to the CHELSIO_GET_MEM case. Put it after the is_offload() check, which matches two of the other subcommands in the same function which also check for is_offload() and CAP_NET_ADMIN. Found by Ilja by code inspection, not tested as I don't have the required hardware. Reported-by: Ilja Van Sprundel Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 58f89f6a040f..97ff8608f0ab 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2448,6 +2448,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (!is_offload(adapter)) return -EOPNOTSUPP; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; if (!(adapter->flags & FULL_INIT_DONE)) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) -- cgit From fa865ba183d61c1ec8cbcab8573159c3b72b89a4 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 25 Jan 2020 14:33:29 +0000 Subject: firestream: fix memory leaks In fs_open(), 'vcc' is allocated through kmalloc() and assigned to 'atm_vcc->dev_data.' In the following execution, if an error occurs, e.g., there is no more free channel, an error code EBUSY or ENOMEM will be returned. However, 'vcc' is not deallocated, leading to memory leaks. Note that, in normal cases where fs_open() returns 0, 'vcc' will be deallocated in fs_close(). But, if fs_open() fails, there is no guarantee that fs_close() will be invoked. To fix this issue, deallocate 'vcc' before the error code is returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/atm/firestream.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index aad00d2b28f5..cc87004d5e2d 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -912,6 +912,7 @@ static int fs_open(struct atm_vcc *atm_vcc) } if (!to) { printk ("No more free channels for FS50..\n"); + kfree(vcc); return -EBUSY; } vcc->channo = dev->channo; @@ -922,6 +923,7 @@ static int fs_open(struct atm_vcc *atm_vcc) if (((DO_DIRECTION(rxtp) && dev->atm_vccs[vcc->channo])) || ( DO_DIRECTION(txtp) && test_bit (vcc->channo, dev->tx_inuse))) { printk ("Channel is in use for FS155.\n"); + kfree(vcc); return -EBUSY; } } @@ -935,6 +937,7 @@ static int fs_open(struct atm_vcc *atm_vcc) tc, sizeof (struct fs_transmit_config)); if (!tc) { fs_dprintk (FS_DEBUG_OPEN, "fs: can't alloc transmit_config.\n"); + kfree(vcc); return -ENOMEM; } -- cgit