From 19d7df69fdb2636856dc8919de72fc1bf8f79598 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 1 Feb 2018 08:49:23 +0100 Subject: xfrm: Refuse to insert 32 bit userspace socket policies on 64 bit systems We don't have a compat layer for xfrm, so userspace and kernel structures have different sizes in this case. This results in a broken configuration, so refuse to configure socket policies when trying to insert from 32 bit userspace as we do it already with policies inserted via netlink. Reported-and-tested-by: syzbot+e1a1577ca8bcb47b769a@syzkaller.appspotmail.com Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 54e21f19d722..f9d2f2233f09 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2056,6 +2056,11 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen struct xfrm_mgr *km; struct xfrm_policy *pol = NULL; +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + return -EOPNOTSUPP; +#endif + if (!optval && !optlen) { xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); -- cgit From 2471c98165494173a3cd03231b216b909c063e41 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 1 Feb 2018 11:26:12 +0100 Subject: xfrm: Fix policy hold queue after flowcache removal. Now that the flowcache is removed we need to generate a new dummy bundle every time we check if the needed SAs are in place because the dummy bundle is not cached anymore. Fix it by passing the XFRM_LOOKUP_QUEUE flag to xfrm_lookup(). This makes sure that we get a dummy bundle in case the SAs are not yet in place. Fixes: 3ca28286ea80 ("xfrm_policy: bypass flow_cache_lookup") Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7a23078132cf..8b3811ff002d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1891,7 +1891,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) spin_unlock(&pq->hold_queue.lock); dst_hold(xfrm_dst_path(dst)); - dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0); + dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) goto purge_queue; -- cgit From d97ca5d714a5334aecadadf696875da40f1fbf3e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 12 Feb 2018 14:42:01 +0100 Subject: xfrm_user: uncoditionally validate esn replay attribute struct The sanity test added in ecd7918745234 can be bypassed, validation only occurs if XFRM_STATE_ESN flag is set, but rest of code doesn't care and just checks if the attribute itself is present. So always validate. Alternative is to reject if we have the attribute without the flag but that would change abi. Reported-by: syzbot+0ab777c27d2bb7588f73@syzkaller.appspotmail.com Cc: Mathias Krause Fixes: ecd7918745234 ("xfrm_user: ensure user supplied esn replay window is valid") Fixes: d8647b79c3b7e ("xfrm: Add user interface for esn and big anti-replay windows") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7f52b8eb177d..080035f056d9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -121,22 +121,17 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; struct xfrm_replay_state_esn *rs; - if (p->flags & XFRM_STATE_ESN) { - if (!rt) - return -EINVAL; + if (!rt) + return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0; - rs = nla_data(rt); + rs = nla_data(rt); - if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) - return -EINVAL; - - if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && - nla_len(rt) != sizeof(*rs)) - return -EINVAL; - } + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; - if (!rt) - return 0; + if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; /* As only ESP and AH support ESN feature. */ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) -- cgit From 510c321b557121861601f9d259aadd65aa274f35 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Feb 2018 19:06:02 +0800 Subject: xfrm: reuse uncached_list to track xdsts In early time, when freeing a xdst, it would be inserted into dst_garbage.list first. Then if it's refcnt was still held somewhere, later it would be put into dst_busy_list in dst_gc_task(). When one dev was being unregistered, the dev of these dsts in dst_busy_list would be set with loopback_dev and put this dev. So that this dev's removal wouldn't get blocked, and avoid the kmsg warning: kernel:unregister_netdevice: waiting for veth0 to become \ free. Usage count = 2 However after Commit 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle"), the xdst will not be freed with dst gc, and this warning happens. To fix it, we need to find these xdsts that are still held by others when removing the dev, and free xdst's dev and set it with loopback_dev. But unfortunately after flow_cache for xfrm was deleted, no list tracks them anymore. So we need to save these xdsts somewhere to release the xdst's dev later. To make this easier, this patch is to reuse uncached_list to track xdsts, so that the dev refcnt can be released in the event NETDEV_UNREGISTER process of fib_netdev_notifier. Thanks to Florian, we could move forward this fix quickly. Fixes: 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle") Reported-by: Jianlin Shi Reported-by: Hangbin Liu Tested-by: Eyal Birger Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/ip6_route.h | 3 +++ include/net/route.h | 3 +++ net/ipv4/route.c | 21 +++++++++++++-------- net/ipv4/xfrm4_policy.c | 4 +++- net/ipv6/route.c | 4 ++-- net/ipv6/xfrm6_policy.c | 5 +++++ 6 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 27d23a65f3cd..ac0866bb9e93 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -179,6 +179,9 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); void rt6_multipath_rebalance(struct rt6_info *rt); +void rt6_uncached_list_add(struct rt6_info *rt); +void rt6_uncached_list_del(struct rt6_info *rt); + static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/route.h b/include/net/route.h index 1eb9ce470e25..40b870d58f38 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -227,6 +227,9 @@ struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); +void rt_add_uncached_list(struct rtable *rt); +void rt_del_uncached_list(struct rtable *rt); + static inline void ip_rt_put(struct rtable *rt) { /* dst_release() accepts a NULL parameter. diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cc1c1df1ba..1d1e4abe04b0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1383,7 +1383,7 @@ struct uncached_list { static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); -static void rt_add_uncached_list(struct rtable *rt) +void rt_add_uncached_list(struct rtable *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); @@ -1394,14 +1394,8 @@ static void rt_add_uncached_list(struct rtable *rt) spin_unlock_bh(&ul->lock); } -static void ipv4_dst_destroy(struct dst_entry *dst) +void rt_del_uncached_list(struct rtable *rt) { - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); - struct rtable *rt = (struct rtable *) dst; - - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) - kfree(p); - if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1411,6 +1405,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst) } } +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); + struct rtable *rt = (struct rtable *)dst; + + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); + + rt_del_uncached_list(rt); +} + void rt_flush_dev(struct net_device *dev) { struct net *net = dev_net(dev); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 05017e2c849c..8d33f7b311f4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -102,6 +102,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); + rt_add_uncached_list(&xdst->u.rt); return 0; } @@ -241,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); - + if (xdst->u.rt.rt_uncached_list) + rt_del_uncached_list(&xdst->u.rt); xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb2d251c0500..38b75e9d6eae 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -128,7 +128,7 @@ struct uncached_list { static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); -static void rt6_uncached_list_add(struct rt6_info *rt) +void rt6_uncached_list_add(struct rt6_info *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); @@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt) spin_unlock_bh(&ul->lock); } -static void rt6_uncached_list_del(struct rt6_info *rt) +void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 09fb44ee3b45..416fe67271a9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; + INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); + rt6_uncached_list_add(&xdst->u.rt6); + atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache); return 0; } @@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); + if (xdst->u.rt6.rt6i_uncached_list) + rt6_uncached_list_del(&xdst->u.rt6); xfrm_dst_destroy(xdst); } -- cgit From 5ab2ba931255d8bf03009c06d58dce97de32797c Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 29 Mar 2016 10:56:57 +0300 Subject: iwlwifi: mvm: fix security bug in PN checking A previous patch allowed the same PN for packets originating from the same AMSDU by copying PN only for the last packet in the series. This however is bogus since we cannot assume the last frame will be received on the same queue, and if it is received on a different ueue we will end up not incrementing the PN and possibly let the next packet to have the same PN and pass through. Change the logic instead to driver explicitly indicate for the second sub frame and on to be allowed to have the same PN as the first subframe. Indicate it to mac80211 as well for the fallback queue. Fixes: f1ae02b186d9 ("iwlwifi: mvm: allow same PN for de-aggregated AMSDU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index a3f7c1bf3cc8..580de5851fc7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -71,6 +71,7 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_rx_status *stats = IEEE80211_SKB_RXCB(skb); struct iwl_mvm_key_pn *ptk_pn; + int res; u8 tid, keyidx; u8 pn[IEEE80211_CCMP_PN_LEN]; u8 *extiv; @@ -127,12 +128,13 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, pn[4] = extiv[1]; pn[5] = extiv[0]; - if (memcmp(pn, ptk_pn->q[queue].pn[tid], - IEEE80211_CCMP_PN_LEN) <= 0) + res = memcmp(pn, ptk_pn->q[queue].pn[tid], IEEE80211_CCMP_PN_LEN); + if (res < 0) + return -1; + if (!res && !(stats->flag & RX_FLAG_ALLOW_SAME_PN)) return -1; - if (!(stats->flag & RX_FLAG_AMSDU_MORE)) - memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); + memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); stats->flag |= RX_FLAG_PN_VALIDATED; return 0; @@ -314,28 +316,21 @@ static void iwl_mvm_rx_csum(struct ieee80211_sta *sta, } /* - * returns true if a packet outside BA session is a duplicate and - * should be dropped + * returns true if a packet is a duplicate and should be dropped. + * Updates AMSDU PN tracking info */ -static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue, - struct ieee80211_rx_status *rx_status, - struct ieee80211_hdr *hdr, - struct iwl_rx_mpdu_desc *desc) +static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue, + struct ieee80211_rx_status *rx_status, + struct ieee80211_hdr *hdr, + struct iwl_rx_mpdu_desc *desc) { struct iwl_mvm_sta *mvm_sta; struct iwl_mvm_rxq_dup_data *dup_data; - u8 baid, tid, sub_frame_idx; + u8 tid, sub_frame_idx; if (WARN_ON(IS_ERR_OR_NULL(sta))) return false; - baid = (le32_to_cpu(desc->reorder_data) & - IWL_RX_MPDU_REORDER_BAID_MASK) >> - IWL_RX_MPDU_REORDER_BAID_SHIFT; - - if (baid != IWL_RX_REORDER_DATA_INVALID_BAID) - return false; - mvm_sta = iwl_mvm_sta_from_mac80211(sta); dup_data = &mvm_sta->dup_data[queue]; @@ -365,6 +360,12 @@ static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue, dup_data->last_sub_frame[tid] >= sub_frame_idx)) return true; + /* Allow same PN as the first subframe for following sub frames */ + if (dup_data->last_seq[tid] == hdr->seq_ctrl && + sub_frame_idx > dup_data->last_sub_frame[tid] && + desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) + rx_status->flag |= RX_FLAG_ALLOW_SAME_PN; + dup_data->last_seq[tid] = hdr->seq_ctrl; dup_data->last_sub_frame[tid] = sub_frame_idx; @@ -971,7 +972,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (ieee80211_is_data(hdr->frame_control)) iwl_mvm_rx_csum(sta, skb, desc); - if (iwl_mvm_is_nonagg_dup(sta, queue, rx_status, hdr, desc)) { + if (iwl_mvm_is_dup(sta, queue, rx_status, hdr, desc)) { kfree_skb(skb); goto out; } -- cgit From fc07bd8ce19bff9e7479c04077ddb5957d1a27be Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 21 Dec 2017 15:05:28 +0200 Subject: iwlwifi: mvm: fix IBSS for devices that support station type API In IBSS, the mac80211 sets the cab_queue to be invalid. However, the multicast station uses it, so we need to override it. A previous patch did it, but it was nested inside the if's and was applied only for legacy FWs that don't support the new station type API, instead of being applied for all paths. In addition, add a missing NL80211_IFTYPE_ADHOC to the initialization of the queues in iwl_mvm_mac_ctxt_init() Fixes: ee48b72211f8 ("iwlwifi: mvm: support ibss in dqa mode") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 24 +++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 2f22e14e00fe..8ba16fc24e3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -438,7 +438,8 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif) } /* Allocate the CAB queue for softAP and GO interfaces */ - if (vif->type == NL80211_IFTYPE_AP) { + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) { /* * For TVQM this will be overwritten later with the FW assigned * queue value (when queue is enabled). diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 6b2674e02606..dbd2ba2bb714 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2052,6 +2052,17 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) vif->type != NL80211_IFTYPE_ADHOC)) return -ENOTSUPP; + /* + * In IBSS, ieee80211_check_queues() sets the cab_queue to be + * invalid, so make sure we use the queue we want. + * Note that this is done here as we want to avoid making DQA + * changes in mac80211 layer. + */ + if (vif->type == NL80211_IFTYPE_ADHOC) { + vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; + mvmvif->cab_queue = vif->cab_queue; + } + /* * While in previous FWs we had to exclude cab queue from TFD queue * mask, now it is needed as any other queue. @@ -2083,20 +2094,9 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) timeout); mvmvif->cab_queue = queue; } else if (!fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_STA_TYPE)) { - /* - * In IBSS, ieee80211_check_queues() sets the cab_queue to be - * invalid, so make sure we use the queue we want. - * Note that this is done here as we want to avoid making DQA - * changes in mac80211 layer. - */ - if (vif->type == NL80211_IFTYPE_ADHOC) { - vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; - mvmvif->cab_queue = vif->cab_queue; - } + IWL_UCODE_TLV_API_STA_TYPE)) iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0, &cfg, timeout); - } return 0; } -- cgit From 4437ba7ee7de7e71d11deb91c87a370e4ffd2601 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 27 Dec 2017 08:58:02 +0200 Subject: iwlwifi: pcie: don't warn if we use all the transmit pointers Our Transmit Frame Descriptor (TFD) is a DMA descriptor that includes several pointers to be able to transmit a packet which is not physically contiguous. Depending on the hardware being use, we can have 20 or 25 pointers in a single TFD. In both cases, it is more than enough and it is quite hard to hit this limit. It has been reported that when using specific applications (Ktorrent), we can actually use all the pointers and then a long standing bug showed up. When we free the TFD, we check its number of valid pointers and make sure it doesn't exceed the number of pointers the hardware support. This check had an off by one bug: it is perfectly valid to free the 20 pointers if the TFD has 20 pointers. Fix that. https://bugzilla.kernel.org/show_bug.cgi?id=197981 Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 6d0a907d5ba5..fabae0f60683 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -147,7 +147,7 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans, /* Sanity check on number of chunks */ num_tbs = iwl_pcie_gen2_get_num_tbs(trans, tfd); - if (num_tbs >= trans_pcie->max_tbs) { + if (num_tbs > trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); return; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 3f85713c41dc..1a566287993d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -378,7 +378,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, /* Sanity check on number of chunks */ num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); - if (num_tbs >= trans_pcie->max_tbs) { + if (num_tbs > trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); /* @todo issue fatal error, it is quite serious situation */ return; -- cgit From 4b7f7ee2a5f523cf002328c6c02cb3e28a0fb30b Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Thu, 28 Dec 2017 13:28:30 +0200 Subject: iwlwifi: align timestamp cancel with timestamp start Canceling the periodic timestamp work should be done in the opposite flow to where it was started. This also prevents from sending the MARKER command during the mac_stop flow - causing a false queue hang (FW is no longer there to send a response). Fixes: 93b167c13a3a ("iwlwifi: runtime: sync FW and host clocks for logs") Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/init.c | 6 ------ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 ++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c index c39fe84bb4c4..45f21acbd842 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/init.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c @@ -76,9 +76,3 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir); } IWL_EXPORT_SYMBOL(iwl_fw_runtime_init); - -void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt) -{ - iwl_fw_cancel_timestamp(fwrt); -} -IWL_EXPORT_SYMBOL(iwl_fw_runtime_exit); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 2d28e0804218..89ff02d7c876 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -90,6 +90,7 @@ #include "fw/runtime.h" #include "fw/dbg.h" #include "fw/acpi.h" +#include "fw/debugfs.h" #define IWL_MVM_MAX_ADDRESSES 5 /* RSSI offset for WkP */ @@ -1783,6 +1784,7 @@ static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm) static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm) { + iwl_fw_cancel_timestamp(&mvm->fwrt); iwl_free_fw_paging(&mvm->fwrt); clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status); iwl_fw_dump_conf_clear(&mvm->fwrt); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 5d525a0023dc..6bb347bf0d6e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -802,7 +802,6 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, iwl_mvm_leds_exit(mvm); iwl_mvm_thermal_exit(mvm); out_free: - iwl_fw_runtime_exit(&mvm->fwrt); iwl_fw_flush_dump(&mvm->fwrt); if (iwlmvm_mod_params.init_dbg) @@ -843,7 +842,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode) #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_IWLWIFI_DEBUGFS) kfree(mvm->d3_resume_sram); #endif - iwl_fw_runtime_exit(&mvm->fwrt); iwl_trans_op_mode_leave(mvm->trans); iwl_phy_db_free(mvm->phy_db); -- cgit From 6b7a5aea71b342ec0593d23b08383e1f33da4c9a Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Thu, 28 Dec 2017 15:53:04 +0200 Subject: iwlwifi: mvm: always init rs with 20mhz bandwidth rates In AP mode, when a new station associates, rs is initialized immediately upon association completion, before the phy context is updated with the association parameters, so the sta bandwidth might be wider than the phy context allows. To avoid this issue, always initialize rs with 20mhz bandwidth rate, and after authorization, when the phy context is already up-to-date, re-init rs with the correct bw. Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 ++++ drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 28 ++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 8aed40a8bc38..08de822c3ef0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2682,6 +2682,10 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, /* enable beacon filtering */ WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0)); + + iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band, + false); + ret = 0; } else if (old_state == IEEE80211_STA_AUTHORIZED && new_state == IEEE80211_STA_ASSOC) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 60abb0084ee5..47f4c7a1d80d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2684,7 +2684,8 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct iwl_lq_sta *lq_sta, enum nl80211_band band, - struct rs_rate *rate) + struct rs_rate *rate, + bool init) { int i, nentries; unsigned long active_rate; @@ -2738,14 +2739,25 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm, */ if (sta->vht_cap.vht_supported && best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) { - switch (sta->bandwidth) { - case IEEE80211_STA_RX_BW_160: - case IEEE80211_STA_RX_BW_80: - case IEEE80211_STA_RX_BW_40: + /* + * In AP mode, when a new station associates, rs is initialized + * immediately upon association completion, before the phy + * context is updated with the association parameters, so the + * sta bandwidth might be wider than the phy context allows. + * To avoid this issue, always initialize rs with 20mhz + * bandwidth rate, and after authorization, when the phy context + * is already up-to-date, re-init rs with the correct bw. + */ + u32 bw = init ? RATE_MCS_CHAN_WIDTH_20 : rs_bw_from_sta_bw(sta); + + switch (bw) { + case RATE_MCS_CHAN_WIDTH_40: + case RATE_MCS_CHAN_WIDTH_80: + case RATE_MCS_CHAN_WIDTH_160: initial_rates = rs_optimal_rates_vht; nentries = ARRAY_SIZE(rs_optimal_rates_vht); break; - case IEEE80211_STA_RX_BW_20: + case RATE_MCS_CHAN_WIDTH_20: initial_rates = rs_optimal_rates_vht_20mhz; nentries = ARRAY_SIZE(rs_optimal_rates_vht_20mhz); break; @@ -2756,7 +2768,7 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm, active_rate = lq_sta->active_siso_rate; rate->type = LQ_VHT_SISO; - rate->bw = rs_bw_from_sta_bw(sta); + rate->bw = bw; } else if (sta->ht_cap.ht_supported && best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) { initial_rates = rs_optimal_rates_ht; @@ -2839,7 +2851,7 @@ static void rs_initialize_lq(struct iwl_mvm *mvm, tbl = &(lq_sta->lq_info[active_tbl]); rate = &tbl->rate; - rs_get_initial_rate(mvm, sta, lq_sta, band, rate); + rs_get_initial_rate(mvm, sta, lq_sta, band, rate, init); rs_init_optimal_rate(mvm, sta, lq_sta); WARN_ONCE(rate->ant != ANT_A && rate->ant != ANT_B, -- cgit From 143a4454daaf0e80a2b9f37159a0d6d2b61e64ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 17 Feb 2018 15:16:22 +0800 Subject: xfrm: do not call rcu_read_unlock when afinfo is NULL in xfrm_get_tos When xfrm_policy_get_afinfo returns NULL, it will not hold rcu read lock. In this case, rcu_read_unlock should not be called in xfrm_get_tos, just like other places where it's calling xfrm_policy_get_afinfo. Fixes: f5e2bb4f5b22 ("xfrm: policy: xfrm_get_tos cannot fail") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8b3811ff002d..150d46633ce6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1458,10 +1458,13 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, static int xfrm_get_tos(const struct flowi *fl, int family) { const struct xfrm_policy_afinfo *afinfo; - int tos = 0; + int tos; afinfo = xfrm_policy_get_afinfo(family); - tos = afinfo ? afinfo->get_tos(fl) : 0; + if (!afinfo) + return 0; + + tos = afinfo->get_tos(fl); rcu_read_unlock(); -- cgit From 013cb81e89f8a70deef086ca29a923faf5585ab0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 Feb 2018 07:44:07 +0100 Subject: xfrm: Fix infinite loop in xfrm_get_dst_nexthop with transport mode. On transport mode we forget to fetch the child dst_entry before we continue the while loop, this leads to an infinite loop. Fix this by fetching the child dst_entry before we continue the while loop. Fixes: 0f6c480f23f4 ("xfrm: Move dst->path into struct xfrm_dst") Reported-by: syzbot+7d03c810e50aaedef98a@syzkaller.appspotmail.com Tested-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 150d46633ce6..625b3fca5704 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2732,14 +2732,14 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, while (dst->xfrm) { const struct xfrm_state *xfrm = dst->xfrm; + dst = xfrm_dst_child(dst); + if (xfrm->props.mode == XFRM_MODE_TRANSPORT) continue; if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) daddr = xfrm->coaddr; else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; - - dst = xfrm_dst_child(dst); } return daddr; } -- cgit From 78dc897b7ee67205423dbbc6b56be49fb18d15b5 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 22 Feb 2018 14:28:59 -0600 Subject: rtlwifi: rtl8723be: Fix loss of signal In commit c713fb071edc ("rtlwifi: rtl8821ae: Fix connection lost problem correctly") a problem in rtl8821ae that caused loss of signal was fixed. That same problem has now been reported for rtl8723be. Accordingly, the ASPM L1 latency has been increased from 0 to 7 to fix the instability. Signed-off-by: Larry Finger Cc: Stable Tested-by: James Cameron Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index f9ccd13c79f9..e7bbbc95cdb1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -1125,7 +1125,8 @@ static void _rtl8723be_enable_aspm_back_door(struct ieee80211_hw *hw) /* Configuration Space offset 0x70f BIT7 is used to control L0S */ tmp8 = _rtl8723be_dbi_read(rtlpriv, 0x70f); - _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7)); + _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7) | + ASPM_L1_LATENCY << 3); /* Configuration Space offset 0x719 Bit3 is for L1 * BIT4 is for clock request -- cgit From b8b549eec8187ac1b12075d69a2d84d89b5e811a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Feb 2018 09:23:19 +0100 Subject: xfrm: Fix ESN sequence number handling for IPsec GSO packets. When IPsec offloading was introduced, we accidentally incremented the sequence number counter on the xfrm_state by one packet too much in the ESN case. This leads to a sequence number gap of one packet after each GSO packet. Fix this by setting the sequence number to the correct value. Fixes: d7dbefc45cf5 ("xfrm: Add xfrm_replay_overflow functions for offloading") Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 1d38c6acf8af..9e3a5e85f828 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -660,7 +660,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff } else { XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; - xo->seq.low = oseq = oseq + 1; + xo->seq.low = oseq + 1; xo->seq.hi = oseq_hi; oseq += skb_shinfo(skb)->gso_segs; } -- cgit From f0e8c61110c2c85903b136ba070daf643a8b6842 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 28 Feb 2018 11:57:50 +0100 Subject: Bluetooth: btusb: Remove Yoga 920 from the btusb_needs_reset_resume_table Commit 1fdb92697469 ("Bluetooth: btusb: Use DMI matching for QCA reset_resume quirking"), added the Lenovo Yoga 920 to the btusb_needs_reset_resume_table. Testing has shown that this is a false positive and the problems where caused by issues with the initial fix: commit fd865802c66b ("Bluetooth: btusb: fix QCA Rome suspend/resume"), which has already been reverted. So the QCA Rome BT in the Yoga 920 does not need a reset-resume quirk at all and this commit removes it from the btusb_needs_reset_resume_table. Note that after this commit the btusb_needs_reset_resume_table is now empty. It is kept around on purpose, since this whole series of commits started for a reason and there are actually broken platforms around, which need to be added to it. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Fixes: 1fdb92697469 ("Bluetooth: btusb: Use DMI matching for QCA ...") Cc: stable@vger.kernel.org Cc: Brian Norris Cc: Kai-Heng Feng Tested-by: Kevin Fenzi Suggested-by: Brian Norris Signed-off-by: Hans de Goede Reviewed-by: Brian Norris Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 60bf04b8f103..041c17e0c668 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -385,13 +385,6 @@ static const struct usb_device_id blacklist_table[] = { * the module itself. So we use a DMI list to match known broken platforms. */ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { - { - /* Lenovo Yoga 920 (QCA Rome device 0cf3:e300) */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 920"), - }, - }, {} }; -- cgit From 0c6e526646c04ce31d4aaa280ed2237dd1cd774c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 1 Mar 2018 13:42:52 +0800 Subject: Bluetooth: btusb: Add Dell OptiPlex 3060 to btusb_needs_reset_resume_table The issue can be reproduced before commit fd865802c66b ("Bluetooth: btusb: fix QCA Rome suspend/resume") gets introduced, so the reset resume quirk is still needed for this system. T: Bus=01 Lev=01 Prnt=01 Port=13 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e007 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Cc: stable@vger.kernel.org Cc: Brian Norris Cc: Hans de Goede Signed-off-by: Kai-Heng Feng Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 041c17e0c668..382be00a8329 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -385,6 +385,13 @@ static const struct usb_device_id blacklist_table[] = { * the module itself. So we use a DMI list to match known broken platforms. */ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { + { + /* Dell OptiPlex 3060 (QCA ROME device 0cf3:e007) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"), + }, + }, {} }; -- cgit From 64e759f58f128730b97a3c3a26d283c075ad7c86 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 26 Feb 2018 15:41:53 +0100 Subject: Bluetooth: Fix missing encryption refresh on Security Request If Security Request is received on connection that is already encrypted with sufficient security master should perform encryption key refresh procedure instead of just ignoring Slave Security Request (Core Spec 5.0 Vol 3 Part H 2.4.6). > ACL Data RX: Handle 3585 flags 0x02 dlen 6 SMP: Security Request (0x0b) len 1 Authentication requirement: Bonding, No MITM, SC, No Keypresses (0x09) < HCI Command: LE Start Encryption (0x08|0x0019) plen 28 Handle: 3585 Random number: 0x0000000000000000 Encrypted diversifier: 0x0000 Long term key: 44264272a5c426a9e868f034cf0e69f3 > HCI Event: Command Status (0x0f) plen 4 LE Start Encryption (0x08|0x0019) ncmd 1 Status: Success (0x00) > HCI Event: Encryption Key Refresh Complete (0x30) plen 3 Status: Success (0x00) Handle: 3585 Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 01117ae84f1d..a2ddae2f37d7 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2296,8 +2296,14 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) else sec_level = authreq_to_seclevel(auth); - if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) + if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) { + /* If link is already encrypted with sufficient security we + * still need refresh encryption as per Core Spec 5.0 Vol 3, + * Part H 2.4.6 + */ + smp_ltk_encrypt(conn, hcon->sec_level); return 0; + } if (sec_level > hcon->pending_sec_level) hcon->pending_sec_level = sec_level; -- cgit From 7f8ae00f63725e835b5ed8a97bc18bf1c950acb2 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Wed, 27 Dec 2017 15:21:18 +0200 Subject: iwlwifi: Cancel and set MARKER_CMD timer during suspend-resume While entering to D3 mode there is a gap between the time the driver handles the D3_CONFIG_CMD response to the time the host is going to sleep. In between there might be cases which MARKER_CMD can tailgate. Also during resume flow the MARKER_CMD might get sent while D0I3_CMD is being handled in the FW. Cancel MARKER_CMD timer and set it again properly during suspend resume flows to prevent this command from being sent accidentlly. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/debugfs.h | 18 ++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/init.c | 12 ++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 4 ++++ drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 8 ++++++++ 4 files changed, 42 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h index e57ff92a68ae..3da468d2cc92 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h @@ -75,6 +75,20 @@ static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt) cancel_delayed_work_sync(&fwrt->timestamp.wk); } +static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt) +{ + cancel_delayed_work_sync(&fwrt->timestamp.wk); +} + +static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt) +{ + if (!fwrt->timestamp.delay) + return; + + schedule_delayed_work(&fwrt->timestamp.wk, + round_jiffies_relative(fwrt->timestamp.delay)); +} + #else static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt, struct dentry *dbgfs_dir) @@ -84,4 +98,8 @@ static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt, static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt) {} +static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt) {} + +static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt) {} + #endif /* CONFIG_IWLWIFI_DEBUGFS */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c index 45f21acbd842..2efac307909e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/init.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c @@ -76,3 +76,15 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir); } IWL_EXPORT_SYMBOL(iwl_fw_runtime_init); + +void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt) +{ + iwl_fw_suspend_timestamp(fwrt); +} +IWL_EXPORT_SYMBOL(iwl_fw_runtime_suspend); + +void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt) +{ + iwl_fw_resume_timestamp(fwrt); +} +IWL_EXPORT_SYMBOL(iwl_fw_runtime_resume); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index e25c049f980f..f3197f7c13b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -150,6 +150,10 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt); +void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt); + +void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt); + static inline void iwl_fw_set_current_image(struct iwl_fw_runtime *fwrt, enum iwl_ucode_type cur_fw_img) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 0e6cf39285f4..2efe9b099556 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1098,6 +1098,8 @@ int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* make sure the d0i3 exit work is not pending */ flush_work(&mvm->d0i3_exit_work); + iwl_fw_runtime_suspend(&mvm->fwrt); + ret = iwl_trans_suspend(trans); if (ret) return ret; @@ -2012,6 +2014,8 @@ int iwl_mvm_resume(struct ieee80211_hw *hw) mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; + iwl_fw_runtime_resume(&mvm->fwrt); + return ret; } @@ -2038,6 +2042,8 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file) mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_D3; + iwl_fw_runtime_suspend(&mvm->fwrt); + /* start pseudo D3 */ rtnl_lock(); err = __iwl_mvm_suspend(mvm->hw, mvm->hw->wiphy->wowlan_config, true); @@ -2098,6 +2104,8 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) __iwl_mvm_resume(mvm, true); rtnl_unlock(); + iwl_fw_runtime_resume(&mvm->fwrt); + mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; iwl_abort_notification_waits(&mvm->notif_wait); -- cgit From de04d4fbf87b769ab18c480e4f020c53e74bbdd2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 2 Jan 2018 11:40:15 +0200 Subject: iwlwifi: mvm: fix TX of CCMP 256 We don't have enough room in the TX command for a CCMP 256 key, and need to use key from table. Fixes: 3264bf032bd9 ("[BUGFIX] iwlwifi: mvm: Fix CCMP IV setting") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index dda77b327c98..57ad6019ffad 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -419,11 +419,11 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, { struct ieee80211_key_conf *keyconf = info->control.hw_key; u8 *crypto_hdr = skb_frag->data + hdrlen; + enum iwl_tx_cmd_sec_ctrl type = TX_CMD_SEC_CCM; u64 pn; switch (keyconf->cipher) { case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_CCMP_256: iwl_mvm_set_tx_cmd_ccmp(info, tx_cmd); iwl_mvm_set_tx_cmd_pn(info, crypto_hdr); break; @@ -447,13 +447,16 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: + type = TX_CMD_SEC_GCMP; + /* Fall through */ + case WLAN_CIPHER_SUITE_CCMP_256: /* TODO: Taking the key from the table might introduce a race * when PTK rekeying is done, having an old packets with a PN * based on the old key but the message encrypted with a new * one. * Need to handle this. */ - tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE; + tx_cmd->sec_ctl |= type | TX_CMD_SEC_KEY_FROM_TABLE; tx_cmd->key[0] = keyconf->hw_key_idx; iwl_mvm_set_tx_cmd_pn(info, crypto_hdr); break; -- cgit From 40d53f4a60c9eb10d4fa58066c23ba1af8a59e39 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Thu, 4 Jan 2018 17:39:08 +0200 Subject: iwlwifi: mvm: Fix channel switch for count 0 and 1 It was assumed that apply_time==0 implies immediate scheduling, which is wrong. Instead, the fw expects the START_IMMEDIATELY flag to be set. Otherwise, this resulted in 0x3063 assert. Fix that. While at it rename the T2_V2_START_IMMEDIATELY to TE_V2_START_IMMEDIATELY. Fixes: f5d8f50f271d ("iwlwifi: mvm: Fix channel switch in case of count <= 1") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h index 3721a3ed358b..f824bebceb06 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h @@ -211,7 +211,7 @@ enum { * @TE_V2_NOTIF_HOST_FRAG_END:request/receive notification on frag end * @TE_V2_NOTIF_INTERNAL_FRAG_START: internal FW use. * @TE_V2_NOTIF_INTERNAL_FRAG_END: internal FW use. - * @T2_V2_START_IMMEDIATELY: start time event immediately + * @TE_V2_START_IMMEDIATELY: start time event immediately * @TE_V2_DEP_OTHER: depends on another time event * @TE_V2_DEP_TSF: depends on a specific time * @TE_V2_EVENT_SOCIOPATHIC: can't co-exist with other events of tha same MAC @@ -230,7 +230,7 @@ enum iwl_time_event_policy { TE_V2_NOTIF_HOST_FRAG_END = BIT(5), TE_V2_NOTIF_INTERNAL_FRAG_START = BIT(6), TE_V2_NOTIF_INTERNAL_FRAG_END = BIT(7), - T2_V2_START_IMMEDIATELY = BIT(11), + TE_V2_START_IMMEDIATELY = BIT(11), /* placement characteristics */ TE_V2_DEP_OTHER = BIT(TE_V2_PLACEMENT_POS), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 200ab50ec86b..acb217e666db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -616,7 +616,7 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm, time_cmd.repeat = 1; time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START | TE_V2_NOTIF_HOST_EVENT_END | - T2_V2_START_IMMEDIATELY); + TE_V2_START_IMMEDIATELY); if (!wait_for_notif) { iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd); @@ -803,7 +803,7 @@ int iwl_mvm_start_p2p_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif, time_cmd.repeat = 1; time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START | TE_V2_NOTIF_HOST_EVENT_END | - T2_V2_START_IMMEDIATELY); + TE_V2_START_IMMEDIATELY); return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd); } @@ -913,6 +913,8 @@ int iwl_mvm_schedule_csa_period(struct iwl_mvm *mvm, time_cmd.interval = cpu_to_le32(1); time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START | TE_V2_ABSENCE); + if (!apply_time) + time_cmd.policy |= cpu_to_le16(TE_V2_START_IMMEDIATELY); return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd); } -- cgit From 63dd5d022f4766e6b05ee611124afcc7cbfbb953 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 7 Jan 2018 14:30:49 +0200 Subject: iwlwifi: mvm: fix assert 0x2B00 on older FWs We should add the multicast station before adding the broadcast station. However, in older FW, the firmware will start beaconing when we add the multicast station, and since the broadcast station is not added at this point so the transmission of the beacon will fail on assert 0x2b00. This is fixed in later firmware, so make the order of addition depend on the TLV. Fixes: 26d6c16bed53 ("iwlwifi: mvm: add multicast station") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 45 ++++++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 08de822c3ef0..ebf511150f4d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -2106,15 +2107,40 @@ static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw, if (ret) goto out_remove; - ret = iwl_mvm_add_mcast_sta(mvm, vif); - if (ret) - goto out_unbind; - - /* Send the bcast station. At this stage the TBTT and DTIM time events - * are added and applied to the scheduler */ - ret = iwl_mvm_send_add_bcast_sta(mvm, vif); - if (ret) - goto out_rm_mcast; + /* + * This is not very nice, but the simplest: + * For older FWs adding the mcast sta before the bcast station may + * cause assert 0x2b00. + * This is fixed in later FW so make the order of removal depend on + * the TLV + */ + if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) { + ret = iwl_mvm_add_mcast_sta(mvm, vif); + if (ret) + goto out_unbind; + /* + * Send the bcast station. At this stage the TBTT and DTIM time + * events are added and applied to the scheduler + */ + ret = iwl_mvm_send_add_bcast_sta(mvm, vif); + if (ret) { + iwl_mvm_rm_mcast_sta(mvm, vif); + goto out_unbind; + } + } else { + /* + * Send the bcast station. At this stage the TBTT and DTIM time + * events are added and applied to the scheduler + */ + iwl_mvm_send_add_bcast_sta(mvm, vif); + if (ret) + goto out_unbind; + iwl_mvm_add_mcast_sta(mvm, vif); + if (ret) { + iwl_mvm_send_rm_bcast_sta(mvm, vif); + goto out_unbind; + } + } /* must be set before quota calculations */ mvmvif->ap_ibss_active = true; @@ -2144,7 +2170,6 @@ out_quota_failed: iwl_mvm_power_update_mac(mvm); mvmvif->ap_ibss_active = false; iwl_mvm_send_rm_bcast_sta(mvm, vif); -out_rm_mcast: iwl_mvm_rm_mcast_sta(mvm, vif); out_unbind: iwl_mvm_binding_remove_vif(mvm, vif); -- cgit From 8745f12a6600dd9d31122588621d4c8ddb332cd7 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Thu, 11 Jan 2018 16:18:46 +0200 Subject: iwlwifi: avoid collecting firmware dump if not loaded Trying to collect firmware debug data while firmware is not loaded causes various errors (e.g. failing NIC access). This causes even a bigger issue if at that time the HW radio is off. In that case, when later turning the radio on, the Driver fails to read the HW (registers contain garbage values). (It may be that the CSR_GP_CNTRL_REG_FLAG_RFKILL_WAKE_L1A_EN bit is cleared on faulty NIC access - since the same behavior was seen in HW RFKILL toggling before setting that bit.) Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 13 +++++++++++-- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 3 +++ drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 5 ++--- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 8 ++++++++ 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 67aefc8fc9ac..7bd704a3e640 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -8,6 +8,7 @@ * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -942,7 +944,6 @@ dump_trans_data: out: iwl_fw_free_dump_desc(fwrt); - fwrt->dump.trig = NULL; clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status); IWL_DEBUG_INFO(fwrt, "WRT dump done\n"); } @@ -1112,6 +1113,14 @@ void iwl_fw_error_dump_wk(struct work_struct *work) fwrt->ops->dump_start(fwrt->ops_ctx)) return; + if (fwrt->ops && fwrt->ops->fw_running && + !fwrt->ops->fw_running(fwrt->ops_ctx)) { + IWL_ERR(fwrt, "Firmware not running - cannot dump error\n"); + iwl_fw_free_dump_desc(fwrt); + clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status); + goto out; + } + if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { /* stop recording */ iwl_fw_dbg_stop_recording(fwrt); @@ -1145,7 +1154,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work) iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, out_ctrl); } } - +out: if (fwrt->ops && fwrt->ops->dump_end) fwrt->ops->dump_end(fwrt->ops_ctx); } diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 223fb77a3aa9..72259bff9922 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -8,6 +8,7 @@ * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -91,6 +93,7 @@ static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt) if (fwrt->dump.desc != &iwl_dump_desc_assert) kfree(fwrt->dump.desc); fwrt->dump.desc = NULL; + fwrt->dump.trig = NULL; } void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index f3197f7c13b6..3fb940ebd74a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -6,6 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -26,6 +27,7 @@ * BSD LICENSE * * Copyright(c) 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -68,6 +70,7 @@ struct iwl_fw_runtime_ops { int (*dump_start)(void *ctx); void (*dump_end)(void *ctx); + bool (*fw_running)(void *ctx); }; #define MAX_NUM_LMAC 2 diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index a7892c1254a2..9c436d8d001d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -35,6 +36,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1281,9 +1283,6 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm, { int ret; - if (!iwl_mvm_firmware_running(mvm)) - return -EIO; - ret = iwl_mvm_ref_sync(mvm, IWL_MVM_REF_PRPH_WRITE); if (ret) return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 6bb347bf0d6e..ab7fb5aad984 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -35,6 +36,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -552,9 +554,15 @@ static void iwl_mvm_fwrt_dump_end(void *ctx) iwl_mvm_unref(mvm, IWL_MVM_REF_FW_DBG_COLLECT); } +static bool iwl_mvm_fwrt_fw_running(void *ctx) +{ + return iwl_mvm_firmware_running(ctx); +} + static const struct iwl_fw_runtime_ops iwl_mvm_fwrt_ops = { .dump_start = iwl_mvm_fwrt_dump_start, .dump_end = iwl_mvm_fwrt_dump_end, + .fw_running = iwl_mvm_fwrt_fw_running, }; static struct iwl_op_mode * -- cgit From e4f13ad07823b24a1537518d2163bd164292fb10 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 15 Jan 2018 13:50:59 +0200 Subject: iwlwifi: mvm: fix "failed to remove key" message When the GTK is installed, we install it to HW with the station ID of the AP. Mac80211 will try to remove it only after the AP sta is removed, which will result in a failure to remove key since we do not have any station for it. This is a valid situation, but a previous commit removed the early return and added a return with error value, which resulted in an error message that is confusing to users. Remove the error return value. Fixes: 85aeb58cec1a ("iwlwifi: mvm: Enable security on new TX API") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index dbd2ba2bb714..3739e42b34e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3170,8 +3170,9 @@ static int __iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, u8 sta_id, int ret, size; u32 status; + /* This is a valid situation for GTK removal */ if (sta_id == IWL_MVM_INVALID_STA) - return -EINVAL; + return 0; key_flags = cpu_to_le16((keyconf->keyidx << STA_KEY_FLG_KEYID_POS) & STA_KEY_FLG_KEYID_MSK); -- cgit From 7c305de2b9548ab6b65fce342c78618bbed5db73 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 22 Jan 2018 08:55:06 +0200 Subject: iwlwifi: mvm: Direct multicast frames to the correct station Multicast frames for NL80211_IFTYPE_AP and NL80211_IFTYPE_ADHOC were directed to the broadcast station, however, as the broadcast station did not have keys configured, these frames were sent unencrypted. Fix this by using the multicast station which is the station for which encryption keys are configured. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 57ad6019ffad..af6dfceab6b8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -648,7 +648,11 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE || info.control.vif->type == NL80211_IFTYPE_AP || info.control.vif->type == NL80211_IFTYPE_ADHOC) { - sta_id = mvmvif->bcast_sta.sta_id; + if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE) + sta_id = mvmvif->bcast_sta.sta_id; + else + sta_id = mvmvif->mcast_sta.sta_id; + queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); if (queue < 0) -- cgit From 6508de0305d560235b366cc2cc98f7bcfb029e92 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 25 Jan 2018 15:22:41 +0200 Subject: iwlwifi: mvm: Correctly set the tid for mcast queue In the scheduler config command, the meaning of tid == 0xf was intended to indicate the configuration is for management frames. However, tid == 0xf was also used for the multicast queue that was meant only for multicast data frames, which resulted with the FW not encrypting multicast data frames. As multicast frames do not have a QoS header, fix this by setting tid == 0, to indicate that this is a data queue and not management one. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 3739e42b34e4..630e23cb0ffb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2039,7 +2039,7 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) struct iwl_trans_txq_scd_cfg cfg = { .fifo = IWL_MVM_TX_FIFO_MCAST, .sta_id = msta->sta_id, - .tid = IWL_MAX_TID_COUNT, + .tid = 0, .aggregate = false, .frame_limit = IWL_FRAME_LIMIT, }; @@ -2090,7 +2090,7 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (iwl_mvm_has_new_tx_api(mvm)) { int queue = iwl_mvm_tvqm_enable_txq(mvm, vif->cab_queue, msta->sta_id, - IWL_MAX_TID_COUNT, + 0, timeout); mvmvif->cab_queue = queue; } else if (!fw_has_api(&mvm->fw->ucode_capa, @@ -2115,7 +2115,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0); iwl_mvm_disable_txq(mvm, mvmvif->cab_queue, vif->cab_queue, - IWL_MAX_TID_COUNT, 0); + 0, 0); ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id); if (ret) -- cgit From de655fa8fb237d0eaf838e8833b464c1dec90070 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:30 +0100 Subject: iwlwifi: fix malformed CONFIG_IWLWIFI_PCIE_RTPM default 'default false' should be 'default n', though they happen to have the same effect here, due to undefined symbols ('false' in this case) evaluating to n in a tristate sense. Remove the default instead of changing it. bool and tristate symbols implicitly default to n. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Signed-off-by: Ulf Magnusson Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index c5f2ddf9b0fe..e5a2fc738ac3 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -91,7 +91,6 @@ config IWLWIFI_BCAST_FILTERING config IWLWIFI_PCIE_RTPM bool "Enable runtime power management mode for PCIe devices" depends on IWLMVM && PM && EXPERT - default false help Say Y here to enable runtime power management for PCIe devices. If enabled, the device will go into low power mode -- cgit From d02f51cbcf12b09ab945873e35046045875eed9a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Sat, 3 Mar 2018 03:03:46 +0100 Subject: bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat to deal with gso sctp skbs SCTP GSO skbs have a gso_size of GSO_BY_FRAGS, so any sort of unconditionally mangling of that will result in nonsense value and would corrupt the skb later on. Therefore, i) add two helpers skb_increase_gso_size() and skb_decrease_gso_size() that would throw a one time warning and bail out for such skbs and ii) refuse and return early with an error in those BPF helpers that are affected. We do need to bail out as early as possible from there before any changes on the skb have been performed. Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Co-authored-by: Daniel Borkmann Signed-off-by: Daniel Axtens Cc: Marcelo Ricardo Leitner Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- Documentation/networking/segmentation-offloads.txt | 11 +++- include/linux/skbuff.h | 22 ++++++++ net/core/filter.c | 60 +++++++++++++++------- 3 files changed, 73 insertions(+), 20 deletions(-) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index d47480b61ac6..23a8dd91a9ec 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -153,8 +153,15 @@ To signal this, gso_size is set to the special value GSO_BY_FRAGS. Therefore, any code in the core networking stack must be aware of the possibility that gso_size will be GSO_BY_FRAGS and handle that case -appropriately. (For size checks, the skb_gso_validate_*_len family of -helpers do this automatically.) +appropriately. + +There are a couple of helpers to make this easier: + + - For size checks, the skb_gso_validate_*_len family of helpers correctly + considers GSO_BY_FRAGS. + + - For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size + will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs. This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c1e66bdcf583..8c67c33f40c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4038,6 +4038,12 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } +/* Note: Should be called only if skb_is_gso(skb) is true */ +static inline bool skb_is_gso_sctp(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; +} + static inline void skb_gso_reset(struct sk_buff *skb) { skb_shinfo(skb)->gso_size = 0; @@ -4045,6 +4051,22 @@ static inline void skb_gso_reset(struct sk_buff *skb) skb_shinfo(skb)->gso_type = 0; } +static inline void skb_increase_gso_size(struct skb_shared_info *shinfo, + u16 increment) +{ + if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) + return; + shinfo->gso_size += increment; +} + +static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo, + u16 decrement) +{ + if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) + return; + shinfo->gso_size -= decrement; +} + void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index 0c121adbdbaa..48aa7c7320db 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2087,6 +2087,10 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; @@ -2096,19 +2100,21 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* SKB_GSO_TCPV4 needs to be changed into * SKB_GSO_TCPV6. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + if (shinfo->gso_type & SKB_GSO_TCPV4) { + shinfo->gso_type &= ~SKB_GSO_TCPV4; + shinfo->gso_type |= SKB_GSO_TCPV6; } /* Due to IPv6 header, MSS needs to be downgraded. */ - skb_shinfo(skb)->gso_size -= len_diff; + skb_decrease_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } skb->protocol = htons(ETH_P_IPV6); @@ -2123,6 +2129,10 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; @@ -2132,19 +2142,21 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* SKB_GSO_TCPV6 needs to be changed into * SKB_GSO_TCPV4. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { - skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (shinfo->gso_type & SKB_GSO_TCPV6) { + shinfo->gso_type &= ~SKB_GSO_TCPV6; + shinfo->gso_type |= SKB_GSO_TCPV4; } /* Due to IPv4 header, MSS can be upgraded. */ - skb_shinfo(skb)->gso_size += len_diff; + skb_increase_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } skb->protocol = htons(ETH_P_IP); @@ -2243,6 +2255,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; @@ -2252,11 +2268,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Due to header grow, MSS needs to be downgraded. */ - skb_shinfo(skb)->gso_size -= len_diff; + skb_decrease_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } return 0; @@ -2267,6 +2285,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; @@ -2276,11 +2298,13 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Due to header shrink, MSS can be upgraded. */ - skb_shinfo(skb)->gso_size += len_diff; + skb_increase_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } return 0; -- cgit From 74c12c630fe310eb7fcae1b292257d47781fff0a Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sun, 4 Mar 2018 13:08:17 +0100 Subject: batman-adv: Fix multicast packet loss with a single WANT_ALL_IPV4/6 flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the kernel doc describes too the code is supposed to skip adding multicast TT entries if both the WANT_ALL_IPV4 and WANT_ALL_IPV6 flags are present. Unfortunately, the current code even skips adding multicast TT entries if only either the WANT_ALL_IPV4 or WANT_ALL_IPV6 is present. This could lead to IPv6 multicast packet loss if only an IGMP but not an MLD querier is present for instance or vice versa. Fixes: 687937ab3489 ("batman-adv: Add multicast optimization support for bridged setups") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index cbdeb47ec3f6..d70640135e3a 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -543,8 +543,8 @@ update: bat_priv->mcast.enabled = true; } - return !(mcast_data.flags & - (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6)); + return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 && + mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6); } /** -- cgit From 745d0bd3af99ccc8c5f5822f808cd133eadad6ac Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2018 16:26:27 +0900 Subject: e1000e: Remove Other from EIAC It was reported that emulated e1000e devices in vmware esxi 6.5 Build 7526125 do not link up after commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts", v4.15-rc1). Some tracing shows that after e1000e_trigger_lsc() is called, ICR reads out as 0x0 in e1000_msix_other() on emulated e1000e devices. In comparison, on real e1000e 82574 hardware, icr=0x80000004 (_INT_ASSERTED | _LSC) in the same situation. Some experimentation showed that this flaw in vmware e1000e emulation can be worked around by not setting Other in EIAC. This is how it was before 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1). Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 1298b69f990b..153ad406c65e 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1918,6 +1918,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) bool enable = true; icr = er32(ICR); + ew32(ICR, E1000_ICR_OTHER); + if (icr & E1000_ICR_RXO) { ew32(ICR, E1000_ICR_RXO); enable = false; @@ -2040,7 +2042,6 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) hw->hw_addr + E1000_EITR_82574(vector)); else writel(1, hw->hw_addr + E1000_EITR_82574(vector)); - adapter->eiac_mask |= E1000_IMS_OTHER; /* Cause Tx interrupts on every write back */ ivar |= BIT(31); @@ -2265,7 +2266,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); } else if (hw->mac.type >= e1000_pch_lpt) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { -- cgit From 1f0ea19722ef9dfa229a9540f70b8d1c34a98a6a Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:12 +0900 Subject: Partial revert "e1000e: Avoid receiver overrun interrupt bursts" This partially reverts commit 4aea7a5c5e940c1723add439f4088844cd26196d. We keep the fix for the first part of the problem (1) described in the log of that commit, that is to read ICR in the other interrupt handler. We remove the fix for the second part of the problem (2), Other interrupt throttling. Bursts of "Other" interrupts may once again occur during rxo (receive overflow) traffic conditions. This is deemed acceptable in the interest of avoiding unforeseen fallout from changes that are not strictly necessary. As discussed, the e1000e driver should be in "maintenance mode". Link: https://www.spinics.net/lists/netdev/msg480675.html Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 153ad406c65e..3b36efa6228d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1915,21 +1915,10 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 icr; - bool enable = true; icr = er32(ICR); ew32(ICR, E1000_ICR_OTHER); - if (icr & E1000_ICR_RXO) { - ew32(ICR, E1000_ICR_RXO); - enable = false; - /* napi poll will re-enable Other, make sure it runs */ - if (napi_schedule_prep(&adapter->napi)) { - adapter->total_rx_bytes = 0; - adapter->total_rx_packets = 0; - __napi_schedule(&adapter->napi); - } - } if (icr & E1000_ICR_LSC) { ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; @@ -1938,7 +1927,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (enable && !test_bit(__E1000_DOWN, &adapter->state)) + if (!test_bit(__E1000_DOWN, &adapter->state)) ew32(IMS, E1000_IMS_OTHER); return IRQ_HANDLED; @@ -2708,8 +2697,7 @@ static int e1000e_poll(struct napi_struct *napi, int weight) napi_complete_done(napi, work_done); if (!test_bit(__E1000_DOWN, &adapter->state)) { if (adapter->msix_entries) - ew32(IMS, adapter->rx_ring->ims_val | - E1000_IMS_OTHER); + ew32(IMS, adapter->rx_ring->ims_val); else e1000_irq_enable(adapter); } -- cgit From 361a954e6a7215de11a6179ad9bdc07d7e394b04 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:13 +0900 Subject: e1000e: Fix queue interrupt re-raising in Other interrupt Restores the ICS write for Rx/Tx queue interrupts which was present before commit 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1) but was not restored in commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts", v4.15-rc1). This re-raises the queue interrupts in case the txq or rxq bits were set in ICR and the Other interrupt handler read and cleared ICR before the queue interrupt was raised. Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 3b36efa6228d..2c9609bee2ae 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1919,6 +1919,9 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) icr = er32(ICR); ew32(ICR, E1000_ICR_OTHER); + if (icr & adapter->eiac_mask) + ew32(ICS, (icr & adapter->eiac_mask)); + if (icr & E1000_ICR_LSC) { ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; -- cgit From 116f4a640b3197401bc93b8adc6c35040308ceff Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:14 +0900 Subject: e1000e: Avoid missed interrupts following ICR read The 82574 specification update errata 12 states that interrupts may be missed if ICR is read while INT_ASSERTED is not set. Avoid that problem by setting all bits related to events that can trigger the Other interrupt in IMS. The Other interrupt is raised for such events regardless of whether or not they are set in IMS. However, only when they are set is the INT_ASSERTED bit also set in ICR. By doing this, we ensure that INT_ASSERTED is always set when we read ICR in e1000_msix_other() and steer clear of the errata. This also ensures that ICR will automatically be cleared on read, therefore we no longer need to clear bits explicitly. Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/defines.h | 21 ++++++++++++++++++++- drivers/net/ethernet/intel/e1000e/netdev.c | 11 ++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index afb7ebe20b24..824fd44e25f0 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -400,6 +400,10 @@ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ #define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ +#define E1000_ICR_MDAC 0x00000200 /* MDIO Access Complete */ +#define E1000_ICR_SRPD 0x00010000 /* Small Receive Packet Detected */ +#define E1000_ICR_ACK 0x00020000 /* Receive ACK Frame Detected */ +#define E1000_ICR_MNG 0x00040000 /* Manageability Event Detected */ #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ /* If this bit asserted, the driver should claim the interrupt */ #define E1000_ICR_INT_ASSERTED 0x80000000 @@ -407,7 +411,7 @@ #define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ #define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ #define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ -#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ +#define E1000_ICR_OTHER 0x01000000 /* Other Interrupt */ /* PBA ECC Register */ #define E1000_PBA_ECC_COUNTER_MASK 0xFFF00000 /* ECC counter mask */ @@ -431,12 +435,27 @@ E1000_IMS_RXSEQ | \ E1000_IMS_LSC) +/* These are all of the events related to the OTHER interrupt. + */ +#define IMS_OTHER_MASK ( \ + E1000_IMS_LSC | \ + E1000_IMS_RXO | \ + E1000_IMS_MDAC | \ + E1000_IMS_SRPD | \ + E1000_IMS_ACK | \ + E1000_IMS_MNG) + /* Interrupt Mask Set */ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ #define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ +#define E1000_IMS_RXO E1000_ICR_RXO /* Receiver Overrun */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ +#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO Access Complete */ +#define E1000_IMS_SRPD E1000_ICR_SRPD /* Small Receive Packet */ +#define E1000_IMS_ACK E1000_ICR_ACK /* Receive ACK Frame Detected */ +#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability Event */ #define E1000_IMS_ECCER E1000_ICR_ECCER /* Uncorrectable ECC Error */ #define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */ #define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 2c9609bee2ae..9fd4050a91ca 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1914,16 +1914,12 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 icr; - - icr = er32(ICR); - ew32(ICR, E1000_ICR_OTHER); + u32 icr = er32(ICR); if (icr & adapter->eiac_mask) ew32(ICS, (icr & adapter->eiac_mask)); if (icr & E1000_ICR_LSC) { - ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -1931,7 +1927,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) } if (!test_bit(__E1000_DOWN, &adapter->state)) - ew32(IMS, E1000_IMS_OTHER); + ew32(IMS, E1000_IMS_OTHER | IMS_OTHER_MASK); return IRQ_HANDLED; } @@ -2258,7 +2254,8 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | + IMS_OTHER_MASK); } else if (hw->mac.type >= e1000_pch_lpt) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { -- cgit From 4e7dc08e57c95673d2edaba8983c3de4dd1f65f5 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 20 Feb 2018 15:12:00 +0900 Subject: e1000e: Fix check_for_link return value with autoneg off When autoneg is off, the .check_for_link callback functions clear the get_link_status flag and systematically return a "pseudo-error". This means that the link is not detected as up until the next execution of the e1000_watchdog_task() 2 seconds later. Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up") Signed-off-by: Benjamin Poirier Acked-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- drivers/net/ethernet/intel/e1000e/mac.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 31277d3bb7dc..ff308b05d68c 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1602,7 +1602,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return -E1000_ERR_CONFIG; + return 1; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index f457c5703d0c..db735644b312 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -450,7 +450,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return -E1000_ERR_CONFIG; + return 1; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to -- cgit From aea3fca005fb45f80869f2e8d56fd4e64c1d1fdb Mon Sep 17 00:00:00 2001 From: Pierre-Yves Kerbrat Date: Fri, 26 Jan 2018 11:24:12 +0100 Subject: e1000e: allocate ring descriptors with dma_zalloc_coherent Descriptor rings were not initialized at zero when allocated When area contained garbage data, it caused skb_over_panic in e1000_clean_rx_irq (if data had E1000_RXD_STAT_DD bit set) This patch makes use of dma_zalloc_coherent to make sure the ring is memset at 0 to prevent the area from containing garbage. Following is the signature of the panic: IODDR0@0.0: skbuff: skb_over_panic: text:80407b20 len:64010 put:64010 head:ab46d800 data:ab46d842 tail:0xab47d24c end:0xab46df40 dev:eth0 IODDR0@0.0: BUG: failure at net/core/skbuff.c:105/skb_panic()! IODDR0@0.0: Kernel panic - not syncing: BUG! IODDR0@0.0: IODDR0@0.0: Process swapper/0 (pid: 0, threadinfo=81728000, task=8173cc00 ,cpu: 0) IODDR0@0.0: SP = <815a1c0c> IODDR0@0.0: Stack: 00000001 IODDR0@0.0: b2d89800 815e33ac IODDR0@0.0: ea73c040 00000001 IODDR0@0.0: 60040003 0000fa0a IODDR0@0.0: 00000002 IODDR0@0.0: IODDR0@0.0: 804540c0 815a1c70 IODDR0@0.0: b2744000 602ac070 IODDR0@0.0: 815a1c44 b2d89800 IODDR0@0.0: 8173cc00 815a1c08 IODDR0@0.0: IODDR0@0.0: 00000006 IODDR0@0.0: 815a1b50 00000000 IODDR0@0.0: 80079434 00000001 IODDR0@0.0: ab46df40 b2744000 IODDR0@0.0: b2d89800 IODDR0@0.0: IODDR0@0.0: 0000fa0a 8045745c IODDR0@0.0: 815a1c88 0000fa0a IODDR0@0.0: 80407b20 b2789f80 IODDR0@0.0: 00000005 80407b20 IODDR0@0.0: IODDR0@0.0: IODDR0@0.0: Call Trace: IODDR0@0.0: [<804540bc>] skb_panic+0xa4/0xa8 IODDR0@0.0: [<80079430>] console_unlock+0x2f8/0x6d0 IODDR0@0.0: [<80457458>] skb_put+0xa0/0xc0 IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8 IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8 IODDR0@0.0: [<804079c8>] e1000_clean_rx_irq+0x188/0x3e8 IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8 IODDR0@0.0: [<80468b48>] __dev_kfree_skb_any+0x88/0xa8 IODDR0@0.0: [<804101ac>] e1000e_poll+0x94/0x288 IODDR0@0.0: [<8046e9d4>] net_rx_action+0x19c/0x4e8 IODDR0@0.0: ... IODDR0@0.0: Maximum depth to print reached. Use kstack= To specify a custom value (where 0 means to display the full backtrace) IODDR0@0.0: ---[ end Kernel panic - not syncing: BUG! Signed-off-by: Pierre-Yves Kerbrat Signed-off-by: Marius Gligor Tested-by: Aaron Brown Reviewed-by: Alexander Duyck Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 9fd4050a91ca..c0f23446bf26 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -2323,8 +2323,8 @@ static int e1000_alloc_ring_dma(struct e1000_adapter *adapter, { struct pci_dev *pdev = adapter->pdev; - ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma, - GFP_KERNEL); + ring->desc = dma_zalloc_coherent(&pdev->dev, ring->size, &ring->dma, + GFP_KERNEL); if (!ring->desc) return -ENOMEM; -- cgit From 0d3601d2994a4ad9edf0b44f2ea34c68ff494ea5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 5 Mar 2018 16:07:38 +0100 Subject: netfilter: nft_set_hash: skip fixed hash if timeout is specified Fixed hash supports to timeouts, so skip it. Otherwise, userspace hits EOPNOTSUPP. Fixes: 6c03ae210ce3 ("netfilter: nft_set_hash: add non-resizable hashtable implementation") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3f1624ee056f..d40591fe1b2f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -674,7 +674,7 @@ static const struct nft_set_ops * nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, u32 flags) { - if (desc->size) { + if (desc->size && !(flags & NFT_SET_TIMEOUT)) { switch (desc->klen) { case 4: return &nft_hash_fast_ops; -- cgit From cd526676de1ece03a8ea7cea726cf879a2dba2b8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:36 -0800 Subject: net: dsa: b53: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index db830a1141d9..63e02a54d537 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -814,8 +814,8 @@ void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) unsigned int i; for (i = 0; i < mib_size; i++) - memcpy(data + i * ETH_GSTRING_LEN, - mibs[i].name, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + mibs[i].name, ETH_GSTRING_LEN); } EXPORT_SYMBOL(b53_get_strings); -- cgit From 98409b2bbca36b578be8e66758e6acb96e4c91da Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:37 -0800 Subject: net: phy: marvell: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: d2fa47d9dd5c ("phy: marvell: Add ethtool statistics counters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 22d9bc9c33a4..0e0978d8a0eb 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1452,8 +1452,8 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data) int i; for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) { - memcpy(data + i * ETH_GSTRING_LEN, - marvell_hw_stats[i].string, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + marvell_hw_stats[i].string, ETH_GSTRING_LEN); } } -- cgit From 55f53567afe5f0cd2fd9e006b174c08c31c466f8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:38 -0800 Subject: net: phy: micrel: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 0f45310300f6..49be85afbea9 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -664,8 +664,8 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data) int i; for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) { - memcpy(data + i * ETH_GSTRING_LEN, - kszphy_hw_stats[i].string, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + kszphy_hw_stats[i].string, ETH_GSTRING_LEN); } } -- cgit From 8a17eefa235f73b60c0ca7d397d2e4f66f85f413 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Mar 2018 15:08:39 -0800 Subject: net: phy: broadcom: Use strlcpy() for ethtool::get_strings Our statistics strings are allocated at initialization without being bound to a specific size, yet, we would copy ETH_GSTRING_LEN bytes using memcpy() which would create out of bounds accesses, this was flagged by KASAN. Replace this with strlcpy() to make sure we are bound the source buffer size and we also always NUL-terminate strings. Fixes: 820ee17b8d3b ("net: phy: broadcom: Add support code for reading PHY counters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm-phy-lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 171010eb4d9c..5ad130c3da43 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -341,8 +341,8 @@ void bcm_phy_get_strings(struct phy_device *phydev, u8 *data) unsigned int i; for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++) - memcpy(data + i * ETH_GSTRING_LEN, - bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN); + strlcpy(data + i * ETH_GSTRING_LEN, + bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN); } EXPORT_SYMBOL_GPL(bcm_phy_get_strings); -- cgit From 22b8d8115d1b940d548a5fddcbce4cdd30083cfc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 2 Mar 2018 16:34:22 -0800 Subject: ethernet: natsemi: correct spelling Correct spelling of National Semi-conductor (no hyphen) in drivers/net/ethernet/. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/Kconfig | 2 +- drivers/net/ethernet/natsemi/Kconfig | 6 +++--- drivers/net/ethernet/natsemi/Makefile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 29c3075bfb05..fdc673484add 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -3,7 +3,7 @@ # config NET_VENDOR_8390 - bool "National Semi-conductor 8390 devices" + bool "National Semiconductor 8390 devices" default y depends on NET_VENDOR_NATSEMI ---help--- diff --git a/drivers/net/ethernet/natsemi/Kconfig b/drivers/net/ethernet/natsemi/Kconfig index a10ef50e4f12..017fb2322589 100644 --- a/drivers/net/ethernet/natsemi/Kconfig +++ b/drivers/net/ethernet/natsemi/Kconfig @@ -1,16 +1,16 @@ # -# National Semi-conductor device configuration +# National Semiconductor device configuration # config NET_VENDOR_NATSEMI - bool "National Semi-conductor devices" + bool "National Semiconductor devices" default y ---help--- If you have a network (Ethernet) card belonging to this class, say Y. Note that the answer to this question doesn't directly affect the kernel: saying N will just cause the configurator to skip all - the questions about National Semi-conductor devices. If you say Y, + the questions about National Semiconductor devices. If you say Y, you will be asked for your specific card in the following questions. if NET_VENDOR_NATSEMI diff --git a/drivers/net/ethernet/natsemi/Makefile b/drivers/net/ethernet/natsemi/Makefile index cc664977596e..a759aa09ef59 100644 --- a/drivers/net/ethernet/natsemi/Makefile +++ b/drivers/net/ethernet/natsemi/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Makefile for the National Semi-conductor Sonic devices. +# Makefile for the National Semiconductor Sonic devices. # obj-$(CONFIG_MACSONIC) += macsonic.o -- cgit From 2cbb4ea7de167b02ffa63e9cdfdb07a7e7094615 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 16 Feb 2018 11:03:03 -0800 Subject: net: Only honor ifindex in IP_PKTINFO if non-0 Only allow ifindex from IP_PKTINFO to override SO_BINDTODEVICE settings if the index is actually set in the message. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 9c41a0cef1a5..74c962b9b09c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) return -EINVAL; - ipc->oif = src_info->ipi6_ifindex; + if (src_info->ipi6_ifindex) + ipc->oif = src_info->ipi6_ifindex; ipc->addr = src_info->ipi6_addr.s6_addr32[3]; continue; } @@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) return -EINVAL; info = (struct in_pktinfo *)CMSG_DATA(cmsg); - ipc->oif = info->ipi_ifindex; + if (info->ipi_ifindex) + ipc->oif = info->ipi_ifindex; ipc->addr = info->ipi_spec_dst.s_addr; break; } -- cgit From 20d5de51e7052ae3fb645d8c5c584ee7383b2a93 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 6 Mar 2018 14:50:10 +0100 Subject: tools: bpftool: fix compilation with older headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compilation of bpftool on a distro that lacks eBPF support in the installed kernel headers fails with: common.c: In function ‘is_bpffs’: common.c:96:40: error: ‘BPF_FS_MAGIC’ undeclared (first use in this function) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; ^ Fix this the same way it is already in tools/lib/bpf/libbpf.c and tools/lib/api/fs/fs.c. Signed-off-by: Jiri Benc Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 0b482c0070e0..465995281dcd 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -55,6 +55,10 @@ #include "main.h" +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + void p_err(const char *fmt, ...) { va_list ap; -- cgit From 87cdf3148b11d46382dbce2754ae7036aba96380 Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Sun, 4 Mar 2018 21:03:52 +0200 Subject: xfrm: Verify MAC header exists before overwriting eth_hdr(skb)->h_proto Artem Savkov reported that commit 5efec5c655dd leads to a packet loss under IPSec configuration. It appears that his setup consists of a TUN device, which does not have a MAC header. Make sure MAC header exists. Note: TUN device sets a MAC header pointer, although it does not have one. Fixes: 5efec5c655dd ("xfrm: Fix eth_hdr(skb)->h_proto to reflect inner IP version") Reported-by: Artem Savkov Tested-by: Artem Savkov Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_mode_tunnel.c | 3 ++- net/ipv6/xfrm6_mode_tunnel.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 63faeee989a9..2a9764bd1719 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -92,7 +92,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); - eth_hdr(skb)->h_proto = skb->protocol; + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index bb935a3b7fea..de1b0b8c53b0 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -92,7 +92,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); - eth_hdr(skb)->h_proto = skb->protocol; + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; err = 0; -- cgit From 933897342d0714ae1c10729cbaeecea0c6178db5 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 28 Feb 2018 21:15:19 +0100 Subject: brcmfmac: add possibility to obtain firmware error The feature module needs to evaluate the actual firmware error return upon a control command. This adds a flag to struct brcmf_if that the caller can set. This flag is checked to determine the error code that needs to be returned. Fixes: b69c1df47281 ("brcmfmac: separate firmware errors from i/o errors") Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h | 2 ++ drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c | 10 ++++++++++ drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c | 3 +++ 3 files changed, 15 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index df8a1ecb9924..232dcbb83311 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -181,6 +181,7 @@ enum brcmf_netif_stop_reason { * @netif_stop_lock: spinlock for update netif_stop from multiple sources. * @pend_8021x_cnt: tracks outstanding number of 802.1x frames. * @pend_8021x_wait: used for signalling change in count. + * @fwil_fwerr: flag indicating fwil layer should return firmware error codes. */ struct brcmf_if { struct brcmf_pub *drvr; @@ -198,6 +199,7 @@ struct brcmf_if { wait_queue_head_t pend_8021x_wait; struct in6_addr ipv6_addr_tbl[NDOL_MAX_ENTRIES]; u8 ipv6addr_idx; + bool fwil_fwerr; }; int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index 47de35a33853..bede7b7fd996 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -104,6 +104,9 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp, u32 data; int err; + /* we need to know firmware error */ + ifp->fwil_fwerr = true; + err = brcmf_fil_iovar_int_get(ifp, name, &data); if (err == 0) { brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]); @@ -112,6 +115,8 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp, brcmf_dbg(TRACE, "%s feature check failed: %d\n", brcmf_feat_names[id], err); } + + ifp->fwil_fwerr = false; } static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp, @@ -120,6 +125,9 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp, { int err; + /* we need to know firmware error */ + ifp->fwil_fwerr = true; + err = brcmf_fil_iovar_data_set(ifp, name, data, len); if (err != -BRCMF_FW_UNSUPPORTED) { brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]); @@ -128,6 +136,8 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp, brcmf_dbg(TRACE, "%s feature check failed: %d\n", brcmf_feat_names[id], err); } + + ifp->fwil_fwerr = false; } #define MAX_CAPS_BUFFER_SIZE 512 diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c index f2cfdd3b2bf1..fc5751116d99 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c @@ -131,6 +131,9 @@ brcmf_fil_cmd_data(struct brcmf_if *ifp, u32 cmd, void *data, u32 len, bool set) brcmf_fil_get_errstr((u32)(-fwerr)), fwerr); err = -EBADE; } + if (ifp->fwil_fwerr) + return fwerr; + return err; } -- cgit From 455f3e76cfc0d893585a5f358b9ddbe9c1e1e53b Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 28 Feb 2018 21:15:20 +0100 Subject: brcmfmac: fix P2P_DEVICE ethernet address generation The firmware has a requirement that the P2P_DEVICE address should be different from the address of the primary interface. When not specified by user-space, the driver generates the MAC address for the P2P_DEVICE interface using the MAC address of the primary interface and setting the locally administered bit. However, the MAC address of the primary interface may already have that bit set causing the creation of the P2P_DEVICE interface to fail with -EBUSY. Fix this by using a random address instead to determine the P2P_DEVICE address. Cc: stable@vger.kernel.org # 3.10.y Reported-by: Hans de Goede Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 2ee54133efa1..82064e909784 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -462,25 +462,23 @@ static int brcmf_p2p_set_firmware(struct brcmf_if *ifp, u8 *p2p_mac) * @dev_addr: optional device address. * * P2P needs mac addresses for P2P device and interface. If no device - * address it specified, these are derived from the primary net device, ie. - * the permanent ethernet address of the device. + * address it specified, these are derived from a random ethernet + * address. */ static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr) { - struct brcmf_if *pri_ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp; - bool local_admin = false; + bool random_addr = false; - if (!dev_addr || is_zero_ether_addr(dev_addr)) { - dev_addr = pri_ifp->mac_addr; - local_admin = true; - } + if (!dev_addr || is_zero_ether_addr(dev_addr)) + random_addr = true; - /* Generate the P2P Device Address. This consists of the device's - * primary MAC address with the locally administered bit set. + /* Generate the P2P Device Address obtaining a random ethernet + * address with the locally administered bit set. */ - memcpy(p2p->dev_addr, dev_addr, ETH_ALEN); - if (local_admin) - p2p->dev_addr[0] |= 0x02; + if (random_addr) + eth_random_addr(p2p->dev_addr); + else + memcpy(p2p->dev_addr, dev_addr, ETH_ALEN); /* Generate the P2P Interface Address. If the discovery and connection * BSSCFGs need to simultaneously co-exist, then this address must be -- cgit From 25b5cdfcce1b57971840505dfc78556bd12dea6d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Mar 2018 16:01:48 +0100 Subject: dt-bindings: net: renesas-ravb: Make stream buffer optional The Stream Buffer for EtherAVB-IF (STBE) is an optional component, and is not present on all SoCs. Document this in the DT bindings, including a list of SoCs that do have it. Fixes: 785ec87483d1e24a ("ravb: document R8A77970 bindings") Fixes: f231c4178a655b09 ("dt-bindings: net: renesas-ravb: Add support for R8A77995 RAVB") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Acked-by: Sergei Shtylyov Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 92fd4b2f17b2..b4dc455eb155 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -27,7 +27,11 @@ Required properties: SoC-specific version corresponding to the platform first followed by the generic version. -- reg: offset and length of (1) the register block and (2) the stream buffer. +- reg: Offset and length of (1) the register block and (2) the stream buffer. + The region for the register block is mandatory. + The region for the stream buffer is optional, as it is only present on + R-Car Gen2 and RZ/G1 SoCs, and on R-Car H3 (R8A7795), M3-W (R8A7796), + and M3-N (R8A77965). - interrupts: A list of interrupt-specifiers, one for each entry in interrupt-names. If interrupt-names is not present, an interrupt specifier -- cgit From d3dcf8eb615537526bd42ff27a081d46d337816e Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 4 Mar 2018 17:29:48 +0200 Subject: rhashtable: Fix rhlist duplicates insertion When inserting duplicate objects (those with the same key), current rhlist implementation messes up the chain pointers by updating the bucket pointer instead of prev next pointer to the newly inserted node. This causes missing elements on removal and travesal. Fix that by properly updating pprev pointer to point to the correct rhash_head next pointer. Issue: 1241076 Change-Id: I86b2c140bcb4aeb10b70a72a267ff590bb2b17e7 Fixes: ca26893f05e8 ('rhashtable: Add rhlist interface') Signed-off-by: Paul Blakey Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- lib/rhashtable.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c9df2527e0cd..668a21f04b09 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -766,8 +766,10 @@ slow_path: if (!key || (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } data = rht_obj(ht, head); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 3825c30aaa36..47de025b6245 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -506,8 +506,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, if (!key || (ht->p.obj_cmpfn ? ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } if (!ht->rhlist) return rht_obj(ht, head); -- cgit From 499ac3b60f657dae82055fc81c7b01e6242ac9bc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 4 Mar 2018 17:29:49 +0200 Subject: test_rhashtable: add test case for rhltable with duplicate objects Tries to insert duplicates in the middle of bucket's chain: bucket 1: [[val 21 (tid=1)]] -> [[ val 1 (tid=2), val 1 (tid=0) ]] Reuses tid to distinguish the elements insertion order. Signed-off-by: Paul Blakey Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 76d3667fdea2..f4000c137dbe 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -79,6 +79,21 @@ struct thread_data { struct test_obj *objs; }; +static u32 my_hashfn(const void *data, u32 len, u32 seed) +{ + const struct test_obj_rhl *obj = data; + + return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE; +} + +static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct test_obj_rhl *test_obj = obj; + const struct test_obj_val *val = arg->key; + + return test_obj->value.id - val->id; +} + static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), @@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = { .nulls_base = (3U << RHT_BASE_SHIFT), }; +static struct rhashtable_params test_rht_params_dup = { + .head_offset = offsetof(struct test_obj_rhl, list_node), + .key_offset = offsetof(struct test_obj_rhl, value), + .key_len = sizeof(struct test_obj_val), + .hashfn = jhash, + .obj_hashfn = my_hashfn, + .obj_cmpfn = my_cmpfn, + .nelem_hint = 128, + .automatic_shrinking = false, +}; + static struct semaphore prestart_sem; static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0); @@ -465,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array, return err; } +static unsigned int __init print_ht(struct rhltable *rhlt) +{ + struct rhashtable *ht; + const struct bucket_table *tbl; + char buff[512] = ""; + unsigned int i, cnt = 0; + + ht = &rhlt->ht; + tbl = rht_dereference(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + struct test_obj_rhl *p; + + pos = rht_dereference(tbl->buckets[i], ht); + next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; + + if (!rht_is_a_nulls(pos)) { + sprintf(buff, "%s\nbucket[%d] -> ", buff, i); + } + + while (!rht_is_a_nulls(pos)) { + struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead); + sprintf(buff, "%s[[", buff); + do { + pos = &list->rhead; + list = rht_dereference(list->next, ht); + p = rht_obj(ht, pos); + + sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid, + list? ", " : " "); + cnt++; + } while (list); + + pos = next, + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; + + sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? " -> " : ""); + } + } + printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff); + + return cnt; +} + +static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects, + int cnt, bool slow) +{ + struct rhltable rhlt; + unsigned int i, ret; + const char *key; + int err = 0; + + err = rhltable_init(&rhlt, &test_rht_params_dup); + if (WARN_ON(err)) + return err; + + for (i = 0; i < cnt; i++) { + rhl_test_objects[i].value.tid = i; + key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); + key += test_rht_params_dup.key_offset; + + if (slow) { + err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, + &rhl_test_objects[i].list_node.rhead)); + if (err == -EAGAIN) + err = 0; + } else + err = rhltable_insert(&rhlt, + &rhl_test_objects[i].list_node, + test_rht_params_dup); + if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast")) + goto skip_print; + } + + ret = print_ht(&rhlt); + WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast"); + +skip_print: + rhltable_destroy(&rhlt); + + return 0; +} + +static int __init test_insert_duplicates_run(void) +{ + struct test_obj_rhl rhl_test_objects[3] = {}; + + pr_info("test inserting duplicates\n"); + + /* two different values that map to same bucket */ + rhl_test_objects[0].value.id = 1; + rhl_test_objects[1].value.id = 21; + + /* and another duplicate with same as [0] value + * which will be second on the bucket list */ + rhl_test_objects[2].value.id = rhl_test_objects[0].value.id; + + test_insert_dup(rhl_test_objects, 2, false); + test_insert_dup(rhl_test_objects, 3, false); + test_insert_dup(rhl_test_objects, 2, true); + test_insert_dup(rhl_test_objects, 3, true); + + return 0; +} + static int thread_lookup_test(struct thread_data *tdata) { unsigned int entries = tdata->entries; @@ -613,6 +745,8 @@ static int __init test_rht_init(void) do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); + test_insert_duplicates_run(); + if (!tcount) return 0; -- cgit From 803fafbe0cd522fa6b9e41ca3b96cfb2e2a2222d Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Sun, 4 Mar 2018 21:48:17 +0300 Subject: fsl/fman: avoid sleeping in atomic context while adding an address __dev_mc_add grabs an adress spinlock so use atomic context in kmalloc. / # ifconfig eth0 inet 192.168.0.111 [ 89.331622] BUG: sleeping function called from invalid context at mm/slab.h:420 [ 89.339002] in_atomic(): 1, irqs_disabled(): 0, pid: 1035, name: ifconfig [ 89.345799] 2 locks held by ifconfig/1035: [ 89.349908] #0: (rtnl_mutex){+.+.}, at: [<(ptrval)>] devinet_ioctl+0xc0/0x8a0 [ 89.357258] #1: (_xmit_ETHER){+...}, at: [<(ptrval)>] __dev_mc_add+0x28/0x80 [ 89.364520] CPU: 1 PID: 1035 Comm: ifconfig Not tainted 4.16.0-rc3-dirty #8 [ 89.371464] Call Trace: [ 89.373908] [e959db60] [c066f948] dump_stack+0xa4/0xfc (unreliable) [ 89.380177] [e959db80] [c00671d8] ___might_sleep+0x248/0x280 [ 89.385833] [e959dba0] [c01aec34] kmem_cache_alloc_trace+0x174/0x320 [ 89.392179] [e959dbd0] [c04ab920] dtsec_add_hash_mac_address+0x130/0x240 [ 89.398874] [e959dc00] [c04a9d74] set_multi+0x174/0x1b0 [ 89.404093] [e959dc30] [c04afb68] dpaa_set_rx_mode+0x68/0xe0 [ 89.409745] [e959dc40] [c057baf8] __dev_mc_add+0x58/0x80 [ 89.415052] [e959dc60] [c060fd64] igmp_group_added+0x164/0x190 [ 89.420878] [e959dca0] [c060ffa8] ip_mc_inc_group+0x218/0x460 [ 89.426617] [e959dce0] [c06120fc] ip_mc_up+0x3c/0x190 [ 89.431662] [e959dd10] [c0607270] inetdev_event+0x250/0x620 [ 89.437227] [e959dd50] [c005f190] notifier_call_chain+0x80/0xf0 [ 89.443138] [e959dd80] [c0573a74] __dev_notify_flags+0x54/0xf0 [ 89.448964] [e959dda0] [c05743f8] dev_change_flags+0x48/0x60 [ 89.454615] [e959ddc0] [c0606744] devinet_ioctl+0x544/0x8a0 [ 89.460180] [e959de10] [c060987c] inet_ioctl+0x9c/0x1f0 [ 89.465400] [e959de80] [c05479a8] sock_ioctl+0x168/0x460 [ 89.470708] [e959ded0] [c01cf3ec] do_vfs_ioctl+0xac/0x8c0 [ 89.476099] [e959df20] [c01cfc40] SyS_ioctl+0x40/0xc0 [ 89.481147] [e959df40] [c0011318] ret_from_syscall+0x0/0x3c [ 89.486715] --- interrupt: c01 at 0x1006943c [ 89.486715] LR = 0x100c45ec Signed-off-by: Denis Kirjanov Acked-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_dtsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index ea43b4974149..7af31ddd093f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1100,7 +1100,7 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr) set_bucket(dtsec->regs, bucket, true); /* Create element to be added to the driver hash table */ - hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL); + hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC); if (!hash_entry) return -ENOMEM; hash_entry->addr = addr; -- cgit From 35d889d10b649fda66121891ec05eca88150059d Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Mon, 5 Mar 2018 20:52:54 +0300 Subject: sch_netem: fix skb leak in netem_enqueue() When we exceed current packets limit and we have more than one segment in the list returned by skb_gso_segment(), netem drops only the first one, skipping the rest, hence kmemleak reports: unreferenced object 0xffff880b5d23b600 (size 1024): comm "softirq", pid 0, jiffies 4384527763 (age 2770.629s) hex dump (first 32 bytes): 00 80 23 5d 0b 88 ff ff 00 00 00 00 00 00 00 00 ..#]............ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d8a19b9d>] __alloc_skb+0xc9/0x520 [<000000001709b32f>] skb_segment+0x8c8/0x3710 [<00000000c7b9bb88>] tcp_gso_segment+0x331/0x1830 [<00000000c921cba1>] inet_gso_segment+0x476/0x1370 [<000000008b762dd4>] skb_mac_gso_segment+0x1f9/0x510 [<000000002182660a>] __skb_gso_segment+0x1dd/0x620 [<00000000412651b9>] netem_enqueue+0x1536/0x2590 [sch_netem] [<0000000005d3b2a9>] __dev_queue_xmit+0x1167/0x2120 [<00000000fc5f7327>] ip_finish_output2+0x998/0xf00 [<00000000d309e9d3>] ip_output+0x1aa/0x2c0 [<000000007ecbd3a4>] tcp_transmit_skb+0x18db/0x3670 [<0000000042d2a45f>] tcp_write_xmit+0x4d4/0x58c0 [<0000000056a44199>] tcp_tasklet_func+0x3d9/0x540 [<0000000013d06d02>] tasklet_action+0x1ca/0x250 [<00000000fcde0b8b>] __do_softirq+0x1b4/0x5a3 [<00000000e7ed027c>] irq_exit+0x1e2/0x210 Fix it by adding the rest of the segments, if any, to skb 'to_free' list. Add new __qdisc_drop_all() and qdisc_drop_all() functions because they can be useful in the future if we need to drop segmented GSO packets in other places. Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue") Signed-off-by: Alexey Kodanev Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sch_generic.h | 19 +++++++++++++++++++ net/sched/sch_netem.c | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e2ab13687fb9..2092d33194dd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -824,6 +824,16 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) *to_free = skb; } +static inline void __qdisc_drop_all(struct sk_buff *skb, + struct sk_buff **to_free) +{ + if (skb->prev) + skb->prev->next = *to_free; + else + skb->next = *to_free; + *to_free = skb; +} + static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, struct qdisc_skb_head *qh, struct sk_buff **to_free) @@ -956,6 +966,15 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_DROP; } +static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + __qdisc_drop_all(skb, to_free); + qdisc_qstats_drop(sch); + + return NET_XMIT_DROP; +} + /* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how long it will take to send a packet given its size. */ diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 7c179addebcd..7d6801fc5340 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -509,7 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (unlikely(sch->q.qlen >= sch->limit)) - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_all(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); -- cgit From 2695578b896aea472b2c0dcbe9d92daa71738484 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Mar 2018 11:41:13 -0800 Subject: net: usbnet: fix potential deadlock on 32bit hosts Marek reported a LOCKDEP issue occurring on 32bit host, that we tracked down to the fact that usbnet could either run from soft or hard irqs. This patch adds u64_stats_update_begin_irqsave() and u64_stats_update_end_irqrestore() helpers to solve this case. [ 17.768040] ================================ [ 17.772239] WARNING: inconsistent lock state [ 17.776511] 4.16.0-rc3-next-20180227-00007-g876c53a7493c #453 Not tainted [ 17.783329] -------------------------------- [ 17.787580] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ 17.793607] swapper/0/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 17.798751] (&syncp->seq#5){?.-.}, at: [<9b22e5f0>] asix_rx_fixup_internal+0x188/0x288 [ 17.806790] {IN-HARDIRQ-W} state was registered at: [ 17.811677] tx_complete+0x100/0x208 [ 17.815319] __usb_hcd_giveback_urb+0x60/0xf0 [ 17.819770] xhci_giveback_urb_in_irq+0xa8/0x240 [ 17.824469] xhci_td_cleanup+0xf4/0x16c [ 17.828367] xhci_irq+0xe74/0x2240 [ 17.831827] usb_hcd_irq+0x24/0x38 [ 17.835343] __handle_irq_event_percpu+0x98/0x510 [ 17.840111] handle_irq_event_percpu+0x1c/0x58 [ 17.844623] handle_irq_event+0x38/0x5c [ 17.848519] handle_fasteoi_irq+0xa4/0x138 [ 17.852681] generic_handle_irq+0x18/0x28 [ 17.856760] __handle_domain_irq+0x6c/0xe4 [ 17.860941] gic_handle_irq+0x54/0xa0 [ 17.864666] __irq_svc+0x70/0xb0 [ 17.867964] arch_cpu_idle+0x20/0x3c [ 17.871578] arch_cpu_idle+0x20/0x3c [ 17.875190] do_idle+0x144/0x218 [ 17.878468] cpu_startup_entry+0x18/0x1c [ 17.882454] start_kernel+0x394/0x400 [ 17.886177] irq event stamp: 161912 [ 17.889616] hardirqs last enabled at (161912): [<7bedfacf>] __netdev_alloc_skb+0xcc/0x140 [ 17.897893] hardirqs last disabled at (161911): [] __netdev_alloc_skb+0x94/0x140 [ 17.904903] exynos5-hsi2c 12ca0000.i2c: tx timeout [ 17.906116] softirqs last enabled at (161904): [<387102ff>] irq_enter+0x78/0x80 [ 17.906123] softirqs last disabled at (161905): [] irq_exit+0x134/0x158 [ 17.925722]. [ 17.925722] other info that might help us debug this: [ 17.933435] Possible unsafe locking scenario: [ 17.933435]. [ 17.940331] CPU0 [ 17.942488] ---- [ 17.944894] lock(&syncp->seq#5); [ 17.948274] [ 17.950847] lock(&syncp->seq#5); [ 17.954386]. [ 17.954386] *** DEADLOCK *** [ 17.954386]. [ 17.962422] no locks held by swapper/0/0. Fixes: c8b5d129ee29 ("net: usbnet: support 64bit stats") Signed-off-by: Eric Dumazet Reported-by: Marek Szyprowski Cc: Greg Ungerer Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 10 ++++++---- include/linux/u64_stats_sync.h | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 8a22ff67b026..d9eea8cfe6cb 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -315,6 +315,7 @@ static void __usbnet_status_stop_force(struct usbnet *dev) void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) { struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64); + unsigned long flags; int status; if (test_bit(EVENT_RX_PAUSED, &dev->flags)) { @@ -326,10 +327,10 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) if (skb->protocol == 0) skb->protocol = eth_type_trans (skb, dev->net); - u64_stats_update_begin(&stats64->syncp); + flags = u64_stats_update_begin_irqsave(&stats64->syncp); stats64->rx_packets++; stats64->rx_bytes += skb->len; - u64_stats_update_end(&stats64->syncp); + u64_stats_update_end_irqrestore(&stats64->syncp, flags); netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n", skb->len + sizeof (struct ethhdr), skb->protocol); @@ -1248,11 +1249,12 @@ static void tx_complete (struct urb *urb) if (urb->status == 0) { struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64); + unsigned long flags; - u64_stats_update_begin(&stats64->syncp); + flags = u64_stats_update_begin_irqsave(&stats64->syncp); stats64->tx_packets += entry->packets; stats64->tx_bytes += entry->length; - u64_stats_update_end(&stats64->syncp); + u64_stats_update_end_irqrestore(&stats64->syncp, flags); } else { dev->net->stats.tx_errors++; diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 5bdbd9f49395..07ee0f84a46c 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -90,6 +90,28 @@ static inline void u64_stats_update_end(struct u64_stats_sync *syncp) #endif } +static inline unsigned long +u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags = 0; + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + local_irq_save(flags); + write_seqcount_begin(&syncp->seq); +#endif + return flags; +} + +static inline void +u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_end(&syncp->seq); + local_irq_restore(flags); +#endif +} + static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) -- cgit From 9de506a547c0d172d13a91d69b1a399e6a2c0efa Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 5 Mar 2018 23:50:46 +0200 Subject: qed: Free RoCE ILT Memory on rmmod qedr Rdma requires ILT Memory to be allocated for it's QPs. Each ILT entry points to a page used by several Rdma QPs. To avoid allocating all the memory in advance, the rdma implementation dynamically allocates memory as more QPs are added, however it does not dynamically free the memory. The memory should have been freed on rmmod qedr, but isn't. This patch adds the memory freeing on rmmod qedr (currently it will be freed with qed is removed). An outcome of this bug, is that if qedr is unloaded and loaded without unloaded qed, there will be no more RoCE traffic. The reason these are related, is that the logic of detecting the first QP ever opened is by asking whether ILT memory for RoCE has been allocated. In addition, this patch modifies freeing of the Task context to always use the PROTOCOLID_ROCE and not the protocol passed, this is because task context for iWARP and ROCE both use the ROCE protocol id, as opposed to the connection context. Fixes: dbb799c39717 ("qed: Initialize hardware for new protocols") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 5 ++++- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 6f546e869d8d..00f41c145d4d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2480,7 +2480,10 @@ int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto) if (rc) return rc; - /* Free Task CXT */ + /* Free Task CXT ( Intentionally RoCE as task-id is shared between + * RoCE and iWARP ) + */ + proto = PROTOCOLID_ROCE; rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_TASK, 0, qed_cxt_get_proto_tid_count(p_hwfn, proto)); if (rc) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 5d040b873137..f3ee6538b553 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -380,6 +380,7 @@ static void qed_rdma_free(struct qed_hwfn *p_hwfn) qed_rdma_free_reserved_lkey(p_hwfn); qed_rdma_resc_free(p_hwfn); + qed_cxt_free_proto_ilt(p_hwfn, p_hwfn->p_rdma_info->proto); } static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) -- cgit From cc5db3150e87fe7f7e947bf333b6c1c97f848ecb Mon Sep 17 00:00:00 2001 From: Hemanth Puranik Date: Tue, 6 Mar 2018 08:18:06 +0530 Subject: net: qcom/emac: Use proper free methods during TX This patch fixes the warning messages/call traces seen if DMA debug is enabled, In case of fragmented skb's memory was allocated using dma_map_page but freed using dma_unmap_single. This patch modifies buffer allocations in TX path to use dma_map_page in all the places and dma_unmap_page while freeing the buffers. Signed-off-by: Hemanth Puranik Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 9cbb27263742..d5a32b7c7dc5 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1194,9 +1194,9 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) while (tx_q->tpd.consume_idx != hw_consume_idx) { tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx); if (tpbuf->dma_addr) { - dma_unmap_single(adpt->netdev->dev.parent, - tpbuf->dma_addr, tpbuf->length, - DMA_TO_DEVICE); + dma_unmap_page(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); tpbuf->dma_addr = 0; } @@ -1353,9 +1353,11 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt, tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); tpbuf->length = mapped_len; - tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, - skb->data, tpbuf->length, - DMA_TO_DEVICE); + tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent, + virt_to_page(skb->data), + offset_in_page(skb->data), + tpbuf->length, + DMA_TO_DEVICE); ret = dma_mapping_error(adpt->netdev->dev.parent, tpbuf->dma_addr); if (ret) @@ -1371,9 +1373,12 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt, if (mapped_len < len) { tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); tpbuf->length = len - mapped_len; - tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, - skb->data + mapped_len, - tpbuf->length, DMA_TO_DEVICE); + tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent, + virt_to_page(skb->data + + mapped_len), + offset_in_page(skb->data + + mapped_len), + tpbuf->length, DMA_TO_DEVICE); ret = dma_mapping_error(adpt->netdev->dev.parent, tpbuf->dma_addr); if (ret) -- cgit From e9fa1495d738e34fcec88a3d2ec9101a9ee5b310 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 6 Mar 2018 11:10:19 +0100 Subject: ipv6: Reflect MTU changes on PMTU of exceptions for MTU-less routes Currently, administrative MTU changes on a given netdevice are not reflected on route exceptions for MTU-less routes, with a set PMTU value, for that device: # ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a proto kernel src 2001:db8::a metric 256 pref medium # ping6 -c 1 -q -s10000 2001:db8::b > /dev/null # ip netns exec a ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0 cache expires 571sec mtu 4926 pref medium # ip link set dev vti_a mtu 3000 # ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0 cache expires 571sec mtu 4926 pref medium # ip link set dev vti_a mtu 9000 # ip -6 route get 2001:db8::b 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0 cache expires 571sec mtu 4926 pref medium The first issue is that since commit fb56be83e43d ("net-ipv6: on device mtu change do not add mtu to mtu-less routes") we don't call rt6_exceptions_update_pmtu() from rt6_mtu_change_route(), which handles administrative MTU changes, if the regular route is MTU-less. However, PMTU exceptions should be always updated, as long as RTAX_MTU is not locked. Keep the check for MTU-less main route, as introduced by that commit, but, for exceptions, call rt6_exceptions_update_pmtu() regardless of that check. Once that is fixed, one problem remains: MTU changes are not reflected if the new MTU is higher than the previous one, because rt6_exceptions_update_pmtu() doesn't allow that. We should instead allow PMTU increase if the old PMTU matches the local MTU, as that implies that the old MTU was the lowest in the path, and PMTU discovery might lead to different results. The existing check in rt6_mtu_change_route() correctly took that case into account (for regular routes only), so factor it out and re-use it also in rt6_exceptions_update_pmtu(). While at it, fix comments style and grammar, and try to be a bit more descriptive. Reported-by: Xiumei Mu Fixes: fb56be83e43d ("net-ipv6: on device mtu change do not add mtu to mtu-less routes") Fixes: f5bbe7ee79c2 ("ipv6: prepare rt6_mtu_change() for exception table") Signed-off-by: Stefano Brivio Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 71 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9dcfadddd800..0db4218c9186 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1509,7 +1509,30 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) } } -static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) +static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, + struct rt6_info *rt, int mtu) +{ + /* If the new MTU is lower than the route PMTU, this new MTU will be the + * lowest MTU in the path: always allow updating the route PMTU to + * reflect PMTU decreases. + * + * If the new MTU is higher, and the route PMTU is equal to the local + * MTU, this means the old MTU is the lowest in the path, so allow + * updating it: if other nodes now have lower MTUs, PMTU discovery will + * handle this. + */ + + if (dst_mtu(&rt->dst) >= mtu) + return true; + + if (dst_mtu(&rt->dst) == idev->cnf.mtu6) + return true; + + return false; +} + +static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, + struct rt6_info *rt, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1518,20 +1541,22 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, lockdep_is_held(&rt6_exception_lock)); - if (bucket) { - for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { - hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { - struct rt6_info *entry = rt6_ex->rt6i; - /* For RTF_CACHE with rt6i_pmtu == 0 - * (i.e. a redirected route), - * the metrics of its rt->dst.from has already - * been updated. - */ - if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu) - entry->rt6i_pmtu = mtu; - } - bucket++; + if (!bucket) + return; + + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + struct rt6_info *entry = rt6_ex->rt6i; + + /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected + * route), the metrics of its rt->dst.from have already + * been updated. + */ + if (entry->rt6i_pmtu && + rt6_mtu_change_route_allowed(idev, entry, mtu)) + entry->rt6i_pmtu = mtu; } + bucket++; } } @@ -3809,25 +3834,13 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - /* - If new MTU is less than route PMTU, this new MTU will be the - lowest MTU in the path, update the route PMTU to reflect PMTU - decreases; if new MTU is greater than route PMTU, and the - old MTU is the lowest MTU in the path, update the route PMTU - to reflect the increase. In this case if the other nodes' MTU - also have the lowest MTU, TOO BIG MESSAGE will be lead to - PMTU discovery. - */ if (rt->dst.dev == arg->dev && - dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { spin_lock_bh(&rt6_exception_lock); - if (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + if (dst_metric_raw(&rt->dst, RTAX_MTU) && + rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); - } - rt6_exceptions_update_pmtu(rt, arg->mtu); + rt6_exceptions_update_pmtu(idev, rt, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } return 0; -- cgit From e06513d78d54e6c7026c9043a39e2c01ee25bdbe Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Tue, 6 Mar 2018 09:00:06 -0600 Subject: net: smsc911x: Fix unload crash when link is up The smsc911x driver will crash if it is rmmod'ed while the netdev is up like: Call trace: phy_detach+0x94/0x150 phy_disconnect+0x40/0x50 smsc911x_stop+0x104/0x128 [smsc911x] __dev_close_many+0xb4/0x138 dev_close_many+0xbc/0x190 rollback_registered_many+0x140/0x460 rollback_registered+0x68/0xb0 unregister_netdevice_queue+0x100/0x118 unregister_netdev+0x28/0x38 smsc911x_drv_remove+0x58/0x130 [smsc911x] platform_drv_remove+0x30/0x50 device_release_driver_internal+0x15c/0x1f8 driver_detach+0x54/0x98 bus_remove_driver+0x64/0xe8 driver_unregister+0x34/0x60 platform_driver_unregister+0x20/0x30 smsc911x_cleanup_module+0x14/0xbca8 [smsc911x] SyS_delete_module+0x1e8/0x238 __sys_trace_return+0x0/0x4 This is caused by the mdiobus being unregistered/free'd and the code in phy_detach() attempting to manipulate mdio related structures from unregister_netdev() calling close() To fix this, we delay the mdiobus teardown until after the netdev is deregistered. Reported-by: Matt Sealey Signed-off-by: Jeremy Linton Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smsc911x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 012fb66eed8d..f0afb88d7bc2 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2335,14 +2335,14 @@ static int smsc911x_drv_remove(struct platform_device *pdev) pdata = netdev_priv(dev); BUG_ON(!pdata); BUG_ON(!pdata->ioaddr); - WARN_ON(dev->phydev); SMSC_TRACE(pdata, ifdown, "Stopping driver"); + unregister_netdev(dev); + mdiobus_unregister(pdata->mii_bus); mdiobus_free(pdata->mii_bus); - unregister_netdev(dev); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smsc911x-memory"); if (!res) -- cgit From a560002437d3646dafccecb1bf32d1685112ddda Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 6 Mar 2018 18:46:39 +0300 Subject: net: Fix hlist corruptions in inet_evict_bucket() inet_evict_bucket() iterates global list, and several tasks may call it in parallel. All of them hash the same fq->list_evictor to different lists, which leads to list corruption. This patch makes fq be hashed to expired list only if this has not been made yet by another task. Since inet_frag_alloc() allocates fq using kmem_cache_zalloc(), we may rely on list_evictor is initially unhashed. The problem seems to exist before async pernet_operations, as there was possible to have exit method to be executed in parallel with inet_frags::frags_work, so I add two Fixes tags. This also may go to stable. Fixes: d1fe19444d82 "inet: frag: don't re-use chainlist for evictor" Fixes: f84c6821aa54 "net: Convert pernet_subsys, registered from inet_init()" Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 26a3d0315728..e8ec28999f5c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -119,6 +119,9 @@ out: static bool inet_fragq_should_evict(const struct inet_frag_queue *q) { + if (!hlist_unhashed(&q->list_evictor)) + return false; + return q->net->low_thresh == 0 || frag_mem_limit(q->net) >= q->net->low_thresh; } -- cgit From 17cfe79a65f98abe535261856c5aef14f306dff7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Mar 2018 07:54:53 -0800 Subject: l2tp: do not accept arbitrary sockets syzkaller found an issue caused by lack of sufficient checks in l2tp_tunnel_create() RAW sockets can not be considered as UDP ones for instance. In another patch, we shall replace all pr_err() by less intrusive pr_debug() so that syzkaller can find other bugs faster. Acked-by: Guillaume Nault Acked-by: James Chapman ================================================================== BUG: KASAN: slab-out-of-bounds in setup_udp_tunnel_sock+0x3ee/0x5f0 net/ipv4/udp_tunnel.c:69 dst_release: dst:00000000d53d0d0f refcnt:-1 Write of size 1 at addr ffff8801d013b798 by task syz-executor3/6242 CPU: 1 PID: 6242 Comm: syz-executor3 Not tainted 4.16.0-rc2+ #253 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report+0x23b/0x360 mm/kasan/report.c:412 __asan_report_store1_noabort+0x17/0x20 mm/kasan/report.c:435 setup_udp_tunnel_sock+0x3ee/0x5f0 net/ipv4/udp_tunnel.c:69 l2tp_tunnel_create+0x1354/0x17f0 net/l2tp/l2tp_core.c:1596 pppol2tp_connect+0x14b1/0x1dd0 net/l2tp/l2tp_ppp.c:707 SYSC_connect+0x213/0x4a0 net/socket.c:1640 SyS_connect+0x24/0x30 net/socket.c:1621 do_syscall_64+0x280/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 83421c6f0bef..e22512e32827 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1457,9 +1457,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 encap = cfg->encap; /* Quick sanity checks */ + err = -EPROTONOSUPPORT; + if (sk->sk_type != SOCK_DGRAM) { + pr_debug("tunl %hu: fd %d wrong socket type\n", + tunnel_id, fd); + goto err; + } switch (encap) { case L2TP_ENCAPTYPE_UDP: - err = -EPROTONOSUPPORT; if (sk->sk_protocol != IPPROTO_UDP) { pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); @@ -1467,7 +1472,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } break; case L2TP_ENCAPTYPE_IP: - err = -EPROTONOSUPPORT; if (sk->sk_protocol != IPPROTO_L2TP) { pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP); -- cgit From 67f93df79aeefc3add4e4b31a752600f834236e2 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 6 Mar 2018 22:57:01 +0300 Subject: dccp: check sk for closed state in dccp_sendmsg() dccp_disconnect() sets 'dp->dccps_hc_tx_ccid' tx handler to NULL, therefore if DCCP socket is disconnected and dccp_sendmsg() is called after it, it will cause a NULL pointer dereference in dccp_write_xmit(). This crash and the reproducer was reported by syzbot. Looks like it is reproduced if commit 69c64866ce07 ("dccp: CVE-2017-8824: use-after-free in DCCP code") is applied. Reported-by: syzbot+f99ab3887ab65d70f816@syzkaller.appspotmail.com Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/dccp/proto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 15bdc002d90c..84cd4e3fd01b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -794,6 +794,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (skb == NULL) goto out_release; + if (sk->sk_state == DCCP_CLOSED) { + rc = -ENOTCONN; + goto out_discard; + } + skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc != 0) -- cgit From e05836ac07c77dd90377f8c8140bce2a44af5fe7 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Tue, 6 Mar 2018 17:15:12 -0500 Subject: tcp: purge write queue upon aborting the connection When the connection is aborted, there is no point in keeping the packets on the write queue until the connection is closed. Similar to a27fd7a8ed38 ('tcp: purge write queue upon RST'), this is essential for a correct MSG_ZEROCOPY implementation, because userspace cannot call close(fd) before receiving zerocopy signals even when the connection is aborted. Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 + net/ipv4/tcp_timer.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 48636aee23c3..8b8059b7af4d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3566,6 +3566,7 @@ int tcp_abort(struct sock *sk, int err) bh_unlock_sock(sk); local_bh_enable(); + tcp_write_queue_purge(sk); release_sock(sk); return 0; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 71fc60f1b326..f7d944855f8e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -34,6 +34,7 @@ static void tcp_write_err(struct sock *sk) sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; sk->sk_error_report(sk); + tcp_write_queue_purge(sk); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } -- cgit From b51f26b14683838825170387457176c1ffaea9f5 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 6 Mar 2018 17:27:44 -0500 Subject: net: don't unnecessarily load kernel modules in dev_ioctl() Starting with v4.16-rc1 we've been seeing a higher than usual number of requests for the kernel to load networking modules, even on events which shouldn't trigger a module load (e.g. ioctl(TCGETS)). Stephen Smalley suggested the problem may lie in commit 44c02a2c3dc5 ("dev_ioctl(): move copyin/copyout to callers") which moves changes the network dev_ioctl() function to always call dev_load(), regardless of the requested ioctl. This patch moves the dev_load() calls back into the individual ioctls while preserving the rest of the original patch. Reported-by: Dominick Grift Suggested-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 0ab1af04296c..a04e1e88bf3a 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c if (colon) *colon = 0; - dev_load(net, ifr->ifr_name); - /* * See which interface the caller is talking about. */ @@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: + dev_load(net, ifr->ifr_name); rcu_read_lock(); ret = dev_ifsioc_locked(net, ifr, cmd); rcu_read_unlock(); @@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c return ret; case SIOCETHTOOL: + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ethtool(net, ifr); rtnl_unlock(); @@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: + dev_load(net, ifr->ifr_name); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; rtnl_lock(); @@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); @@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); -- cgit From 016764de8b0d17e946832d7b6530434daa82df0e Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 7 Mar 2018 13:08:45 +0530 Subject: cxgb4: copy adap index to PF0-3 adapter instances instantiation of VF's on different adapters fails, copy adapter index and chip type to PF0-3 adapter instances to fix the issue. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 7b452e85de2a..33bc84185b82 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5181,6 +5181,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->name = pci_name(pdev); adapter->mbox = func; adapter->pf = func; + adapter->params.chip = chip; + adapter->adap_idx = adap_idx; adapter->msg_enable = DFLT_MSG_ENABLE; adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + (sizeof(struct mbox_cmd) * -- cgit From b06ef18a4c255609388ed6e068a1c69c797545e0 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 7 Mar 2018 13:10:24 +0530 Subject: cxgb4: do not set needs_free_netdev for mgmt dev's Do not set 'needs_free_netdev' as we do call free_netdev for mgmt net devices, doing both hits BUG_ON. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 33bc84185b82..61022b5f6743 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4970,7 +4970,6 @@ static void cxgb4_mgmt_setup(struct net_device *dev) /* Initialize the device structure. */ dev->netdev_ops = &cxgb4_mgmt_netdev_ops; dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; - dev->needs_free_netdev = true; } static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) -- cgit From 6007b080d2e2adb7af22bf29165f0594ea12b34c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 7 Mar 2018 22:10:01 +0100 Subject: bpf, x64: increase number of passes In Cilium some of the main programs we run today are hitting 9 passes on x64's JIT compiler, and we've had cases already where we surpassed the limit where the JIT then punts the program to the interpreter instead, leading to insertion failures due to CONFIG_BPF_JIT_ALWAYS_ON or insertion failures due to the prog array owner being JITed but the program to insert not (both must have the same JITed/non-JITed property). One concrete case the program image shrunk from 12,767 bytes down to 10,288 bytes where the image converged after 16 steps. I've measured that this took 340us in the JIT until it converges on my i7-6600U. Thus, increase the original limit we had from day one where the JIT covered cBPF only back then before we run into the case (as similar with the complexity limit) where we trip over this and hit program rejections. Also add a cond_resched() into the compilation loop, the JIT process runs without any locks and may sleep anyway. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 45e4eb5bcbb2..ce5b2ebd5701 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1188,7 +1188,7 @@ skip_init_addrs: * may converge on the last pass. In such case do one more * pass to emit the final image */ - for (pass = 0; pass < 10 || image; pass++) { + for (pass = 0; pass < 20 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; @@ -1215,6 +1215,7 @@ skip_init_addrs: } } oldproglen = proglen; + cond_resched(); } if (bpf_jit_enable > 1) -- cgit From cbcc607e18422555db569b593608aec26111cb0b Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 8 Mar 2018 12:42:10 +0200 Subject: team: Fix double free in error path The __send_and_alloc_skb() receives a skb ptr as a parameter but in case it fails the skb is not valid: - Send failed and released the skb internally. - Allocation failed. The current code tries to release the skb in case of failure which causes redundant freeing. Fixes: 9b00cf2d1024 ("team: implement multipart netlink messages for options transfers") Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a468439969df..56c701b73c12 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2395,7 +2395,7 @@ send_done: if (!nlh) { err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) - goto errout; + return err; goto send_done; } @@ -2681,7 +2681,7 @@ send_done: if (!nlh) { err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) - goto errout; + return err; goto send_done; } -- cgit From 3d07e0746fbbbe107e611d35719c009742e21b94 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 8 Mar 2018 23:34:35 +1100 Subject: docs: segmentation-offloads.txt: Correct TCP gso_types Pretty minor: just SKB_GSO_TCP -> SKB_GSO_TCPV4 and SKB_GSO_TCP6 -> SKB_GSO_TCPV6. Signed-off-by: Daniel Axtens Signed-off-by: David S. Miller --- Documentation/networking/segmentation-offloads.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index 23a8dd91a9ec..fc0c949e7f9c 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -20,8 +20,8 @@ TCP Segmentation Offload TCP segmentation allows a device to segment a single frame into multiple frames with a data payload size specified in skb_shinfo()->gso_size. -When TCP segmentation requested the bit for either SKB_GSO_TCP or -SKB_GSO_TCP6 should be set in skb_shinfo()->gso_type and +When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or +SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and skb_shinfo()->gso_size should be set to a non-zero value. TCP segmentation is dependent on support for the use of partial checksum -- cgit From de3d50aadd40bf68614db9fd157b275ce9c2d467 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:09 -0800 Subject: hv_netvsc: fix filter flags The recent change to not always enable all multicast and broadcast was broken; meant to set filter, not change flags. Fixes: 009f766ca238 ("hv_netvsc: filter multicast/broadcast") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 8927c483c217..411a3aee39b2 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -861,9 +861,9 @@ static void rndis_set_multicast(struct work_struct *w) filter = NDIS_PACKET_TYPE_PROMISCUOUS; } else { if (flags & IFF_ALLMULTI) - flags |= NDIS_PACKET_TYPE_ALL_MULTICAST; + filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; if (flags & IFF_BROADCAST) - flags |= NDIS_PACKET_TYPE_BROADCAST; + filter |= NDIS_PACKET_TYPE_BROADCAST; } rndis_filter_set_packet_filter(rdev, filter); -- cgit From 7eeb4a6ee4820c4e84895d252079a797f27fc80d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:10 -0800 Subject: hv_netvsc: avoid repeated updates of packet filter The netvsc driver can get repeated calls to netvsc_rx_mode during network setup; each of these calls ends up scheduling the lower layers to update tha packet filter. This update requires an request/response to the host. So avoid doing this if we already know that the correct packet filter value is set. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/rndis_filter.c | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 0db3bd1ea06f..cd538d5a7986 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -173,6 +173,7 @@ struct rndis_device { struct list_head req_list; struct work_struct mcast_work; + u32 filter; bool link_state; /* 0 - link up, 1 - link down */ diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 411a3aee39b2..00ec80c23fe5 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -825,13 +825,15 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev, struct rndis_set_request *set; int ret; + if (dev->filter == new_filter) + return 0; + request = get_rndis_request(dev, RNDIS_MSG_SET, RNDIS_MESSAGE_SIZE(struct rndis_set_request) + sizeof(u32)); if (!request) return -ENOMEM; - /* Setup the rndis set */ set = &request->request_msg.msg.set_req; set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; @@ -842,8 +844,10 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev, &new_filter, sizeof(u32)); ret = rndis_filter_send_request(dev, request); - if (ret == 0) + if (ret == 0) { wait_for_completion(&request->wait_event); + dev->filter = new_filter; + } put_rndis_request(dev, request); -- cgit From 35a57b7fef136fa3d5b735ba773f191b95110fa0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:11 -0800 Subject: hv_netvsc: fix locking for rx_mode The rx_mode operation handler is different than other callbacks in that is not always called with rtnl held. Therefore use RCU to ensure that references are valid. Fixes: bee9d41b37ea ("hv_netvsc: propagate rx filters to VF") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index cdb78eefab67..48d9fa7a66c2 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -89,15 +89,20 @@ static void netvsc_change_rx_flags(struct net_device *net, int change) static void netvsc_set_rx_mode(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); - struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev); + struct net_device *vf_netdev; + struct netvsc_device *nvdev; + rcu_read_lock(); + vf_netdev = rcu_dereference(ndev_ctx->vf_netdev); if (vf_netdev) { dev_uc_sync(vf_netdev, net); dev_mc_sync(vf_netdev, net); } - rndis_filter_update(nvdev); + nvdev = rcu_dereference(ndev_ctx->nvdev); + if (nvdev) + rndis_filter_update(nvdev); + rcu_read_unlock(); } static int netvsc_open(struct net_device *net) -- cgit From b0dee7910317f41f398838992516af6a3b981d86 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Mar 2018 13:49:12 -0800 Subject: hv_netvsc: fix locking during VF setup The dev_uc/mc_sync calls need to have the device address list locked. This was spotted by running with lockdep enabled. Fixes: bee9d41b37ea ("hv_netvsc: propagate rx filters to VF") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 48d9fa7a66c2..faea0be18924 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1851,8 +1851,12 @@ static void __netvsc_vf_setup(struct net_device *ndev, /* set multicast etc flags on VF */ dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE); + + /* sync address list from ndev to VF */ + netif_addr_lock_bh(ndev); dev_uc_sync(vf_netdev, ndev); dev_mc_sync(vf_netdev, ndev); + netif_addr_unlock_bh(ndev); if (netif_running(ndev)) { ret = dev_open(vf_netdev); -- cgit From 9f62c15f28b0d1d746734666d88a79f08ba1e43e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 8 Mar 2018 17:00:02 +0100 Subject: ipv6: fix access to non-linear packet in ndisc_fill_redirect_hdr_option() Fix the following slab-out-of-bounds kasan report in ndisc_fill_redirect_hdr_option when the incoming ipv6 packet is not linear and the accessed data are not in the linear data region of orig_skb. [ 1503.122508] ================================================================== [ 1503.122832] BUG: KASAN: slab-out-of-bounds in ndisc_send_redirect+0x94e/0x990 [ 1503.123036] Read of size 1184 at addr ffff8800298ab6b0 by task netperf/1932 [ 1503.123220] CPU: 0 PID: 1932 Comm: netperf Not tainted 4.16.0-rc2+ #124 [ 1503.123347] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-2.fc27 04/01/2014 [ 1503.123527] Call Trace: [ 1503.123579] [ 1503.123638] print_address_description+0x6e/0x280 [ 1503.123849] kasan_report+0x233/0x350 [ 1503.123946] memcpy+0x1f/0x50 [ 1503.124037] ndisc_send_redirect+0x94e/0x990 [ 1503.125150] ip6_forward+0x1242/0x13b0 [...] [ 1503.153890] Allocated by task 1932: [ 1503.153982] kasan_kmalloc+0x9f/0xd0 [ 1503.154074] __kmalloc_track_caller+0xb5/0x160 [ 1503.154198] __kmalloc_reserve.isra.41+0x24/0x70 [ 1503.154324] __alloc_skb+0x130/0x3e0 [ 1503.154415] sctp_packet_transmit+0x21a/0x1810 [ 1503.154533] sctp_outq_flush+0xc14/0x1db0 [ 1503.154624] sctp_do_sm+0x34e/0x2740 [ 1503.154715] sctp_primitive_SEND+0x57/0x70 [ 1503.154807] sctp_sendmsg+0xaa6/0x1b10 [ 1503.154897] sock_sendmsg+0x68/0x80 [ 1503.154987] ___sys_sendmsg+0x431/0x4b0 [ 1503.155078] __sys_sendmsg+0xa4/0x130 [ 1503.155168] do_syscall_64+0x171/0x3f0 [ 1503.155259] entry_SYSCALL_64_after_hwframe+0x42/0xb7 [ 1503.155436] Freed by task 1932: [ 1503.155527] __kasan_slab_free+0x134/0x180 [ 1503.155618] kfree+0xbc/0x180 [ 1503.155709] skb_release_data+0x27f/0x2c0 [ 1503.155800] consume_skb+0x94/0xe0 [ 1503.155889] sctp_chunk_put+0x1aa/0x1f0 [ 1503.155979] sctp_inq_pop+0x2f8/0x6e0 [ 1503.156070] sctp_assoc_bh_rcv+0x6a/0x230 [ 1503.156164] sctp_inq_push+0x117/0x150 [ 1503.156255] sctp_backlog_rcv+0xdf/0x4a0 [ 1503.156346] __release_sock+0x142/0x250 [ 1503.156436] release_sock+0x80/0x180 [ 1503.156526] sctp_sendmsg+0xbb0/0x1b10 [ 1503.156617] sock_sendmsg+0x68/0x80 [ 1503.156708] ___sys_sendmsg+0x431/0x4b0 [ 1503.156799] __sys_sendmsg+0xa4/0x130 [ 1503.156889] do_syscall_64+0x171/0x3f0 [ 1503.156980] entry_SYSCALL_64_after_hwframe+0x42/0xb7 [ 1503.157158] The buggy address belongs to the object at ffff8800298ab600 which belongs to the cache kmalloc-1024 of size 1024 [ 1503.157444] The buggy address is located 176 bytes inside of 1024-byte region [ffff8800298ab600, ffff8800298aba00) [ 1503.157702] The buggy address belongs to the page: [ 1503.157820] page:ffffea0000a62a00 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0 [ 1503.158053] flags: 0x4000000000008100(slab|head) [ 1503.158171] raw: 4000000000008100 0000000000000000 0000000000000000 00000001800e000e [ 1503.158350] raw: dead000000000100 dead000000000200 ffff880036002600 0000000000000000 [ 1503.158523] page dumped because: kasan: bad access detected [ 1503.158698] Memory state around the buggy address: [ 1503.158816] ffff8800298ab900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 1503.158988] ffff8800298ab980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 1503.159165] >ffff8800298aba00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 1503.159338] ^ [ 1503.159436] ffff8800298aba80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1503.159610] ffff8800298abb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1503.159785] ================================================================== [ 1503.159964] Disabling lock debugging due to kernel taint The test scenario to trigger the issue consists of 4 devices: - H0: data sender, connected to LAN0 - H1: data receiver, connected to LAN1 - GW0 and GW1: routers between LAN0 and LAN1. Both of them have an ethernet connection on LAN0 and LAN1 On H{0,1} set GW0 as default gateway while on GW0 set GW1 as next hop for data from LAN0 to LAN1. Moreover create an ip6ip6 tunnel between H0 and H1 and send 3 concurrent data streams (TCP/UDP/SCTP) from H0 to H1 through ip6ip6 tunnel (send buffer size is set to 16K). While data streams are active flush the route cache on HA multiple times. I have not been able to identify a given commit that introduced the issue since, using the reproducer described above, the kasan report has been triggered from 4.14 and I have not gone back further. Reported-by: Jianlin Shi Reviewed-by: Stefano Brivio Reviewed-by: Eric Dumazet Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f61a5b613b52..ba5e04c6ae17 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1554,7 +1554,8 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, *(opt++) = (rd_len >> 3); opt += 6; - memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8); + skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt, + rd_len - 8); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) -- cgit From ca0edb131bdf1e6beaeb2b8289fd6b374b74147d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Mar 2018 08:51:03 -0800 Subject: ieee802154: 6lowpan: fix possible NULL deref in lowpan_device_event() A tun device type can trivially be set to arbitrary value using TUNSETLINK ioctl(). Therefore, lowpan_device_event() must really check that ieee802154_ptr is not NULL. Fixes: 2c88b5283f60d ("ieee802154: 6lowpan: remove check on null") Signed-off-by: Eric Dumazet Cc: Alexander Aring Cc: Stefan Schmidt Reported-by: syzbot Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- net/ieee802154/6lowpan/core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 974765b7d92a..e9f0489e4229 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -206,9 +206,13 @@ static inline void lowpan_netlink_fini(void) static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *wdev = netdev_notifier_info_to_dev(ptr); + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct wpan_dev *wpan_dev; - if (wdev->type != ARPHRD_IEEE802154) + if (ndev->type != ARPHRD_IEEE802154) + return NOTIFY_DONE; + wpan_dev = ndev->ieee802154_ptr; + if (!wpan_dev) return NOTIFY_DONE; switch (event) { @@ -217,8 +221,8 @@ static int lowpan_device_event(struct notifier_block *unused, * also delete possible lowpan interfaces which belongs * to the wpan interface. */ - if (wdev->ieee802154_ptr->lowpan_dev) - lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL); + if (wpan_dev->lowpan_dev) + lowpan_dellink(wpan_dev->lowpan_dev, NULL); break; default: return NOTIFY_DONE; -- cgit From 1dd27cde30e85774c77349c804229431616d594a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 9 Mar 2018 14:06:09 +1100 Subject: net: use skb_is_gso_sctp() instead of open-coding As well as the basic conversion, I noticed that a lot of the SCTP code checks gso_type without first checking skb_is_gso() so I have added that where appropriate. Also, document the helper. Cc: Daniel Borkmann Cc: Marcelo Ricardo Leitner Signed-off-by: Daniel Axtens Signed-off-by: David S. Miller --- Documentation/networking/segmentation-offloads.txt | 5 ++++- net/core/skbuff.c | 2 +- net/sched/act_csum.c | 2 +- net/sctp/input.c | 8 ++++---- net/sctp/inqueue.c | 2 +- net/sctp/offload.c | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index fc0c949e7f9c..aca542ec125c 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -155,7 +155,10 @@ Therefore, any code in the core networking stack must be aware of the possibility that gso_size will be GSO_BY_FRAGS and handle that case appropriately. -There are a couple of helpers to make this easier: +There are some helpers to make this easier: + + - skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if + an skb is an SCTP GSO skb. - For size checks, the skb_gso_validate_*_len family of helpers correctly considers GSO_BY_FRAGS. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0bb0d8877954..baf990528943 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4904,7 +4904,7 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) thlen += inner_tcp_hdrlen(skb); } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { thlen = tcp_hdrlen(skb); - } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) { + } else if (unlikely(skb_is_gso_sctp(skb))) { thlen = sizeof(struct sctphdr); } /* UFO sets gso_size to the size of the fragmentation diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b7ba9b06b147..24b2e8e681cf 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -350,7 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, { struct sctphdr *sctph; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) + if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return 1; sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph)); diff --git a/net/sctp/input.c b/net/sctp/input.c index 0247cc432e02..b381d78548ac 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -106,6 +106,7 @@ int sctp_rcv(struct sk_buff *skb) int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); + bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb); if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -123,8 +124,7 @@ int sctp_rcv(struct sk_buff *skb) * it's better to just linearize it otherwise crc computing * takes longer. */ - if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && - skb_linearize(skb)) || + if ((!is_gso && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; @@ -135,7 +135,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb_csum_unnecessary(skb)) __skb_decr_checksum_unnecessary(skb); else if (!sctp_checksum_disable && - !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && + !is_gso && sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb->csum_valid = 1; @@ -1218,7 +1218,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, * issue as packets hitting this are mostly INIT or INIT-ACK and * those cannot be on GSO-style anyway. */ - if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) + if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return NULL; ch = (struct sctp_chunkhdr *)skb->data; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 48392552ee7c..23ebc5318edc 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -170,7 +170,7 @@ next_chunk: chunk = list_entry(entry, struct sctp_chunk, list); - if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) { + if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) { /* GSO-marked skbs but without frags, handle * them normally */ diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 35bc7106d182..123e9f2dc226 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -45,7 +45,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + if (!skb_is_gso_sctp(skb)) goto out; sh = sctp_hdr(skb); -- cgit From d06cbe9cbb8905df21b11d1cf789c9b2947688e9 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Fri, 9 Mar 2018 15:27:20 +0900 Subject: net: ethernet: ave: enable Rx drop interrupt This enables AVE_GI_RXDROP interrupt factor. This factor indicates depletion of Rx descriptors and the handler counts the number of dropped packets. Signed-off-by: Kunihiko Hayashi Signed-off-by: David S. Miller --- drivers/net/ethernet/socionext/sni_ave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 111e7ca9df56..f5c5984afefb 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1295,7 +1295,7 @@ static int ave_open(struct net_device *ndev) val |= AVE_IIRQC_EN0 | (AVE_INTM_COUNT << 16); writel(val, priv->base + AVE_IIRQC); - val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX; + val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX | AVE_GI_RXDROP; ave_irq_restore(ndev, val); napi_enable(&priv->napi_rx); -- cgit From ab7e34b3431c5d29817c503ad4d3bf2732f92ad3 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 9 Mar 2018 14:50:32 +0800 Subject: vhost_net: initialize rx_ring in vhost_net_open() KMSAN reported a use of uninit memory in vhost_net_buf_unproduce() while trying to access n->vqs[VHOST_NET_VQ_TX].rx_ring: ================================================================== BUG: KMSAN: use of uninitialized memory in vhost_net_buf_unproduce+0x7bb/0x9a0 drivers/vho et.c:170 CPU: 0 PID: 3021 Comm: syz-fuzzer Not tainted 4.16.0-rc4+ #3853 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x1f0 mm/kmsan/kmsan.c:1093 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676 vhost_net_buf_unproduce+0x7bb/0x9a0 drivers/vhost/net.c:170 vhost_net_stop_vq drivers/vhost/net.c:974 [inline] vhost_net_stop+0x146/0x380 drivers/vhost/net.c:982 vhost_net_release+0xb1/0x4f0 drivers/vhost/net.c:1015 __fput+0x49f/0xa00 fs/file_table.c:209 ____fput+0x37/0x40 fs/file_table.c:243 task_work_run+0x243/0x2c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:191 [inline] exit_to_usermode_loop arch/x86/entry/common.c:166 [inline] prepare_exit_to_usermode+0x349/0x3b0 arch/x86/entry/common.c:196 syscall_return_slowpath+0xf3/0x6d0 arch/x86/entry/common.c:265 do_syscall_64+0x34d/0x450 arch/x86/entry/common.c:292 ... origin: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:303 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:213 kmsan_kmalloc_large+0x6f/0xd0 mm/kmsan/kmsan.c:392 kmalloc_large_node_hook mm/slub.c:1366 [inline] kmalloc_large_node mm/slub.c:3808 [inline] __kmalloc_node+0x100e/0x1290 mm/slub.c:3818 kmalloc_node include/linux/slab.h:554 [inline] kvmalloc_node+0x1a5/0x2e0 mm/util.c:419 kvmalloc include/linux/mm.h:541 [inline] vhost_net_open+0x64/0x5f0 drivers/vhost/net.c:921 misc_open+0x7b5/0x8b0 drivers/char/misc.c:154 chrdev_open+0xc28/0xd90 fs/char_dev.c:417 do_dentry_open+0xccb/0x1430 fs/open.c:752 vfs_open+0x272/0x2e0 fs/open.c:866 do_last fs/namei.c:3378 [inline] path_openat+0x49ad/0x6580 fs/namei.c:3519 do_filp_open+0x267/0x640 fs/namei.c:3553 do_sys_open+0x6ad/0x9c0 fs/open.c:1059 SYSC_openat+0xc7/0xe0 fs/open.c:1086 SyS_openat+0x63/0x90 fs/open.c:1080 do_syscall_64+0x2f1/0x450 arch/x86/entry/common.c:287 ================================================================== Fixes: c67df11f6e480 ("vhost_net: try batch dequing from skb array") Signed-off-by: Alexander Potapenko Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 610cba276d47..60f1080bffc7 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -948,6 +948,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].done_idx = 0; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; + n->vqs[i].rx_ring = NULL; vhost_net_buf_init(&n->vqs[i].rxq); } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); -- cgit From 303fd71b37fb710b26f5ff5444029d62cfd627bd Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Mar 2018 14:50:33 +0800 Subject: vhost_net: keep private_data and rx_ring synced We get pointer ring from the exported sock, this means we should keep rx_ring and vq->private synced during both vq stop and backend set, otherwise we may see stale rx_ring. Fixes: c67df11f6e480 ("vhost_net: try batch dequing from skb array") Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 60f1080bffc7..efb93063fda1 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -973,6 +973,7 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, vhost_net_disable_vq(n, vq); vq->private_data = NULL; vhost_net_buf_unproduce(nvq); + nvq->rx_ring = NULL; mutex_unlock(&vq->mutex); return sock; } @@ -1162,14 +1163,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); vq->private_data = sock; vhost_net_buf_unproduce(nvq); - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); if (r) goto err_used; + if (index == VHOST_NET_VQ_RX) + nvq->rx_ring = get_tap_ptr_ring(fd); oldubufs = nvq->ubufs; nvq->ubufs = ubufs; -- cgit From 3a4030761ea88ff439030ca98e3094b9900e96b7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Mar 2018 14:50:34 +0800 Subject: vhost_net: examine pointer types during un-producing After commit fc72d1d54dd9 ("tuntap: XDP transmission"), we can actually queueing XDP pointers in the pointer ring, so we should examine the pointer type before freeing the pointer. Fixes: fc72d1d54dd9 ("tuntap: XDP transmission") Reported-by: Michael S. Tsirkin Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 ++- drivers/vhost/net.c | 2 +- include/linux/if_tun.h | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7433bb2e4451..28cfa642e39a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -655,7 +655,7 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) return tun; } -static void tun_ptr_free(void *ptr) +void tun_ptr_free(void *ptr) { if (!ptr) return; @@ -667,6 +667,7 @@ static void tun_ptr_free(void *ptr) __skb_array_destroy_skb(ptr); } } +EXPORT_SYMBOL_GPL(tun_ptr_free); static void tun_queue_purge(struct tun_file *tfile) { diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index efb93063fda1..8139bc70ad7d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -170,7 +170,7 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, vhost_net_buf_get_size(rxq), - __skb_array_destroy_skb); + tun_ptr_free); rxq->head = rxq->tail = 0; } } diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index c5b0a75a7812..fd00170b494f 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -25,6 +25,7 @@ struct ptr_ring *tun_get_tx_ring(struct file *file); bool tun_is_xdp_buff(void *ptr); void *tun_xdp_to_ptr(void *ptr); void *tun_ptr_to_xdp(void *ptr); +void tun_ptr_free(void *ptr); #else #include #include @@ -50,5 +51,8 @@ static inline void *tun_ptr_to_xdp(void *ptr) { return NULL; } +static inline void tun_ptr_free(void *ptr) +{ +} #endif /* CONFIG_TUN */ #endif /* __IF_TUN_H */ -- cgit From 49bae2f3093b0a7bc5e1a158d89697a73cdb0243 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 9 Mar 2018 15:33:52 +0200 Subject: mlxsw: spectrum: Fix gact_ok offloading For ok GACT action, TERMINATE binding_cmd should be used in action set passed down to HW. Fixes: b2925957ec1a9 ("mlxsw: spectrum_flower: Offload "ok" termination action") Signed-off-by: Jiri Pirko Reported-by: Alexander Petrovskiy Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 2 +- 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index b698fb481b2e..996dc099cd58 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -443,6 +443,17 @@ int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) } EXPORT_SYMBOL(mlxsw_afa_block_jump); +int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block) +{ + if (block->finished) + return -EINVAL; + mlxsw_afa_set_goto_set(block->cur_set, + MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0); + block->finished = true; + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_terminate); + static struct mlxsw_afa_fwd_entry * mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u8 local_port) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 43132293475c..b91f2b0829b0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -65,6 +65,7 @@ char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4ec1ca3c96c8..386861773476 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -553,6 +553,7 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, u16 group_id); +int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 0897a5435cc2..92d90ed7207e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -528,6 +528,11 @@ int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, return mlxsw_afa_block_jump(rulei->act_block, group_id); } +int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_terminate(rulei->act_block); +} + int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) { return mlxsw_afa_block_append_drop(rulei->act_block); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 6ce00e28d4ea..89dbf569dff5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -65,7 +65,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { if (is_tcf_gact_ok(a)) { - err = mlxsw_sp_acl_rulei_act_continue(rulei); + err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) return err; } else if (is_tcf_gact_shot(a)) { -- cgit From 663f1b26f9c129aa2912c1a1d3359e3ecd88e814 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Mar 2018 15:33:53 +0200 Subject: mlxsw: spectrum: Prevent duplicate mirrors The Spectrum ASIC doesn't support mirroring more than once from a single binding point (which is a port-direction pair). Therefore detect that a second binding of a given binding point is attempted. To that end, extend struct mlxsw_sp_span_inspected_port to track whether a given binding point is bound or not. Extend mlxsw_sp_span_entry_port_find() to look for ports based on the full unique key: port number, direction, and boundness. Besides fixing the overt bug where configured mirrors are not offloaded, this also fixes a more subtle bug: mlxsw_sp_span_inspected_port_del() just defers to mlxsw_sp_span_entry_bound_port_find(), and that used to find the first port with the right number (disregarding the type). Thus by adding and removing egress and ingress mirrors in the right order, one could trick the system into believing it has no egress mirrors when in fact it did have some. That then caused that mlxsw_sp_span_port_mtu_update() didn't update mirroring buffer when MTU was changed. Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 28 ++++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c7e941aecc2a..bf400c75fcc8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -655,13 +655,17 @@ static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) } static struct mlxsw_sp_span_inspected_port * -mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry) +mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + struct mlxsw_sp_port *port, + bool bind) { struct mlxsw_sp_span_inspected_port *p; list_for_each_entry(p, &span_entry->bound_ports_list, list) - if (port->local_port == p->local_port) + if (type == p->type && + port->local_port == p->local_port && + bind == p->bound) return p; return NULL; } @@ -691,8 +695,22 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, struct mlxsw_sp_span_inspected_port *inspected_port; struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; char sbib_pl[MLXSW_REG_SBIB_LEN]; + int i; int err; + /* A given (source port, direction) can only be bound to one analyzer, + * so if a binding is requested, check for conflicts. + */ + if (bind) + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = + &mlxsw_sp->span.entries[i]; + + if (mlxsw_sp_span_entry_bound_port_find(curr, type, + port, bind)) + return -EEXIST; + } + /* if it is an egress SPAN, bind a shared buffer to it */ if (type == MLXSW_SP_SPAN_EGRESS) { u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, @@ -720,6 +738,7 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, } inspected_port->local_port = port->local_port; inspected_port->type = type; + inspected_port->bound = bind; list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); return 0; @@ -746,7 +765,8 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; char sbib_pl[MLXSW_REG_SBIB_LEN]; - inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); + inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type, + port, bind); if (!inspected_port) return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 386861773476..92064db2ae44 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -120,6 +120,9 @@ struct mlxsw_sp_span_inspected_port { struct list_head list; enum mlxsw_sp_span_type type; u8 local_port; + + /* Whether this is a directly bound mirror (port-to-port) or an ACL. */ + bool bound; }; struct mlxsw_sp_span_entry { -- cgit From 3b04caab81649a9e8d5375b919b6653d791951df Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 9 Mar 2018 07:34:40 -0800 Subject: ip6gre: add erspan v2 to tunnel lookup The patch adds the erspan v2 proto in ip6gre_tunnel_lookup so the erspan v2 tunnel can be found correctly. Signed-off-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c353125546d..83c2fffd40be 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -126,7 +126,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, struct ip6_tnl *t, *cand = NULL; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); int dev_type = (gre_proto == htons(ETH_P_TEB) || - gre_proto == htons(ETH_P_ERSPAN)) ? + gre_proto == htons(ETH_P_ERSPAN) || + gre_proto == htons(ETH_P_ERSPAN2)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; -- cgit From d6aa71197ffcb68850bfebfc3fc160abe41df53b Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 9 Mar 2018 07:34:41 -0800 Subject: ip6erspan: improve error handling for erspan version number. When users fill in incorrect erspan version number through the struct erspan_metadata uapi, current code skips pushing the erspan header but continue pushing the gre header, which is incorrect. The patch fixes it by returning error. Signed-off-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 83c2fffd40be..a299f5424e16 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -945,6 +945,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, md->u.md2.dir, get_hwid(&md->u.md2), truncate, false); + } else { + goto tx_err; } } else { switch (skb->protocol) { -- cgit From e41c7c68ea771683cae5a7f81c268f38d7912ecb Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 9 Mar 2018 07:34:42 -0800 Subject: ip6erspan: make sure enough headroom at xmit. The patch adds skb_cow_header() to ensure enough headroom at ip6erspan_tunnel_xmit before pushing the erspan header to the skb. Signed-off-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a299f5424e16..1bbd0930063e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -903,6 +903,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } + if (skb_cow_head(skb, dev->needed_headroom)) + goto tx_err; + t->parms.o_flags &= ~TUNNEL_KEY; IPCB(skb)->flags = 0; -- cgit From 932909d9b28d27e807ff8eecb68c7748f6701628 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 8 Mar 2018 12:54:19 +0100 Subject: netfilter: ebtables: fix erroneous reject of last rule The last rule in the blob has next_entry offset that is same as total size. This made "ebtables32 -A OUTPUT -d de:ad:be:ef:01:02" fail on 64 bit kernel. Fixes: b71812168571fa ("netfilter: ebtables: CONFIG_COMPAT: don't trust userland offsets") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 254ef9f49567..a94d23b0a9af 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2119,8 +2119,12 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, * offsets are relative to beginning of struct ebt_entry (i.e., 0). */ for (i = 0; i < 4 ; ++i) { - if (offsets[i] >= *total) + if (offsets[i] > *total) return -EINVAL; + + if (i < 3 && offsets[i] == *total) + return -EINVAL; + if (i == 0) continue; if (offsets[i-1] > offsets[i]) -- cgit From b1d0a5d0cba4597c0394997b2d5fced3e3841b4e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 10 Mar 2018 01:15:45 +0100 Subject: netfilter: x_tables: add and use xt_check_proc_name recent and hashlimit both create /proc files, but only check that name is 0 terminated. This can trigger WARN() from procfs when name is "" or "/". Add helper for this and then use it for both. Cc: Eric Dumazet Reported-by: Eric Dumazet Reported-by: Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 ++ net/netfilter/x_tables.c | 30 ++++++++++++++++++++++++++++++ net/netfilter/xt_hashlimit.c | 16 ++++++++++------ net/netfilter/xt_recent.c | 6 +++--- 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1313b35c3ab7..14529511c4b8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -285,6 +285,8 @@ unsigned int *xt_alloc_entry_offsets(unsigned int size); bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size); +int xt_check_proc_name(const char *name, unsigned int size); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index fa1655aff8d3..4aa01c90e9d1 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -423,6 +423,36 @@ textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto) return buf; } +/** + * xt_check_proc_name - check that name is suitable for /proc file creation + * + * @name: file name candidate + * @size: length of buffer + * + * some x_tables modules wish to create a file in /proc. + * This function makes sure that the name is suitable for this + * purpose, it checks that name is NUL terminated and isn't a 'special' + * name, like "..". + * + * returns negative number on error or 0 if name is useable. + */ +int xt_check_proc_name(const char *name, unsigned int size) +{ + if (name[0] == '\0') + return -EINVAL; + + if (strnlen(name, size) == size) + return -ENAMETOOLONG; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0 || + strchr(name, '/')) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_check_proc_name); + int xt_check_match(struct xt_mtchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 66f5aca62a08..3360f13dc208 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -917,8 +917,9 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) struct hashlimit_cfg3 cfg = {}; int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); @@ -935,8 +936,9 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) struct hashlimit_cfg3 cfg = {}; int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); @@ -950,9 +952,11 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) static int hashlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_hashlimit_mtinfo3 *info = par->matchinfo; + int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, info->name, 3); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 6d232d18faff..81ee1d6543b2 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -361,9 +361,9 @@ static int recent_mt_check(const struct xt_mtchk_param *par, info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } - if (info->name[0] == '\0' || - strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot) nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1; -- cgit From c8d70a700a5b486bfa8e5a7d33d805389f6e59f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Mar 2018 14:27:31 +0100 Subject: netfilter: bridge: ebt_among: add more missing match size checks ebt_among is special, it has a dynamic match size and is exempt from the central size checks. commit c4585a2823edf ("bridge: ebt_among: add missing match size checks") added validation for pool size, but missed fact that the macros ebt_among_wh_src/dst can already return out-of-bound result because they do not check value of wh_src/dst_ofs (an offset) vs. the size of the match that userspace gave to us. v2: check that offset has correct alignment. Paolo Abeni points out that we should also check that src/dst wormhash arrays do not overlap, and src + length lines up with start of dst (or vice versa). v3: compact wormhash_sizes_valid() part NB: Fixes tag is intentionally wrong, this bug exists from day one when match was added for 2.6 kernel. Tag is there so stable maintainers will notice this one too. Tested with same rules from the earlier patch. Fixes: c4585a2823edf ("bridge: ebt_among: add missing match size checks") Reported-by: Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_among.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index c5afb4232ecb..620e54f08296 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -177,6 +177,28 @@ static bool poolsize_invalid(const struct ebt_mac_wormhash *w) return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple)); } +static bool wormhash_offset_invalid(int off, unsigned int len) +{ + if (off == 0) /* not present */ + return false; + + if (off < (int)sizeof(struct ebt_among_info) || + off % __alignof__(struct ebt_mac_wormhash)) + return true; + + off += sizeof(struct ebt_mac_wormhash); + + return off > len; +} + +static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b) +{ + if (a == 0) + a = sizeof(struct ebt_among_info); + + return ebt_mac_wormhash_size(wh) + a == b; +} + static int ebt_among_mt_check(const struct xt_mtchk_param *par) { const struct ebt_among_info *info = par->matchinfo; @@ -189,6 +211,10 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) if (expected_length > em->match_size) return -EINVAL; + if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) || + wormhash_offset_invalid(info->wh_src_ofs, em->match_size)) + return -EINVAL; + wh_dst = ebt_among_wh_dst(info); if (poolsize_invalid(wh_dst)) return -EINVAL; @@ -201,6 +227,14 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) if (poolsize_invalid(wh_src)) return -EINVAL; + if (info->wh_src_ofs < info->wh_dst_ofs) { + if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs)) + return -EINVAL; + } else { + if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs)) + return -EINVAL; + } + expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { -- cgit From c04a3f730021c304c7cc4bc30ee57ee70ad98d57 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Mar 2018 18:56:26 +0100 Subject: netfilter: nf_tables: release flowtable hooks Otherwise we leak this array. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 558593e6a0a3..c4acc7340eb1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5423,6 +5423,7 @@ err: static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { cancel_delayed_work_sync(&flowtable->data.gc_work); + kfree(flowtable->ops); kfree(flowtable->name); flowtable->data.type->free(&flowtable->data); rhashtable_destroy(&flowtable->data.rhashtable); -- cgit From 13fbcc8dc573482dd3f27568257fd7087f8935f4 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 8 Mar 2018 16:17:23 -0800 Subject: macvlan: filter out unsupported feature flags Adding a macvlan device on top of a lowerdev that supports the xfrm offloads fails with a new regression: # ip link add link ens1f0 mv0 type macvlan RTNETLINK answers: Operation not permitted Tracing down the failure shows that the macvlan device inherits the NETIF_F_HW_ESP and NETIF_F_HW_ESP_TX_CSUM feature flags from the lowerdev, but with no dev->xfrmdev_ops API filled in, it doesn't actually support xfrm. When the request is made to add the new macvlan device, the XFRM listener for NETDEV_REGISTER calls xfrm_api_check() which fails the new registration because dev->xfrmdev_ops is NULL. The macvlan creation succeeds when we filter out the ESP feature flags in macvlan_fix_features(), so let's filter them out like we're already filtering out ~NETIF_F_NETNS_LOCAL. When XFRM support is added in the future, we can add the flags into MACVLAN_FEATURES. This same problem could crop up in the future with any other new feature flags, so let's filter out any flags that aren't defined as supported in macvlan. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Reported-by: Alexey Kodanev Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 8fc02d9db3d0..725f4b4afc6d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1036,7 +1036,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, lowerdev_features &= (features | ~NETIF_F_LRO); features = netdev_increment_features(lowerdev_features, features, mask); features |= ALWAYS_ON_FEATURES; - features &= ~NETIF_F_NETNS_LOCAL; + features &= (ALWAYS_ON_FEATURES | MACVLAN_FEATURES); return features; } -- cgit From ddc502dfed600bff0b61d899f70d95b76223fdfc Mon Sep 17 00:00:00 2001 From: zhangliping Date: Fri, 9 Mar 2018 10:08:50 +0800 Subject: openvswitch: meter: fix the incorrect calculation of max delta_t Max delat_t should be the full_bucket/rate instead of the full_bucket. Also report EINVAL if the rate is zero. Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure") Cc: Andy Zhou Signed-off-by: zhangliping Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 04b94281a30b..b891a91577f8 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -242,14 +242,20 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); + if (band->rate == 0) { + err = -EINVAL; + goto exit_free_meter; + } + band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); /* Figure out max delta_t that is enough to fill any bucket. * Keep max_delta_t size to the bucket units: * pkts => 1/1000 packets, kilobits => bits. + * + * Start with a full bucket. */ - band_max_delta_t = (band->burst_size + band->rate) * 1000; - /* Start with a full bucket. */ - band->bucket = band_max_delta_t; + band->bucket = (band->burst_size + band->rate) * 1000; + band_max_delta_t = band->bucket / band->rate; if (band_max_delta_t > meter->max_delta_t) meter->max_delta_t = band_max_delta_t; band++; -- cgit From b7db978ac283b237835129ac87f26cbac94d04e7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 6 Feb 2018 09:52:07 +0100 Subject: can: m_can: change comparison to bitshift when dealing with a mask Due to a typo, the mask was destroyed by a comparison instead of a bit shift. Reported-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2594f7779c6f..74170b0d23cf 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -253,7 +253,7 @@ enum m_can_mram_cfg { /* Rx FIFO 0/1 Configuration (RXF0C/RXF1C) */ #define RXFC_FWM_SHIFT 24 -#define RXFC_FWM_MASK (0x7f < RXFC_FWM_SHIFT) +#define RXFC_FWM_MASK (0x7f << RXFC_FWM_SHIFT) #define RXFC_FS_SHIFT 16 #define RXFC_FS_MASK (0x7f << RXFC_FS_SHIFT) -- cgit From 591d65d5b15496af8d05e252bc1da611c66c0b79 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 5 Mar 2018 21:29:52 +0100 Subject: can: ifi: Check core revision upon probe Older versions of the core are not compatible with the driver due to various intrusive fixes of the core. Read out the VER register, check the core revision bitfield and verify if the core in use is new enough (rev 2.1 or newer) to work correctly with this driver. Signed-off-by: Marek Vasut Cc: Heiko Schocher Cc: Markus Marb Cc: Marc Kleine-Budde Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 2772d05ff11c..9fd396c3569a 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -136,6 +136,8 @@ #define IFI_CANFD_SYSCLOCK 0x50 #define IFI_CANFD_VER 0x54 +#define IFI_CANFD_VER_REV_MASK 0xff +#define IFI_CANFD_VER_REV_MIN_SUPPORTED 0x15 #define IFI_CANFD_IP_ID 0x58 #define IFI_CANFD_IP_ID_VALUE 0xD073CAFD @@ -933,7 +935,7 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev) struct resource *res; void __iomem *addr; int irq, ret; - u32 id; + u32 id, rev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); addr = devm_ioremap_resource(dev, res); @@ -947,6 +949,13 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev) return -EINVAL; } + rev = readl(addr + IFI_CANFD_VER) & IFI_CANFD_VER_REV_MASK; + if (rev < IFI_CANFD_VER_REV_MIN_SUPPORTED) { + dev_err(dev, "This block is too old (rev %i), minimum supported is rev %i\n", + rev, IFI_CANFD_VER_REV_MIN_SUPPORTED); + return -EINVAL; + } + ndev = alloc_candev(sizeof(*priv), 1); if (!ndev) return -ENOMEM; -- cgit From 880dd464b4304583c557c4e5f5ecebfd55d232b1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 1 Mar 2018 19:34:00 +0100 Subject: can: ifi: Repair the error handling The new version of the IFI CANFD core has significantly less complex error state indication logic. In particular, the warning/error state bits are no longer all over the place, but are all present in the STATUS register. Moreover, there is a new IRQ register bit indicating transition between error states (active/warning/passive/busoff). This patch makes use of this bit to weed out the obscure selective INTERRUPT register clearing, which was used to carry over the error state indication into the poll function. While at it, this patch fixes the handling of the ACTIVE state, since the hardware provides indication of the core being in ACTIVE state and that in turn fixes the state transition indication toward userspace. Finally, register reads in the poll function are moved to the matching subfunctions since those are also no longer needed in the poll function. Signed-off-by: Marek Vasut Cc: Heiko Schocher Cc: Markus Marb Cc: Marc Kleine-Budde Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ifi_canfd/ifi_canfd.c | 64 ++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 9fd396c3569a..fedd927ba6ed 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -30,6 +30,7 @@ #define IFI_CANFD_STCMD_ERROR_ACTIVE BIT(2) #define IFI_CANFD_STCMD_ERROR_PASSIVE BIT(3) #define IFI_CANFD_STCMD_BUSOFF BIT(4) +#define IFI_CANFD_STCMD_ERROR_WARNING BIT(5) #define IFI_CANFD_STCMD_BUSMONITOR BIT(16) #define IFI_CANFD_STCMD_LOOPBACK BIT(18) #define IFI_CANFD_STCMD_DISABLE_CANFD BIT(24) @@ -52,7 +53,10 @@ #define IFI_CANFD_TXSTCMD_OVERFLOW BIT(13) #define IFI_CANFD_INTERRUPT 0xc +#define IFI_CANFD_INTERRUPT_ERROR_BUSOFF BIT(0) #define IFI_CANFD_INTERRUPT_ERROR_WARNING BIT(1) +#define IFI_CANFD_INTERRUPT_ERROR_STATE_CHG BIT(2) +#define IFI_CANFD_INTERRUPT_ERROR_REC_TEC_INC BIT(3) #define IFI_CANFD_INTERRUPT_ERROR_COUNTER BIT(10) #define IFI_CANFD_INTERRUPT_TXFIFO_EMPTY BIT(16) #define IFI_CANFD_INTERRUPT_TXFIFO_REMOVE BIT(22) @@ -61,6 +65,10 @@ #define IFI_CANFD_INTERRUPT_SET_IRQ ((u32)BIT(31)) #define IFI_CANFD_IRQMASK 0x10 +#define IFI_CANFD_IRQMASK_ERROR_BUSOFF BIT(0) +#define IFI_CANFD_IRQMASK_ERROR_WARNING BIT(1) +#define IFI_CANFD_IRQMASK_ERROR_STATE_CHG BIT(2) +#define IFI_CANFD_IRQMASK_ERROR_REC_TEC_INC BIT(3) #define IFI_CANFD_IRQMASK_SET_ERR BIT(7) #define IFI_CANFD_IRQMASK_SET_TS BIT(15) #define IFI_CANFD_IRQMASK_TXFIFO_EMPTY BIT(16) @@ -222,7 +230,10 @@ static void ifi_canfd_irq_enable(struct net_device *ndev, bool enable) if (enable) { enirq = IFI_CANFD_IRQMASK_TXFIFO_EMPTY | - IFI_CANFD_IRQMASK_RXFIFO_NEMPTY; + IFI_CANFD_IRQMASK_RXFIFO_NEMPTY | + IFI_CANFD_IRQMASK_ERROR_STATE_CHG | + IFI_CANFD_IRQMASK_ERROR_WARNING | + IFI_CANFD_IRQMASK_ERROR_BUSOFF; if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) enirq |= IFI_CANFD_INTERRUPT_ERROR_COUNTER; } @@ -363,12 +374,13 @@ static int ifi_canfd_handle_lost_msg(struct net_device *ndev) return 1; } -static int ifi_canfd_handle_lec_err(struct net_device *ndev, const u32 errctr) +static int ifi_canfd_handle_lec_err(struct net_device *ndev) { struct ifi_canfd_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; struct can_frame *cf; struct sk_buff *skb; + u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR); const u32 errmask = IFI_CANFD_ERROR_CTR_OVERLOAD_FIRST | IFI_CANFD_ERROR_CTR_ACK_ERROR_FIRST | IFI_CANFD_ERROR_CTR_BIT0_ERROR_FIRST | @@ -451,6 +463,11 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, switch (new_state) { case CAN_STATE_ERROR_ACTIVE: + /* error active state */ + priv->can.can_stats.error_warning++; + priv->can.state = CAN_STATE_ERROR_ACTIVE; + break; + case CAN_STATE_ERROR_WARNING: /* error warning state */ priv->can.can_stats.error_warning++; priv->can.state = CAN_STATE_ERROR_WARNING; @@ -479,7 +496,7 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, ifi_canfd_get_berr_counter(ndev, &bec); switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (bec.txerr > bec.rxerr) ? @@ -512,22 +529,21 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, return 1; } -static int ifi_canfd_handle_state_errors(struct net_device *ndev, u32 stcmd) +static int ifi_canfd_handle_state_errors(struct net_device *ndev) { struct ifi_canfd_priv *priv = netdev_priv(ndev); + u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); int work_done = 0; - u32 isr; - /* - * The ErrWarn condition is a little special, since the bit is - * located in the INTERRUPT register instead of STCMD register. - */ - isr = readl(priv->base + IFI_CANFD_INTERRUPT); - if ((isr & IFI_CANFD_INTERRUPT_ERROR_WARNING) && + if ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) && + (priv->can.state != CAN_STATE_ERROR_ACTIVE)) { + netdev_dbg(ndev, "Error, entered active state\n"); + work_done += ifi_canfd_handle_state_change(ndev, + CAN_STATE_ERROR_ACTIVE); + } + + if ((stcmd & IFI_CANFD_STCMD_ERROR_WARNING) && (priv->can.state != CAN_STATE_ERROR_WARNING)) { - /* Clear the interrupt */ - writel(IFI_CANFD_INTERRUPT_ERROR_WARNING, - priv->base + IFI_CANFD_INTERRUPT); netdev_dbg(ndev, "Error, entered warning state\n"); work_done += ifi_canfd_handle_state_change(ndev, CAN_STATE_ERROR_WARNING); @@ -554,18 +570,11 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota) { struct net_device *ndev = napi->dev; struct ifi_canfd_priv *priv = netdev_priv(ndev); - const u32 stcmd_state_mask = IFI_CANFD_STCMD_ERROR_PASSIVE | - IFI_CANFD_STCMD_BUSOFF; - int work_done = 0; - - u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD); - u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR); + int work_done = 0; /* Handle bus state changes */ - if ((stcmd & stcmd_state_mask) || - ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) == 0)) - work_done += ifi_canfd_handle_state_errors(ndev, stcmd); + work_done += ifi_canfd_handle_state_errors(ndev); /* Handle lost messages on RX */ if (rxstcmd & IFI_CANFD_RXSTCMD_OVERFLOW) @@ -573,7 +582,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota) /* Handle lec errors on the bus */ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) - work_done += ifi_canfd_handle_lec_err(ndev, errctr); + work_done += ifi_canfd_handle_lec_err(ndev); /* Handle normal messages on RX */ if (!(rxstcmd & IFI_CANFD_RXSTCMD_EMPTY)) @@ -594,12 +603,13 @@ static irqreturn_t ifi_canfd_isr(int irq, void *dev_id) struct net_device_stats *stats = &ndev->stats; const u32 rx_irq_mask = IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY | IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER | + IFI_CANFD_INTERRUPT_ERROR_COUNTER | + IFI_CANFD_INTERRUPT_ERROR_STATE_CHG | IFI_CANFD_INTERRUPT_ERROR_WARNING | - IFI_CANFD_INTERRUPT_ERROR_COUNTER; + IFI_CANFD_INTERRUPT_ERROR_BUSOFF; const u32 tx_irq_mask = IFI_CANFD_INTERRUPT_TXFIFO_EMPTY | IFI_CANFD_INTERRUPT_TXFIFO_REMOVE; - const u32 clr_irq_mask = ~((u32)(IFI_CANFD_INTERRUPT_SET_IRQ | - IFI_CANFD_INTERRUPT_ERROR_WARNING)); + const u32 clr_irq_mask = ~((u32)IFI_CANFD_INTERRUPT_SET_IRQ); u32 isr; isr = readl(priv->base + IFI_CANFD_INTERRUPT); -- cgit From e6048a00cfd0863d32f53b226e0b9a3633fc3332 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 8 Mar 2018 09:30:28 +0100 Subject: can: peak/pcie_fd: fix echo_skb is occupied! bug This patch makes atomic the handling of the linux-can echo_skb array and the network tx queue. This prevents from the "BUG! echo_skb is occupied!" message to be printed by the linux-can core, in SMP environments. Reported-by: Diana Burgess Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_canfd.c | 12 ++++++------ drivers/net/can/peak_canfd/peak_pciefd_main.c | 8 ++++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 55513411a82e..6fa66231ed8e 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -262,7 +262,6 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, spin_lock_irqsave(&priv->echo_lock, flags); can_get_echo_skb(priv->ndev, msg->client); - spin_unlock_irqrestore(&priv->echo_lock, flags); /* count bytes of the echo instead of skb */ stats->tx_bytes += cf_len; @@ -271,6 +270,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, /* restart tx queue (a slot is free) */ netif_wake_queue(priv->ndev); + spin_unlock_irqrestore(&priv->echo_lock, flags); return 0; } @@ -726,11 +726,6 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb, */ should_stop_tx_queue = !!(priv->can.echo_skb[priv->echo_idx]); - spin_unlock_irqrestore(&priv->echo_lock, flags); - - /* write the skb on the interface */ - priv->write_tx_msg(priv, msg); - /* stop network tx queue if not enough room to save one more msg too */ if (priv->can.ctrlmode & CAN_CTRLMODE_FD) should_stop_tx_queue |= (room_left < @@ -742,6 +737,11 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb, if (should_stop_tx_queue) netif_stop_queue(ndev); + spin_unlock_irqrestore(&priv->echo_lock, flags); + + /* write the skb on the interface */ + priv->write_tx_msg(priv, msg); + return NETDEV_TX_OK; } diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index 788c3464a3b0..3c51a884db87 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -349,8 +349,12 @@ static irqreturn_t pciefd_irq_handler(int irq, void *arg) priv->tx_pages_free++; spin_unlock_irqrestore(&priv->tx_lock, flags); - /* wake producer up */ - netif_wake_queue(priv->ucan.ndev); + /* wake producer up (only if enough room in echo_skb array) */ + spin_lock_irqsave(&priv->ucan.echo_lock, flags); + if (!priv->ucan.can.echo_skb[priv->ucan.echo_idx]) + netif_wake_queue(priv->ucan.ndev); + + spin_unlock_irqrestore(&priv->ucan.echo_lock, flags); } /* re-enable Rx DMA transfer for this CAN */ -- cgit From ffd137f7043cb30067e1bff6fe62a073ae190b23 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 8 Mar 2018 09:30:29 +0100 Subject: can: peak/pcie_fd: remove useless code when interface starts When an interface starts, the echo_skb array is empty and the network queue should be started only. This patch replaces useless code and locks when the internal RX_BARRIER message is received from the IP core, telling the driver that tx may start. Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_canfd.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 6fa66231ed8e..ed8561d4a90f 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -333,7 +333,6 @@ static int pucan_handle_status(struct peak_canfd_priv *priv, /* this STATUS is the CNF of the RX_BARRIER: Tx path can be setup */ if (pucan_status_is_rx_barrier(msg)) { - unsigned long flags; if (priv->enable_tx_path) { int err = priv->enable_tx_path(priv); @@ -342,16 +341,8 @@ static int pucan_handle_status(struct peak_canfd_priv *priv, return err; } - /* restart network queue only if echo skb array is free */ - spin_lock_irqsave(&priv->echo_lock, flags); - - if (!priv->can.echo_skb[priv->echo_idx]) { - spin_unlock_irqrestore(&priv->echo_lock, flags); - - netif_wake_queue(ndev); - } else { - spin_unlock_irqrestore(&priv->echo_lock, flags); - } + /* start network queue (echo_skb array is empty) */ + netif_start_queue(ndev); return 0; } -- cgit From c9b3bce18da4a0aebc27853052dea39aa64b7d75 Mon Sep 17 00:00:00 2001 From: Bich HEMON Date: Mon, 12 Mar 2018 08:52:37 +0000 Subject: can: m_can: select pinctrl state in each suspend/resume function Make sure to apply the correct pin state in suspend/resume callbacks. Putting pins in sleep state saves power. Signed-off-by: Bich Hemon Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 74170b0d23cf..b397a33f3d32 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -26,6 +26,7 @@ #include #include #include +#include /* napi related */ #define M_CAN_NAPI_WEIGHT 64 @@ -1700,6 +1701,8 @@ static __maybe_unused int m_can_suspend(struct device *dev) m_can_clk_stop(priv); } + pinctrl_pm_select_sleep_state(dev); + priv->can.state = CAN_STATE_SLEEPING; return 0; @@ -1710,6 +1713,8 @@ static __maybe_unused int m_can_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct m_can_priv *priv = netdev_priv(ndev); + pinctrl_pm_select_default_state(dev); + m_can_init_ram(priv); priv->can.state = CAN_STATE_ERROR_ACTIVE; -- cgit From a2c054a896b8ac794ddcfc7c92e2dc7ec4ed4ed5 Mon Sep 17 00:00:00 2001 From: Brad Mouring Date: Thu, 8 Mar 2018 16:23:03 -0600 Subject: net: phy: Tell caller result of phy_change() In 664fcf123a30e (net: phy: Threaded interrupts allow some simplification) the phy_interrupt system was changed to use a traditional threaded interrupt scheme instead of a workqueue approach. With this change, the phy status check moved into phy_change, which did not report back to the caller whether or not the interrupt was handled. This means that, in the case of a shared phy interrupt, only the first phydev's interrupt registers are checked (since phy_interrupt() would always return IRQ_HANDLED). This leads to interrupt storms when it is a secondary device that's actually the interrupt source. Signed-off-by: Brad Mouring Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 145 +++++++++++++++++++++++++------------------------- include/linux/phy.h | 1 - 2 files changed, 72 insertions(+), 74 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a6f924fee584..9aabfa1a455a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -617,6 +617,77 @@ static void phy_error(struct phy_device *phydev) phy_trigger_machine(phydev, false); } +/** + * phy_disable_interrupts - Disable the PHY interrupts from the PHY side + * @phydev: target phy_device struct + */ +static int phy_disable_interrupts(struct phy_device *phydev) +{ + int err; + + /* Disable PHY interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + if (err) + goto phy_err; + + /* Clear the interrupt */ + err = phy_clear_interrupt(phydev); + if (err) + goto phy_err; + + return 0; + +phy_err: + phy_error(phydev); + + return err; +} + +/** + * phy_change - Called by the phy_interrupt to handle PHY changes + * @phydev: phy_device struct that interrupted + */ +static irqreturn_t phy_change(struct phy_device *phydev) +{ + if (phy_interrupt_is_valid(phydev)) { + if (phydev->drv->did_interrupt && + !phydev->drv->did_interrupt(phydev)) + return IRQ_NONE; + + if (phydev->state == PHY_HALTED) + if (phy_disable_interrupts(phydev)) + goto phy_err; + } + + mutex_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + mutex_unlock(&phydev->lock); + + /* reschedule state queue work to run as soon as possible */ + phy_trigger_machine(phydev, true); + + if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev)) + goto phy_err; + return IRQ_HANDLED; + +phy_err: + phy_error(phydev); + return IRQ_NONE; +} + +/** + * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes + * @work: work_struct that describes the work to be done + */ +void phy_change_work(struct work_struct *work) +{ + struct phy_device *phydev = + container_of(work, struct phy_device, phy_queue); + + phy_change(phydev); +} + /** * phy_interrupt - PHY interrupt handler * @irq: interrupt line @@ -632,9 +703,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (PHY_HALTED == phydev->state) return IRQ_NONE; /* It can't be ours. */ - phy_change(phydev); - - return IRQ_HANDLED; + return phy_change(phydev); } /** @@ -651,32 +720,6 @@ static int phy_enable_interrupts(struct phy_device *phydev) return phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); } -/** - * phy_disable_interrupts - Disable the PHY interrupts from the PHY side - * @phydev: target phy_device struct - */ -static int phy_disable_interrupts(struct phy_device *phydev) -{ - int err; - - /* Disable PHY interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - if (err) - goto phy_err; - - /* Clear the interrupt */ - err = phy_clear_interrupt(phydev); - if (err) - goto phy_err; - - return 0; - -phy_err: - phy_error(phydev); - - return err; -} - /** * phy_start_interrupts - request and enable interrupts for a PHY device * @phydev: target phy_device struct @@ -719,50 +762,6 @@ int phy_stop_interrupts(struct phy_device *phydev) } EXPORT_SYMBOL(phy_stop_interrupts); -/** - * phy_change - Called by the phy_interrupt to handle PHY changes - * @phydev: phy_device struct that interrupted - */ -void phy_change(struct phy_device *phydev) -{ - if (phy_interrupt_is_valid(phydev)) { - if (phydev->drv->did_interrupt && - !phydev->drv->did_interrupt(phydev)) - return; - - if (phydev->state == PHY_HALTED) - if (phy_disable_interrupts(phydev)) - goto phy_err; - } - - mutex_lock(&phydev->lock); - if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) - phydev->state = PHY_CHANGELINK; - mutex_unlock(&phydev->lock); - - /* reschedule state queue work to run as soon as possible */ - phy_trigger_machine(phydev, true); - - if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev)) - goto phy_err; - return; - -phy_err: - phy_error(phydev); -} - -/** - * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes - * @work: work_struct that describes the work to be done - */ -void phy_change_work(struct work_struct *work) -{ - struct phy_device *phydev = - container_of(work, struct phy_device, phy_queue); - - phy_change(phydev); -} - /** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index d7069539f351..b260fb336b25 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1012,7 +1012,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change(struct phy_device *phydev); void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); -- cgit From 4ed50ef4da4d113fe65d9f9d049c1ce7468e3ac1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:03 -0500 Subject: bnxt_en: Refactor the functions to reserve hardware rings. The bnxt_hwrm_reserve_{pf|vf}_rings() functions are very similar to the bnxt_hwrm_check_{pf|vf}_rings() functions. Refactor the former so that the latter can make use of common code in the next patch. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 85 +++++++++++++++++++------------ 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1500243b9886..377fcebadffc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4558,18 +4558,17 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings) return rc; } -static int -bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings, int vnics) +static void +__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req, + int tx_rings, int rx_rings, int ring_grps, + int cp_rings, int vnics) { - struct hwrm_func_cfg_input req = {0}; u32 enables = 0; - int rc; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); - req.fid = cpu_to_le16(0xffff); + bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_CFG, -1, -1); + req->fid = cpu_to_le16(0xffff); enables |= tx_rings ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; - req.num_tx_rings = cpu_to_le16(tx_rings); + req->num_tx_rings = cpu_to_le16(tx_rings); if (bp->flags & BNXT_FLAG_NEW_RM) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | @@ -4578,16 +4577,53 @@ bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_stat_ctxs = req.num_cmpl_rings; - req.num_vnics = cpu_to_le16(vnics); + req->num_rx_rings = cpu_to_le16(rx_rings); + req->num_hw_ring_grps = cpu_to_le16(ring_grps); + req->num_cmpl_rings = cpu_to_le16(cp_rings); + req->num_stat_ctxs = req->num_cmpl_rings; + req->num_vnics = cpu_to_le16(vnics); } - if (!enables) + req->enables = cpu_to_le32(enables); +} + +static void +__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, + struct hwrm_func_vf_cfg_input *req, int tx_rings, + int rx_rings, int ring_grps, int cp_rings, + int vnics) +{ + u32 enables = 0; + + bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_VF_CFG, -1, -1); + enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; + enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; + enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | + FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; + enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; + + req->num_tx_rings = cpu_to_le16(tx_rings); + req->num_rx_rings = cpu_to_le16(rx_rings); + req->num_hw_ring_grps = cpu_to_le16(ring_grps); + req->num_cmpl_rings = cpu_to_le16(cp_rings); + req->num_stat_ctxs = req->num_cmpl_rings; + req->num_vnics = cpu_to_le16(vnics); + + req->enables = cpu_to_le32(enables); +} + +static int +bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, + int ring_grps, int cp_rings, int vnics) +{ + struct hwrm_func_cfg_input req = {0}; + int rc; + + __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); + if (!req.enables) return 0; - req.enables = cpu_to_le32(enables); rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; @@ -4604,7 +4640,6 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, int ring_grps, int cp_rings, int vnics) { struct hwrm_func_vf_cfg_input req = {0}; - u32 enables = 0; int rc; if (!(bp->flags & BNXT_FLAG_NEW_RM)) { @@ -4612,22 +4647,8 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, return 0; } - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1); - enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; - enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; - enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; - enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; - enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; - - req.num_tx_rings = cpu_to_le16(tx_rings); - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_stat_ctxs = req.num_cmpl_rings; - req.num_vnics = cpu_to_le16(vnics); - - req.enables = cpu_to_le32(enables); + __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; -- cgit From 6fc2ffdf1001ae4fb485b3ba95ff757ae54565c9 Mon Sep 17 00:00:00 2001 From: Eddie Wai Date: Fri, 9 Mar 2018 23:46:04 -0500 Subject: bnxt_en: Fix vnic accounting in the bnxt_check_rings() path. The number of vnics to check must be determined ahead of time because only standard RX rings require vnics to support RFS. The logic is similar to the ring reservation logic and we can now use the refactored common functions to do most of the work in setting up the firmware message. Fixes: 8f23d638b36b ("bnxt_en: Expand bnxt_check_rings() to check all resources.") Signed-off-by: Eddie Wai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 64 ++++++++++--------------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 377fcebadffc..b847d54504ed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4764,39 +4764,25 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) } static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings) + int ring_grps, int cp_rings, int vnics) { struct hwrm_func_vf_cfg_input req = {0}; - u32 flags, enables; + u32 flags; int rc; if (!(bp->flags & BNXT_FLAG_NEW_RM)) return 0; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1); + __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST; - enables = FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS | - FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS; req.flags = cpu_to_le32(flags); - req.enables = cpu_to_le32(enables); - req.num_tx_rings = cpu_to_le16(tx_rings); - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_stat_ctxs = cpu_to_le16(cp_rings); - req.num_vnics = cpu_to_le16(1); - if (bp->flags & BNXT_FLAG_RFS) - req.num_vnics = cpu_to_le16(rx_rings + 1); rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; @@ -4804,38 +4790,23 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, } static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings) + int ring_grps, int cp_rings, int vnics) { struct hwrm_func_cfg_input req = {0}; - u32 flags, enables; + u32 flags; int rc; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); - req.fid = cpu_to_le16(0xffff); + __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps, + cp_rings, vnics); flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST; - enables = FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS; - req.num_tx_rings = cpu_to_le16(tx_rings); - if (bp->flags & BNXT_FLAG_NEW_RM) { + if (bp->flags & BNXT_FLAG_NEW_RM) flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST; - enables |= FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS | - FUNC_CFG_REQ_ENABLES_NUM_VNICS; - req.num_rx_rings = cpu_to_le16(rx_rings); - req.num_cmpl_rings = cpu_to_le16(cp_rings); - req.num_hw_ring_grps = cpu_to_le16(ring_grps); - req.num_stat_ctxs = cpu_to_le16(cp_rings); - req.num_vnics = cpu_to_le16(1); - if (bp->flags & BNXT_FLAG_RFS) - req.num_vnics = cpu_to_le16(rx_rings + 1); - } + req.flags = cpu_to_le32(flags); - req.enables = cpu_to_le32(enables); rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; @@ -4843,17 +4814,17 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, } static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings, - int ring_grps, int cp_rings) + int ring_grps, int cp_rings, int vnics) { if (bp->hwrm_spec_code < 0x10801) return 0; if (BNXT_PF(bp)) return bnxt_hwrm_check_pf_rings(bp, tx_rings, rx_rings, - ring_grps, cp_rings); + ring_grps, cp_rings, vnics); return bnxt_hwrm_check_vf_rings(bp, tx_rings, rx_rings, ring_grps, - cp_rings); + cp_rings, vnics); } static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal, @@ -7552,7 +7523,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int max_rx, max_tx, tx_sets = 1; int tx_rings_needed; int rx_rings = rx; - int cp, rc; + int cp, vnics, rc; if (tcs) tx_sets = tcs; @@ -7568,10 +7539,15 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, if (max_tx < tx_rings_needed) return -ENOMEM; + vnics = 1; + if (bp->flags & BNXT_FLAG_RFS) + vnics += rx_rings; + if (bp->flags & BNXT_FLAG_AGG_RINGS) rx_rings <<= 1; cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx; - return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp); + return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp, + vnics); } static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev) -- cgit From b9ecc3400bc418af3ba9e56ea852f4ad69c23454 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Fri, 9 Mar 2018 23:46:05 -0500 Subject: bnxt_en: Remove unwanted ovs-offload messages in some conditions In some conditions when the driver fails to add a flow in HW and returns an error back to the stack, the stack continues to invoke get_flow_stats() and/or del_flow() on it. The driver fails these APIs with an error message "no flow_node for cookie". The message gets logged repeatedly as long as the stack keeps invoking these functions. Fix this by removing the corresponding netdev_info() calls from these functions. Fixes: d7bc73053024 ("bnxt_en: add code to query TC flower offload stats") Signed-off-by: Sriharsha Basavapatna Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index fbe6e208e17b..2049d4356df4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1269,11 +1269,8 @@ static int bnxt_tc_del_flow(struct bnxt *bp, flow_node = rhashtable_lookup_fast(&tc_info->flow_table, &tc_flow_cmd->cookie, tc_info->flow_ht_params); - if (!flow_node) { - netdev_info(bp->dev, "ERROR: no flow_node for cookie %lx", - tc_flow_cmd->cookie); + if (!flow_node) return -EINVAL; - } return __bnxt_tc_del_flow(bp, flow_node); } @@ -1290,11 +1287,8 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp, flow_node = rhashtable_lookup_fast(&tc_info->flow_table, &tc_flow_cmd->cookie, tc_info->flow_ht_params); - if (!flow_node) { - netdev_info(bp->dev, "Error: no flow_node for cookie %lx", - tc_flow_cmd->cookie); + if (!flow_node) return -1; - } flow = &flow_node->flow; curr_stats = &flow->stats; -- cgit From ed7bc602f60a653e5dea488e6917d9a75d6ac0dd Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:06 -0500 Subject: bnxt_en: Pass complete VLAN TCI to the stack. When receiving a packet with VLAN tag, pass the entire 16-bit TCI to the stack when calling __vlan_hwaccel_put_tag(). The current code is only passing the 12-bit tag and it is missing the priority bits. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b847d54504ed..35099c83f847 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1439,7 +1439,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vlan_proto = tpa_info->metadata >> RX_CMP_FLAGS2_METADATA_TPID_SFT; - u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK; + u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK; __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } @@ -1623,7 +1623,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) && (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); - u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK; + u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK; u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 1989c470172c..5e3d62189cab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -189,6 +189,7 @@ struct rx_cmp_ext { #define RX_CMP_FLAGS2_T_L4_CS_CALC (0x1 << 3) #define RX_CMP_FLAGS2_META_FORMAT_VLAN (0x1 << 4) __le32 rx_cmp_meta_data; + #define RX_CMP_FLAGS2_METADATA_TCI_MASK 0xffff #define RX_CMP_FLAGS2_METADATA_VID_MASK 0xfff #define RX_CMP_FLAGS2_METADATA_TPID_MASK 0xffff0000 #define RX_CMP_FLAGS2_METADATA_TPID_SFT 16 -- cgit From 832aed16ce7af2a43dafe9d4bc9080322e042cde Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:07 -0500 Subject: bnxt_en: Fix regressions when setting up MQPRIO TX rings. Recent changes added the bnxt_init_int_mode() call in the driver's open path whenever ring reservations are changed. This call was previously only called in the probe path. In the open path, if MQPRIO TC has been setup, the bnxt_init_int_mode() call would reset and mess up the MQPRIO per TC rings. Fix it by not re-initilizing bp->tx_nr_rings_per_tc in bnxt_init_int_mode(). Instead, initialize it in the probe path only after the bnxt_init_int_mode() call. Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 35099c83f847..cbdb54f61855 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5857,7 +5857,6 @@ static int bnxt_init_msix(struct bnxt *bp) if (rc) goto msix_setup_exit; - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; bp->cp_nr_rings = (min == 1) ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : bp->tx_nr_rings + bp->rx_nr_rings; @@ -5889,7 +5888,6 @@ static int bnxt_init_inta(struct bnxt *bp) bp->rx_nr_rings = 1; bp->tx_nr_rings = 1; bp->cp_nr_rings = 1; - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; bp->flags |= BNXT_FLAG_SHARED_RINGS; bp->irq_tbl[0].vector = bp->pdev->irq; return 0; @@ -8661,6 +8659,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + /* No TC has been set yet and rings may have been trimmed due to + * limited MSIX, so we re-initialize the TX rings per TC. + */ + bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bnxt_get_wol_settings(bp); if (bp->flags & BNXT_FLAG_WOL_CAP) device_set_wakeup_enable(&pdev->dev, bp->wol); -- cgit From 6ae777eab2f53b50d84a5d75a48d2d149f787da8 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Fri, 9 Mar 2018 23:46:08 -0500 Subject: bnxt_en: Return standard Linux error codes for hwrm flow cmds. Currently, internal error value is returned by the driver, when hwrm_cfa_flow_alloc() fails due lack of resources. We should be returning Linux errno value -ENOSPC instead. This patch also converts other similar command errors to standard Linux errno code (-EIO) in bnxt_tc.c Fixes: db1d36a27324 ("bnxt_en: add TC flower offload flow_alloc/free FW cmds") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 2049d4356df4..65c2cee35766 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -349,6 +349,9 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle) if (rc) netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d", __func__, flow_handle, rc); + + if (rc) + rc = -EIO; return rc; } @@ -484,13 +487,15 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, req.action_flags = cpu_to_le16(action_flags); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) *flow_handle = resp->flow_handle; - mutex_unlock(&bp->hwrm_cmd_lock); + if (rc == HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR) + rc = -ENOSPC; + else if (rc) + rc = -EIO; return rc; } @@ -561,6 +566,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); mutex_unlock(&bp->hwrm_cmd_lock); + if (rc) + rc = -EIO; return rc; } @@ -576,6 +583,9 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + + if (rc) + rc = -EIO; return rc; } @@ -624,6 +634,8 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); mutex_unlock(&bp->hwrm_cmd_lock); + if (rc) + rc = -EIO; return rc; } @@ -639,6 +651,9 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + + if (rc) + rc = -EIO; return rc; } @@ -1338,8 +1353,10 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows, } else { netdev_info(bp->dev, "error rc=%d", rc); } - mutex_unlock(&bp->hwrm_cmd_lock); + + if (rc) + rc = -EIO; return rc; } -- cgit From 1a037782e79047ec3386d8ba94c103cbdfb851d0 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Fri, 9 Mar 2018 23:46:09 -0500 Subject: bnxt_en: close & open NIC, only when the interface is in running state. bnxt_restore_pf_fw_resources routine frees PF resources by calling close_nic and allocates the resources back, by doing open_nic. However, this is not needed, if the PF is already in closed state. This bug causes the driver to call open the device and call request_irq() when it is not needed. Ultimately, pci_disable_msix() will crash when bnxt_en is unloaded. This patch fixes the problem by skipping __bnxt_close_nic and __bnxt_open_nic inside bnxt_restore_pf_fw_resources routine, if the interface is not running. Fixes: 80fcaf46c092 ("bnxt_en: Restore MSIX after disabling SRIOV.") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cbdb54f61855..cc9569f474f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8432,13 +8432,20 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp) return 0; bnxt_hwrm_func_qcaps(bp); - __bnxt_close_nic(bp, true, false); + + if (netif_running(bp->dev)) + __bnxt_close_nic(bp, true, false); + bnxt_clear_int_mode(bp); rc = bnxt_init_int_mode(bp); - if (rc) - dev_close(bp->dev); - else - rc = bnxt_open_nic(bp, true, false); + + if (netif_running(bp->dev)) { + if (rc) + dev_close(bp->dev); + else + rc = bnxt_open_nic(bp, true, false); + } + return rc; } -- cgit From 3c4fe80b32c685bdc02b280814d0cfe80d441c72 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 9 Mar 2018 23:46:10 -0500 Subject: bnxt_en: Check valid VNIC ID in bnxt_hwrm_vnic_set_tpa(). During initialization, if we encounter errors, there is a code path that calls bnxt_hwrm_vnic_set_tpa() with invalid VNIC ID. This may cause a warning in firmware logs. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cc9569f474f5..c7e5e6f09647 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3847,6 +3847,9 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_tpa_cfg_input req = {0}; + if (vnic->fw_vnic_id == INVALID_HW_RING_ID) + return 0; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_TPA_CFG, -1, -1); if (tpa_flags) { -- cgit From bf2ae2e4bf9360e07c0cdfa166bcdc0afd92f4ce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 10 Mar 2018 18:57:50 +0800 Subject: sock_diag: request _diag module only when the family or proto has been registered Now when using 'ss' in iproute, kernel would try to load all _diag modules, which also causes corresponding family and proto modules to be loaded as well due to module dependencies. Like after running 'ss', sctp, dccp, af_packet (if it works as a module) would be loaded. For example: $ lsmod|grep sctp $ ss $ lsmod|grep sctp sctp_diag 16384 0 sctp 323584 5 sctp_diag inet_diag 24576 4 raw_diag,tcp_diag,sctp_diag,udp_diag libcrc32c 16384 3 nf_conntrack,nf_nat,sctp As these family and proto modules are loaded unintentionally, it could cause some problems, like: - Some debug tools use 'ss' to collect the socket info, which loads all those diag and family and protocol modules. It's noisy for identifying issues. - Users usually expect to drop sctp init packet silently when they have no sense of sctp protocol instead of sending abort back. - It wastes resources (especially with multiple netns), and SCTP module can't be unloaded once it's loaded. ... In short, it's really inappropriate to have these family and proto modules loaded unexpectedly when just doing debugging with inet_diag. This patch is to introduce sock_load_diag_module() where it loads the _diag module only when it's corresponding family or proto has been already registered. Note that we can't just load _diag module without the family or proto loaded, as some symbols used in _diag module are from the family or proto module. v1->v2: - move inet proto check to inet_diag to avoid a compiling err. v2->v3: - define sock_load_diag_module in sock.c and export one symbol only. - improve the changelog. Reported-by: Sabrina Dubroca Acked-by: Marcelo Ricardo Leitner Acked-by: Phil Sutter Acked-by: Sabrina Dubroca Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/net.h | 1 + include/net/sock.h | 1 + net/core/sock.c | 21 +++++++++++++++++++++ net/core/sock_diag.c | 12 ++++-------- net/ipv4/inet_diag.c | 3 +-- net/socket.c | 5 +++++ 6 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 91216b16feb7..2a0391eea05c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -222,6 +222,7 @@ enum { int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); +bool sock_is_registered(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); diff --git a/include/net/sock.h b/include/net/sock.h index 169c92afcafa..ae23f3b389ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1137,6 +1137,7 @@ struct proto { int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); +int sock_load_diag_module(int family, int protocol); #ifdef SOCK_REFCNT_DEBUG static inline void sk_refcnt_debug_inc(struct sock *sk) diff --git a/net/core/sock.c b/net/core/sock.c index c501499a04fe..85b0b64e7f9d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3261,6 +3261,27 @@ void proto_unregister(struct proto *prot) } EXPORT_SYMBOL(proto_unregister); +int sock_load_diag_module(int family, int protocol) +{ + if (!protocol) { + if (!sock_is_registered(family)) + return -ENOENT; + + return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, family); + } + +#ifdef CONFIG_INET + if (family == AF_INET && + !rcu_access_pointer(inet_protos[protocol])) + return -ENOENT; +#endif + + return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, family, protocol); +} +EXPORT_SYMBOL(sock_load_diag_module); + #ifdef CONFIG_PROC_FS static void *proto_seq_start(struct seq_file *seq, loff_t *pos) __acquires(proto_list_mutex) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 146b50e30659..c37b5be7c5e4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (sock_diag_handlers[req->sdiag_family] == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, req->sdiag_family); + sock_load_diag_module(req->sdiag_family, 0); mutex_lock(&sock_diag_table_mutex); hndl = sock_diag_handlers[req->sdiag_family]; @@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, case TCPDIAG_GETSOCK: case DCCPDIAG_GETSOCK: if (inet_rcv_compat == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + sock_load_diag_module(AF_INET, 0); mutex_lock(&sock_diag_table_mutex); if (inet_rcv_compat != NULL) @@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group) case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: if (!sock_diag_handlers[AF_INET]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: if (!sock_diag_handlers[AF_INET6]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET6); + sock_load_diag_module(AF_INET6, 0); break; } return 0; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a383f299ce24..4e5bc4b2f14e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex); static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { if (!inet_diag_table[proto]) - request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET, proto); + sock_load_diag_module(AF_INET, proto); mutex_lock(&inet_diag_table_mutex); if (!inet_diag_table[proto]) diff --git a/net/socket.c b/net/socket.c index a93c99b518ca..08847c3b8c39 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2587,6 +2587,11 @@ void sock_unregister(int family) } EXPORT_SYMBOL(sock_unregister); +bool sock_is_registered(int family) +{ + return family < NPROTO && rcu_access_pointer(net_families[family]); +} + static int __init sock_init(void) { int err; -- cgit From b6b76dd62c56f257cdd30900ed22668c42a74030 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 12 Mar 2018 19:00:49 +0900 Subject: error-injection: Fix to prohibit jump optimization Since the kprobe which was optimized by jump can not change the execution path, the kprobe for error-injection must not be optimized. To prohibit it, set a dummy post-handler as officially stated in Documentation/kprobes.txt. Fixes: 4b1a29a7f542 ("error-injection: Support fault injection framework") Signed-off-by: Masami Hiramatsu Signed-off-by: Daniel Borkmann --- kernel/fail_function.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/fail_function.c b/kernel/fail_function.c index 21b0122cb39c..1d5632d8bbcc 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -14,6 +14,15 @@ static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs); +static void fei_post_handler(struct kprobe *kp, struct pt_regs *regs, + unsigned long flags) +{ + /* + * A dummy post handler is required to prohibit optimizing, because + * jump optimization does not support execution path overriding. + */ +} + struct fei_attr { struct list_head list; struct kprobe kp; @@ -56,6 +65,7 @@ static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr) return NULL; } attr->kp.pre_handler = fei_kprobe_handler; + attr->kp.post_handler = fei_post_handler; attr->retval = adjust_error_retval(addr, 0); INIT_LIST_HEAD(&attr->list); } -- cgit From 3016e0a0c91246e55418825ba9aae271be267522 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 6 Mar 2018 10:55:52 +0900 Subject: Revert "e1000e: Separate signaling for link check/link up" This reverts commit 19110cfbb34d4af0cdfe14cd243f3b09dc95b013. This reverts commit 4110e02eb45ea447ec6f5459c9934de0a273fb91. This reverts commit d3604515c9eda464a92e8e67aae82dfe07fe3c98. Commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") changed what happens to the link status when there is an error which happens after "get_link_status = false" in the copper check_for_link callbacks. Previously, such an error would be ignored and the link considered up. After that commit, any error implies that the link is down. Revert commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") and its followups. After reverting, the race condition described in the log of commit 19110cfbb34d is reintroduced. It may still be triggered by LSC events but this should keep the link down in case the link is electrically unstable, as discussed. The race may no longer be triggered by RXO events because commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") restored reading icr in the Other handler. Link: https://lkml.org/lkml/2018/3/1/789 Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 13 ++++--------- drivers/net/ethernet/intel/e1000e/mac.c | 13 ++++--------- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index ff308b05d68c..d6d4ed7acf03 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1367,9 +1367,6 @@ out: * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. - * - * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link - * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1385,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 1; + return 0; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1602,7 +1599,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return 1; + return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to @@ -1616,12 +1613,10 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) { + if (ret_val) e_dbg("Error configuring flow control\n"); - return ret_val; - } - return 1; + return ret_val; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index db735644b312..b322011ec282 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -410,9 +410,6 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. - * - * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link - * up). **/ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) { @@ -426,7 +423,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 1; + return 0; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -450,7 +447,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return 1; + return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to @@ -464,12 +461,10 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) { + if (ret_val) e_dbg("Error configuring flow control\n"); - return ret_val; - } - return 1; + return ret_val; } /** diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c0f23446bf26..dc853b0863af 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5090,7 +5090,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) case e1000_media_type_copper: if (hw->mac.get_link_status) { ret_val = hw->mac.ops.check_for_link(hw); - link_active = ret_val > 0; + link_active = !hw->mac.get_link_status; } else { link_active = true; } -- cgit From e2710dbf0dc1e37d85368e2404049dadda848d5a Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 6 Mar 2018 10:55:53 +0900 Subject: e1000e: Fix link check race condition Alex reported the following race condition: /* link goes up... interrupt... schedule watchdog */ \ e1000_watchdog_task \ e1000e_has_link \ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link \ e1000e_phy_has_link_generic(..., &link) link = true /* link goes down... interrupt */ \ e1000_msix_other hw->mac.get_link_status = true /* link is up */ mac->get_link_status = false link_active = true /* link_active is true, wrongly, and stays so because * get_link_status is false */ Avoid this problem by making sure that we don't set get_link_status = false after having checked the link. It seems this problem has been present since the introduction of e1000e. Link: https://lkml.org/lkml/2018/1/29/338 Reported-by: Alexander Duyck Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 31 ++++++++++++++++------------- drivers/net/ethernet/intel/e1000e/mac.c | 14 ++++++------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d6d4ed7acf03..1dddfb7b2de6 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1383,6 +1383,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ if (!mac->get_link_status) return 0; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1390,12 +1391,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pchlan) { ret_val = e1000_k1_gig_workaround_hv(hw, link); if (ret_val) - return ret_val; + goto out; } /* When connected at 10Mbps half-duplex, some parts are excessively @@ -1428,7 +1429,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pch2lan) emi_addr = I82579_RX_CONFIG; @@ -1450,7 +1451,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type >= e1000_pch_spt) { u16 data; @@ -1459,14 +1460,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (speed == SPEED_1000) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_rphy_locked(hw, PHY_REG(776, 20), &data); if (ret_val) { hw->phy.ops.release(hw); - return ret_val; + goto out; } ptr_gap = (data & (0x3FF << 2)) >> 2; @@ -1480,18 +1481,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } else { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_wphy_locked(hw, PHY_REG(776, 20), 0xC023); hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } } @@ -1518,7 +1519,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) { ret_val = e1000_k1_workaround_lpt_lp(hw, link); if (ret_val) - return ret_val; + goto out; } if (hw->mac.type >= e1000_pch_lpt) { /* Set platform power management values for @@ -1526,7 +1527,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000_platform_pm_pch_lpt(hw, link); if (ret_val) - return ret_val; + goto out; } /* Clear link partner's EEE ability */ @@ -1549,9 +1550,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } if (!link) - return 0; /* No link detected */ - - mac->get_link_status = false; + goto out; switch (hw->mac.type) { case e1000_pch2lan: @@ -1617,6 +1616,10 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) e_dbg("Error configuring flow control\n"); return ret_val; + +out: + mac->get_link_status = true; + return ret_val; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index b322011ec282..5bdc3a2d4fd7 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -424,19 +424,15 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) */ if (!mac->get_link_status) return 0; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) - return 0; /* No link detected */ - - mac->get_link_status = false; + if (ret_val || !link) + goto out; /* Check if there was DownShift, must be checked * immediately after link-up @@ -465,6 +461,10 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) e_dbg("Error configuring flow control\n"); return ret_val; + +out: + mac->get_link_status = true; + return ret_val; } /** -- cgit From 2f987a76a97773beafbc615b9c4d8fe79129a7f4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Mar 2018 14:54:23 +0100 Subject: net: ipv6: keep sk status consistent after datagram connect failure On unsuccesful ip6_datagram_connect(), if the failure is caused by ip6_datagram_dst_update(), the sk peer information are cleared, but the sk->sk_state is preserved. If the socket was already in an established status, the overall sk status is inconsistent and fouls later checks in datagram code. Fix this saving the old peer information and restoring them in case of failure. This also aligns ipv6 datagram connect() behavior with ipv4. v1 -> v2: - added missing Fixes tag Fixes: 85cb73ff9b74 ("net: ipv6: reset daddr and dport in sk if connect() fails") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fbf08ce3f5ab..8a9ac2d0f5d3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -146,10 +146,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr; + struct in6_addr *daddr, old_daddr; + __be32 fl6_flowlabel = 0; + __be32 old_fl6_flowlabel; + __be32 old_dport; int addr_type; int err; - __be32 fl6_flowlabel = 0; if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) @@ -238,9 +240,13 @@ ipv4_connected: } } + /* save the current peer information before updating it */ + old_daddr = sk->sk_v6_daddr; + old_fl6_flowlabel = np->flow_label; + old_dport = inet->inet_dport; + sk->sk_v6_daddr = *daddr; np->flow_label = fl6_flowlabel; - inet->inet_dport = usin->sin6_port; /* @@ -250,11 +256,12 @@ ipv4_connected: err = ip6_datagram_dst_update(sk, true); if (err) { - /* Reset daddr and dport so that udp_v6_early_demux() - * fails to find this socket + /* Restore the socket peer info, to keep it consistent with + * the old socket state */ - memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); - inet->inet_dport = 0; + sk->sk_v6_daddr = old_daddr; + np->flow_label = old_fl6_flowlabel; + inet->inet_dport = old_dport; goto out; } -- cgit From b954f94023dcc61388c8384f0f14eb8e42c863c5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Mar 2018 14:54:24 +0100 Subject: l2tp: fix races with ipv4-mapped ipv6 addresses The l2tp_tunnel_create() function checks for v4mapped ipv6 sockets and cache that flag, so that l2tp core code can reusing it at xmit time. If the socket is provided by the userspace, the connection status of the tunnel sockets can change between the tunnel creation and the xmit call, so that syzbot is able to trigger the following splat: BUG: KASAN: use-after-free in ip6_dst_idev include/net/ip6_fib.h:192 [inline] BUG: KASAN: use-after-free in ip6_xmit+0x1f76/0x2260 net/ipv6/ip6_output.c:264 Read of size 8 at addr ffff8801bd949318 by task syz-executor4/23448 CPU: 0 PID: 23448 Comm: syz-executor4 Not tainted 4.16.0-rc4+ #65 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report+0x23c/0x360 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 ip6_dst_idev include/net/ip6_fib.h:192 [inline] ip6_xmit+0x1f76/0x2260 net/ipv6/ip6_output.c:264 inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139 l2tp_xmit_core net/l2tp/l2tp_core.c:1053 [inline] l2tp_xmit_skb+0x105f/0x1410 net/l2tp/l2tp_core.c:1148 pppol2tp_sendmsg+0x470/0x670 net/l2tp/l2tp_ppp.c:341 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg+0xca/0x110 net/socket.c:640 ___sys_sendmsg+0x767/0x8b0 net/socket.c:2046 __sys_sendmsg+0xe5/0x210 net/socket.c:2080 SYSC_sendmsg net/socket.c:2091 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2087 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x453e69 RSP: 002b:00007f819593cc68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f819593d6d4 RCX: 0000000000453e69 RDX: 0000000000000081 RSI: 000000002037ffc8 RDI: 0000000000000004 RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000004c3 R14: 00000000006f72e8 R15: 0000000000000000 This change addresses the issues: * explicitly checking for TCP_ESTABLISHED for user space provided sockets * dropping the v4mapped flag usage - it can become outdated - and explicitly invoking ipv6_addr_v4mapped() instead The issue is apparently there since ancient times. v1 -> v2: (many thanks to Guillaume) - with csum issue introduced in v1 - replace pr_err with pr_debug - fix build issue with IPV6 disabled - move l2tp_sk_is_v4mapped in l2tp_core.c v2 -> v3: - don't update inet_daddr for v4mapped address, unneeded - drop rendundant check at creation time Reported-and-tested-by: syzbot+92fa328176eb07e4ac1a@syzkaller.appspotmail.com Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 38 ++++++++++++++++++-------------------- net/l2tp/l2tp_core.h | 3 --- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e22512e32827..14b67dfacc4b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -111,6 +111,13 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; +#if IS_ENABLED(CONFIG_IPV6) +static bool l2tp_sk_is_v6(struct sock *sk) +{ + return sk->sk_family == PF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr); +} +#endif static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) { @@ -1049,7 +1056,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->ignore_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) + if (l2tp_sk_is_v6(tunnel->sock)) error = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif @@ -1112,6 +1119,15 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len goto out_unlock; } + /* The user-space may change the connection status for the user-space + * provided socket at run time: we must check it under the socket lock + */ + if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) { + kfree_skb(skb); + ret = NET_XMIT_DROP; + goto out_unlock; + } + /* Get routing info from the tunnel socket */ skb_dst_drop(skb); skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); @@ -1131,7 +1147,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) + if (l2tp_sk_is_v6(sk)) udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, &sk->sk_v6_daddr, udp_len); @@ -1511,24 +1527,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - if (ipv6_addr_v4mapped(&np->saddr) && - ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { - struct inet_sock *inet = inet_sk(sk); - - tunnel->v4mapped = true; - inet->inet_saddr = np->saddr.s6_addr32[3]; - inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3]; - inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3]; - } else { - tunnel->v4mapped = false; - } - } -#endif - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a1aa9550f04e..2718d0b284d0 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -188,9 +188,6 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ -#if IS_ENABLED(CONFIG_IPV6) - bool v4mapped; -#endif struct work_struct del_work; -- cgit From 5a9f8df68ee6927f21dd3f2c75c16feb8b53a9e8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 12 Mar 2018 16:00:40 -0700 Subject: net: dsa: Fix dsa_is_user_port() test inversion During the conversion to dsa_is_user_port(), a condition ended up being reversed, which would prevent the creation of any user port when using the legacy binding and/or platform data, fix that. Fixes: 4a5b85ffe2a0 ("net: dsa: use dsa_is_user_port everywhere") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index cb54b81d0bd9..42a7b85b84e1 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -194,7 +194,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, ds->ports[i].dn = cd->port_dn[i]; ds->ports[i].cpu_dp = dst->cpu_dp; - if (dsa_is_user_port(ds, i)) + if (!dsa_is_user_port(ds, i)) continue; ret = dsa_slave_create(&ds->ports[i]); -- cgit From 0dcd7876029b58770f769cbb7b484e88e4a305e5 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 7 Mar 2018 14:42:53 -0800 Subject: net: xfrm: use preempt-safe this_cpu_read() in ipcomp_alloc_tfms() f7c83bcbfaf5 ("net: xfrm: use __this_cpu_read per-cpu helper") added a __this_cpu_read() call inside ipcomp_alloc_tfms(). At the time, __this_cpu_read() required the caller to either not care about races or to handle preemption/interrupt issues. 3.15 tightened the rules around some per-cpu operations, and now __this_cpu_read() should never be used in a preemptible context. On 3.15 and later, we need to use this_cpu_read() instead. syzkaller reported this leading to the following kernel BUG while fuzzing sendmsg: BUG: using __this_cpu_read() in preemptible [00000000] code: repro/3101 caller is ipcomp_init_state+0x185/0x990 CPU: 3 PID: 3101 Comm: repro Not tainted 4.16.0-rc4-00123-g86f84779d8e9 #154 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0xb9/0x115 check_preemption_disabled+0x1cb/0x1f0 ipcomp_init_state+0x185/0x990 ? __xfrm_init_state+0x876/0xc20 ? lock_downgrade+0x5e0/0x5e0 ipcomp4_init_state+0xaa/0x7c0 __xfrm_init_state+0x3eb/0xc20 xfrm_init_state+0x19/0x60 pfkey_add+0x20df/0x36f0 ? pfkey_broadcast+0x3dd/0x600 ? pfkey_sock_destruct+0x340/0x340 ? pfkey_seq_stop+0x80/0x80 ? __skb_clone+0x236/0x750 ? kmem_cache_alloc+0x1f6/0x260 ? pfkey_sock_destruct+0x340/0x340 ? pfkey_process+0x62a/0x6f0 pfkey_process+0x62a/0x6f0 ? pfkey_send_new_mapping+0x11c0/0x11c0 ? mutex_lock_io_nested+0x1390/0x1390 pfkey_sendmsg+0x383/0x750 ? dump_sp+0x430/0x430 sock_sendmsg+0xc0/0x100 ___sys_sendmsg+0x6c8/0x8b0 ? copy_msghdr_from_user+0x3b0/0x3b0 ? pagevec_lru_move_fn+0x144/0x1f0 ? find_held_lock+0x32/0x1c0 ? do_huge_pmd_anonymous_page+0xc43/0x11e0 ? lock_downgrade+0x5e0/0x5e0 ? get_kernel_page+0xb0/0xb0 ? _raw_spin_unlock+0x29/0x40 ? do_huge_pmd_anonymous_page+0x400/0x11e0 ? __handle_mm_fault+0x553/0x2460 ? __fget_light+0x163/0x1f0 ? __sys_sendmsg+0xc7/0x170 __sys_sendmsg+0xc7/0x170 ? SyS_shutdown+0x1a0/0x1a0 ? __do_page_fault+0x5a0/0xca0 ? lock_downgrade+0x5e0/0x5e0 SyS_sendmsg+0x27/0x40 ? __sys_sendmsg+0x170/0x170 do_syscall_64+0x19f/0x640 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x7f0ee73dfb79 RSP: 002b:00007ffe14fc15a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0ee73dfb79 RDX: 0000000000000000 RSI: 00000000208befc8 RDI: 0000000000000004 RBP: 00007ffe14fc15b0 R08: 00007ffe14fc15c0 R09: 00007ffe14fc15c0 R10: 0000000000000000 R11: 0000000000000207 R12: 0000000000400440 R13: 00007ffe14fc16b0 R14: 0000000000000000 R15: 0000000000000000 Signed-off-by: Greg Hackmann Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_ipcomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index ccfdc7115a83..a00ec715aa46 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -283,7 +283,7 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name) struct crypto_comp *tfm; /* This can be any valid CPU ID so we don't need locking. */ - tfm = __this_cpu_read(*pos->tfms); + tfm = this_cpu_read(*pos->tfms); if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; -- cgit From f89782c2d131e6eae0d1ea2569ba76bc4c5875fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Mar 2018 12:09:38 +0300 Subject: qed: Use after free in qed_rdma_free() We're dereferencing "p_hwfn->p_rdma_info" but that is freed on the line before in qed_rdma_resc_free(p_hwfn). Fixes: 9de506a547c0 ("qed: Free RoCE ILT Memory on rmmod qedr") Signed-off-by: Dan Carpenter Acked-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index f3ee6538b553..a411f9c702a1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -379,8 +379,8 @@ static void qed_rdma_free(struct qed_hwfn *p_hwfn) DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); qed_rdma_free_reserved_lkey(p_hwfn); - qed_rdma_resc_free(p_hwfn); qed_cxt_free_proto_ilt(p_hwfn, p_hwfn->p_rdma_info->proto); + qed_rdma_resc_free(p_hwfn); } static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) -- cgit From f4353daf4905c0099fd25fa742e2ffd4a4bab26a Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 14 Mar 2018 11:52:56 +0000 Subject: can: cc770: Fix stalls on rt-linux, remove redundant IRQ ack This has been reported to cause stalls on rt-linux. Suggested-by: Richard Weinberger Tested-by: Richard Weinberger Signed-off-by: Andri Yngvason Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 1e37313054f3..9fed163262e0 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -447,15 +447,6 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_bytes += dlc; - - /* - * HM: We had some cases of repeated IRQs so make sure the - * INT is acknowledged I know it's already further up, but - * doing again fixed the issue - */ - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES); - return NETDEV_TX_OK; } @@ -684,12 +675,6 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) /* Nothing more to send, switch off interrupts */ cc770_write_reg(priv, msgobj[mo].ctrl0, MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); - /* - * We had some cases of repeated IRQ so make sure the - * INT is acknowledged - */ - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES); stats->tx_packets++; can_get_echo_skb(dev, 0); -- cgit From 746201235b3f876792099079f4c6fea941d76183 Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 14 Mar 2018 11:52:57 +0000 Subject: can: cc770: Fix queue stall & dropped RTR reply While waiting for the TX object to send an RTR, an external message with a matching id can overwrite the TX data. In this case we must call the rx routine and then try transmitting the message that was overwritten again. The queue was being stalled because the RX event did not generate an interrupt to wake up the queue again and the TX event did not happen because the TXRQST flag is reset by the chip when new data is received. According to the CC770 datasheet the id of a message object should not be changed while the MSGVAL bit is set. This has been fixed by resetting the MSGVAL bit before modifying the object in the transmit function and setting it after. It is not enough to set & reset CPUUPD. It is important to keep the MSGVAL bit reset while the message object is being modified. Otherwise, during RTR transmission, a frame with matching id could trigger an rx-interrupt, which would cause a race condition between the interrupt routine and the transmit function. Signed-off-by: Andri Yngvason Tested-by: Richard Weinberger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770.c | 94 ++++++++++++++++++++++++++++++------------- drivers/net/can/cc770/cc770.h | 2 + 2 files changed, 68 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 9fed163262e0..2743d82d4424 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -390,37 +390,23 @@ static int cc770_get_berr_counter(const struct net_device *dev, return 0; } -static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) +static void cc770_tx(struct net_device *dev, int mo) { struct cc770_priv *priv = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct can_frame *cf = (struct can_frame *)skb->data; - unsigned int mo = obj2msgobj(CC770_OBJ_TX); + struct can_frame *cf = (struct can_frame *)priv->tx_skb->data; u8 dlc, rtr; u32 id; int i; - if (can_dropped_invalid_skb(dev, skb)) - return NETDEV_TX_OK; - - if ((cc770_read_reg(priv, - msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) { - netdev_err(dev, "TX register is still occupied!\n"); - return NETDEV_TX_BUSY; - } - - netif_stop_queue(dev); - dlc = cf->can_dlc; id = cf->can_id; - if (cf->can_id & CAN_RTR_FLAG) - rtr = 0; - else - rtr = MSGCFG_DIR; + rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR; + + cc770_write_reg(priv, msgobj[mo].ctrl0, + MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); cc770_write_reg(priv, msgobj[mo].ctrl1, RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES); - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES); + if (id & CAN_EFF_FLAG) { id &= CAN_EFF_MASK; cc770_write_reg(priv, msgobj[mo].config, @@ -439,13 +425,30 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i < dlc; i++) cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]); - /* Store echo skb before starting the transfer */ - can_put_echo_skb(skb, dev, 0); - cc770_write_reg(priv, msgobj[mo].ctrl1, - RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC); + RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC); + cc770_write_reg(priv, msgobj[mo].ctrl0, + MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC); +} + +static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct cc770_priv *priv = netdev_priv(dev); + unsigned int mo = obj2msgobj(CC770_OBJ_TX); + + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; - stats->tx_bytes += dlc; + netif_stop_queue(dev); + + if ((cc770_read_reg(priv, + msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) { + netdev_err(dev, "TX register is still occupied!\n"); + return NETDEV_TX_BUSY; + } + + priv->tx_skb = skb; + cc770_tx(dev, mo); return NETDEV_TX_OK; } @@ -671,13 +674,47 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) struct cc770_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; unsigned int mo = obj2msgobj(o); + struct can_frame *cf; + u8 ctrl1; + + ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1); - /* Nothing more to send, switch off interrupts */ cc770_write_reg(priv, msgobj[mo].ctrl0, MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); + cc770_write_reg(priv, msgobj[mo].ctrl1, + RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES); - stats->tx_packets++; + if (unlikely(!priv->tx_skb)) { + netdev_err(dev, "missing tx skb in tx interrupt\n"); + return; + } + + if (unlikely(ctrl1 & MSGLST_SET)) { + stats->rx_over_errors++; + stats->rx_errors++; + } + + /* When the CC770 is sending an RTR message and it receives a regular + * message that matches the id of the RTR message, it will overwrite the + * outgoing message in the TX register. When this happens we must + * process the received message and try to transmit the outgoing skb + * again. + */ + if (unlikely(ctrl1 & NEWDAT_SET)) { + cc770_rx(dev, mo, ctrl1); + cc770_tx(dev, mo); + return; + } + + can_put_echo_skb(priv->tx_skb, dev, 0); can_get_echo_skb(dev, 0); + + cf = (struct can_frame *)priv->tx_skb->data; + stats->tx_bytes += cf->can_dlc; + stats->tx_packets++; + + priv->tx_skb = NULL; + netif_wake_queue(dev); } @@ -789,6 +826,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv) priv->can.do_set_bittiming = cc770_set_bittiming; priv->can.do_set_mode = cc770_set_mode; priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; + priv->tx_skb = NULL; memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags)); diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h index a1739db98d91..95752e1d1283 100644 --- a/drivers/net/can/cc770/cc770.h +++ b/drivers/net/can/cc770/cc770.h @@ -193,6 +193,8 @@ struct cc770_priv { u8 cpu_interface; /* CPU interface register */ u8 clkout; /* Clock out register */ u8 bus_config; /* Bus conffiguration register */ + + struct sk_buff *tx_skb; }; struct net_device *alloc_cc770dev(int sizeof_priv); -- cgit From 4dcb31d4649df36297296b819437709f5407059c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2018 09:04:16 -0700 Subject: net: use skb_to_full_sk() in skb_update_prio() Andrei Vagin reported a KASAN: slab-out-of-bounds error in skb_update_prio() Since SYNACK might be attached to a request socket, we need to get back to the listener socket. Since this listener is manipulated without locks, add const qualifiers to sock_cgroup_prioidx() so that the const can also be used in skb_update_prio() Also add the const qualifier to sock_cgroup_classid() for consistency. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Reported-by: Andrei Vagin Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 4 ++-- net/core/dev.c | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9f242b876fde..f8e76d01a5ad 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -755,13 +755,13 @@ struct sock_cgroup_data { * updaters and return part of the previous pointer as the prioidx or * classid. Such races are short-lived and the result isn't critical. */ -static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { /* fallback to 1 which is always the ID of the root cgroup */ return (skcd->is_data & 1) ? skcd->prioidx : 1; } -static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { /* fallback to 0 which is the unconfigured default classid */ return (skcd->is_data & 1) ? skcd->classid : 0; diff --git a/net/core/dev.c b/net/core/dev.c index 2cedf520cb28..12be20535714 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3278,15 +3278,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { - struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); + const struct netprio_map *map; + const struct sock *sk; + unsigned int prioidx; - if (!skb->priority && skb->sk && map) { - unsigned int prioidx = - sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); + if (skb->priority) + return; + map = rcu_dereference_bh(skb->dev->priomap); + if (!map) + return; + sk = skb_to_full_sk(skb); + if (!sk) + return; - if (prioidx < map->priomap_len) - skb->priority = map->priomap[prioidx]; - } + prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data); + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; } #else #define skb_update_prio(skb) -- cgit From 96f413f47677366e0ae03797409bfcc4151dbf9e Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 14 Mar 2018 08:37:28 -0500 Subject: soc/fsl/qbman: fix issue in qman_delete_cgr_safe() The wait_for_completion() call in qman_delete_cgr_safe() was triggering a scheduling while atomic bug, replacing the kthread with a smp_call_function_single() call to fix it. Signed-off-by: Madalin Bucur Signed-off-by: Roy Pledge Signed-off-by: David S. Miller --- drivers/soc/fsl/qbman/qman.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index e4f5bb056fd2..ba3cfa8e279b 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2443,39 +2443,21 @@ struct cgr_comp { struct completion completion; }; -static int qman_delete_cgr_thread(void *p) +static void qman_delete_cgr_smp_call(void *p) { - struct cgr_comp *cgr_comp = (struct cgr_comp *)p; - int ret; - - ret = qman_delete_cgr(cgr_comp->cgr); - complete(&cgr_comp->completion); - - return ret; + qman_delete_cgr((struct qman_cgr *)p); } void qman_delete_cgr_safe(struct qman_cgr *cgr) { - struct task_struct *thread; - struct cgr_comp cgr_comp; - preempt_disable(); if (qman_cgr_cpus[cgr->cgrid] != smp_processor_id()) { - init_completion(&cgr_comp.completion); - cgr_comp.cgr = cgr; - thread = kthread_create(qman_delete_cgr_thread, &cgr_comp, - "cgr_del"); - - if (IS_ERR(thread)) - goto out; - - kthread_bind(thread, qman_cgr_cpus[cgr->cgrid]); - wake_up_process(thread); - wait_for_completion(&cgr_comp.completion); + smp_call_function_single(qman_cgr_cpus[cgr->cgrid], + qman_delete_cgr_smp_call, cgr, true); preempt_enable(); return; } -out: + qman_delete_cgr(cgr); preempt_enable(); } -- cgit From 88075256ee817041d68c2387f29065b5cb2b342a Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 14 Mar 2018 08:37:29 -0500 Subject: dpaa_eth: fix error in dpaa_remove() The recent changes that make the driver probing compatible with DSA were not propagated in the dpa_remove() function, breaking the module unload function. Using the proper device to address the issue. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 7caa8da48421..3af5e0c08233 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2860,7 +2860,7 @@ static int dpaa_remove(struct platform_device *pdev) struct device *dev; int err; - dev = &pdev->dev; + dev = pdev->dev.parent; net_dev = dev_get_drvdata(dev); priv = netdev_priv(net_dev); -- cgit From 565186362b73226a288830abe595f05f0cec0bbc Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 14 Mar 2018 08:37:30 -0500 Subject: dpaa_eth: remove duplicate initialization The fd_format has already been initialized at this point. Signed-off-by: Camelia Groza Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 3af5e0c08233..627f7f714b18 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2278,7 +2278,6 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, vaddr = phys_to_virt(addr); prefetch(vaddr + qm_fd_get_offset(fd)); - fd_format = qm_fd_get_format(fd); /* The only FD types that we may receive are contig and S/G */ WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg)); -- cgit From e4d1b37c17d000a3da9368a3e260fb9ea4927c25 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 14 Mar 2018 08:37:31 -0500 Subject: dpaa_eth: increment the RX dropped counter when needed Signed-off-by: Camelia Groza Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 627f7f714b18..6c0679fd78b8 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2310,8 +2310,10 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, skb_len = skb->len; - if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) + if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) { + percpu_stats->rx_dropped++; return qman_cb_dqrr_consume; + } percpu_stats->rx_packets++; percpu_stats->rx_bytes += skb_len; -- cgit From 82d141cd19d088ee41feafde4a6f86eeb40d93c5 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 14 Mar 2018 08:37:32 -0500 Subject: dpaa_eth: remove duplicate increment of the tx_errors counter The tx_errors counter is incremented by the dpaa_xmit caller. Signed-off-by: Camelia Groza Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 6c0679fd78b8..e4ec32a9ca15 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2008,7 +2008,6 @@ static inline int dpaa_xmit(struct dpaa_priv *priv, } if (unlikely(err < 0)) { - percpu_stats->tx_errors++; percpu_stats->tx_fifo_errors++; return err; } -- cgit From d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 14 Mar 2018 10:21:14 +0100 Subject: ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu Prior to the rework of PMTU information storage in commit 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer."), when a PMTU event advertising a PMTU smaller than net.ipv4.route.min_pmtu was received, we would disable setting the DF flag on packets by locking the MTU metric, and set the PMTU to net.ipv4.route.min_pmtu. Since then, we don't disable DF, and set PMTU to net.ipv4.route.min_pmtu, so the intermediate router that has this link with a small MTU will have to drop the packets. This patch reestablishes pre-2.6.39 behavior by splitting rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu. rt_mtu_locked indicates that we shouldn't set the DF bit on that path, and is checked in ip_dont_fragment(). One possible workaround is to set net.ipv4.route.min_pmtu to a value low enough to accommodate the lowest MTU encountered. Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- include/net/ip.h | 11 +++++++++-- include/net/ip_fib.h | 1 + include/net/route.h | 3 ++- net/ipv4/route.c | 26 +++++++++++++++++++------- net/ipv4/xfrm4_policy.c | 1 + 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 746abff9ce51..f49b3a576bec 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -328,6 +328,13 @@ int ip_decrease_ttl(struct iphdr *iph) return --iph->ttl; } +static inline int ip_mtu_locked(const struct dst_entry *dst) +{ + const struct rtable *rt = (const struct rtable *)dst; + + return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); +} + static inline int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) { @@ -335,7 +342,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) return pmtudisc == IP_PMTUDISC_DO || (pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric_locked(dst, RTAX_MTU))); + !ip_mtu_locked(dst)); } static inline bool ip_sk_accept_pmtu(const struct sock *sk) @@ -361,7 +368,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); if (net->ipv4.sysctl_ip_fwd_use_pmtu || - dst_metric_locked(dst, RTAX_MTU) || + ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f80524396c06..77d0a78cf7d2 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -59,6 +59,7 @@ struct fib_nh_exception { int fnhe_genid; __be32 fnhe_daddr; u32 fnhe_pmtu; + bool fnhe_mtu_locked; __be32 fnhe_gw; unsigned long fnhe_expires; struct rtable __rcu *fnhe_rth_input; diff --git a/include/net/route.h b/include/net/route.h index 40b870d58f38..20a92ca9e115 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -63,7 +63,8 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - u32 rt_pmtu; + u32 rt_mtu_locked:1, + rt_pmtu:31; u32 rt_table_id; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f9dbb8cb66bf..299e247b2032 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr) static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) { rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; rt->dst.expires = fnhe->fnhe_expires; if (fnhe->fnhe_gw) { @@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh } static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, unsigned long expires) + u32 pmtu, bool lock, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; @@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; - if (pmtu) + if (pmtu) { fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_mtu_locked = lock; + } fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); @@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = expires; /* Exception created; mark the cached routes for the nexthop @@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, jiffies + ip_rt_gc_timeout); + 0, false, + jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; struct fib_result res; + bool lock = false; - if (dst_metric_locked(dst, RTAX_MTU)) + if (ip_mtu_locked(dst)) return; if (ipv4_mtu(dst) < mtu) return; - if (mtu < ip_rt_min_pmtu) + if (mtu < ip_rt_min_pmtu) { + lock = true; mtu = ip_rt_min_pmtu; + } if (rt->rt_pmtu == mtu && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) @@ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, + update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = READ_ONCE(dst->dev->mtu); - if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { + if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } @@ -1521,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_is_input = 0; rt->rt_iif = 0; rt->rt_pmtu = 0; + rt->rt_mtu_locked = 0; rt->rt_gateway = 0; rt->rt_uses_gateway = 0; rt->rt_table_id = 0; @@ -2546,6 +2555,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; + rt->rt_mtu_locked = ort->rt_mtu_locked; rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; @@ -2648,6 +2658,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; + if (rt->rt_mtu_locked && expires) + metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8d33f7b311f4..fbebda67ac1b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); rt_add_uncached_list(&xdst->u.rt); -- cgit From ea91df6d8a55836c7401eb6d5d4d828b659d8d67 Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Wed, 14 Mar 2018 12:36:25 -0400 Subject: tg3: prevent scheduling while atomic splat The problem was introduced in commit 506b0a395f26 ("[netdrv] tg3: APE heartbeat changes"). The bug occurs because tp->lock spinlock is held which is obtained in tg3_start by way of tg3_full_lock(), line 11571. The documentation for usleep_range() specifically states it cannot be used inside a spinlock. Fixes: 506b0a395f26 ("[netdrv] tg3: APE heartbeat changes") Signed-off-by: Jonathan Toppins Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index c1841db1b500..f2593978ae75 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -820,7 +820,7 @@ static int tg3_ape_event_lock(struct tg3 *tp, u32 timeout_us) tg3_ape_unlock(tp, TG3_APE_LOCK_MEM); - usleep_range(10, 20); + udelay(10); timeout_us -= (timeout_us > 10) ? 10 : timeout_us; } -- cgit From cf55612a945039476abfd73e39064b2e721c3272 Mon Sep 17 00:00:00 2001 From: Cathy Zhou Date: Wed, 14 Mar 2018 10:56:07 -0700 Subject: sunvnet: does not support GSO for sctp The NETIF_F_GSO_SOFTWARE implies support for GSO on SCTP, but the sunvnet driver does not support GSO for sctp. Here we remove the NETIF_F_GSO_SOFTWARE feature flag and only report NETIF_F_ALL_TSO instead. Signed-off-by: Cathy Zhou Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunvnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 63d3d6b215f3..a94f50442613 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac, dev->ethtool_ops = &vnet_ethtool_ops; dev->watchdog_timeo = VNET_TX_TIMEOUT; - dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | + dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; -- cgit From 3d502067599f0db12e74e6646aee8728efe3e5be Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 13 Mar 2018 10:41:54 +0100 Subject: net/smc: simplify wait when closing listen socket Closing of a listen socket wakes up kernel_accept() of smc_tcp_listen_worker(), and then has to wait till smc_tcp_listen_worker() gives up the internal clcsock. The wait logic introduced with commit 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") might wait longer than necessary. This patch implements the idea to implement the wait just with flush_work(), and gets rid of the extra smc_close_wait_listen_clcsock() function. Fixes: 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") Reported-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 ---- net/smc/smc_close.c | 25 +++---------------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8cc97834d4f6..1e0d780855c3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -978,10 +978,6 @@ out: lsmc->clcsock = NULL; } release_sock(lsk); - /* no more listening, wake up smc_close_wait_listen_clcsock and - * accept - */ - lsk->sk_state_change(lsk); sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index e339c0186dcf..fa41d9881741 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -30,27 +30,6 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } -static void smc_close_wait_listen_clcsock(struct smc_sock *smc) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct sock *sk = &smc->sk; - signed long timeout; - - timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME; - add_wait_queue(sk_sleep(sk), &wait); - do { - release_sock(sk); - if (smc->clcsock) - timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE, - timeout); - sched_annotate_sleep(); - lock_sock(sk); - if (!smc->clcsock) - break; - } while (timeout); - remove_wait_queue(sk_sleep(sk), &wait); -} - /* wait for sndbuf data being transmitted */ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) { @@ -204,9 +183,11 @@ again: rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); /* wake up kernel_accept of smc_tcp_listen_worker */ smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); - smc_close_wait_listen_clcsock(smc); } smc_close_cleanup_listen(sk); + release_sock(sk); + flush_work(&smc->tcp_listen_work); + lock_sock(sk); break; case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); -- cgit From f44cb4b19ed40b655c2d422c9021ab2c2625adb6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 15 Mar 2018 17:02:34 +0100 Subject: Bluetooth: btusb: Fix quirk for Atheros 1525/QCA6174 The Atheros 1525/QCA6174 BT doesn't seem working properly on the recent kernels, as it tries to load a wrong firmware ar3k/AthrBT_0x00000200.dfu and it fails. This seems to have been a problem for some time, and the known workaround is to apply BTUSB_QCA_ROM quirk instead of BTUSB_ATH3012. The device in question is: T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=3004 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1082504 Reported-by: Ivan Levshin Tested-by: Ivan Levshin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 382be00a8329..366a49c7c08f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -231,7 +231,6 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, @@ -264,6 +263,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ + { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME }, -- cgit From b09c61522c81886c34966825f9e5afcbfafac446 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Mar 2018 23:06:02 +0100 Subject: Revert "Bluetooth: hci_bcm: Streamline runtime PM code" This reverts commit 43fff7683468 ("Bluetooth: hci_bcm: Streamline runtime PM code"). The commit msg for this commit states "No functional change intended.", but replacing: pm_runtime_get(); pm_runtime_mark_last_busy(); pm_runtime_put_autosuspend(); with: pm_request_resume(); Does result in a functional change, pm_request_resume() only calls pm_runtime_mark_last_busy() if the device was suspended before the call. This results in the following happening: 1) Device is runtime suspended 2) Device drives host_wake IRQ logically high as it starts receiving data 3) bcm_host_wake() gets called, causes the device to runtime-resume, current time gets marked as last_busy time 4) After 5 seconds the autosuspend timer expires and the dev autosuspends as no one has been calling pm_runtime_mark_last_busy(), the device was resumed during those 5 seconds, so all the pm_request_resume() calls while receiving data and/or bcm_host_wake() calls were nops 5) If 4) happens while the device has (just received) data in its buffer to be read by the host the IRQ line is *already* / still logically high when we autosuspend and since we use an edge triggered IRQ, the IRQ will never trigger, causing the device to get stuck in suspend Therefor this commit has to be reverted, so that we avoid the device getting stuck in suspend. Signed-off-by: Hans de Goede Acked-by: Lukas Wunner Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 6314dfb02969..af5658121601 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -244,7 +244,9 @@ static irqreturn_t bcm_host_wake(int irq, void *data) bt_dev_dbg(bdev, "Host wake IRQ"); - pm_request_resume(bdev->dev); + pm_runtime_get(bdev->dev); + pm_runtime_mark_last_busy(bdev->dev); + pm_runtime_put_autosuspend(bdev->dev); return IRQ_HANDLED; } @@ -586,8 +588,11 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count) } else if (!bcm->rx_skb) { /* Delay auto-suspend when receiving completed packet */ mutex_lock(&bcm_device_lock); - if (bcm->dev && bcm_device_exists(bcm->dev)) - pm_request_resume(bcm->dev->dev); + if (bcm->dev && bcm_device_exists(bcm->dev)) { + pm_runtime_get(bcm->dev->dev); + pm_runtime_mark_last_busy(bcm->dev->dev); + pm_runtime_put_autosuspend(bcm->dev->dev); + } mutex_unlock(&bcm_device_lock); } -- cgit From e07c99b07ae85255d5c5bc2480fbd4c4e77f71bc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Mar 2018 23:06:03 +0100 Subject: Bluetooth: hci_bcm: Set pulsed_host_wake flag in sleep parameters The IRQ output of the bcm bt-device is really a level IRQ signal, which signals a logical high as long as the device's buffer contains data. Since the draining in the buffer is done in the tty driver, we cannot (easily) wait in a threaded interrupt handler for the draining, after which the IRQ should go low again. So instead we treat the IRQ as an edge interrupt. This opens the window for a theoretical race where we wakeup, read some data and then autosuspend *before* the IRQ has gone (logical) low, followed by the device just at that moment receiving more data, causing the IRQ to stay high and we never see an edge. Since we call pm_runtime_mark_last_busy() on every received byte, there should be plenty time for the IRQ to go (logical) low before we ever suspend, so this should never happen, but after commit 43fff7683468 ("Bluetooth: hci_bcm: Streamline runtime PM code"), which has been reverted since, this was actually happening causing the device to get stuck in runtime suspend. The bcm bt-device actually has a workaround for this, if we set the pulsed_host_wake flag in the sleep parameters, then the device monitors if the host is draining the buffer and if not then after a timeout the device will pulse the IRQ line, causing us to see an edge, fixing the stuck in suspend condition. This commit sets the pulsed_host_wake flag to fix the (mostly theoretical) race caused by us treating the IRQ as an edge IRQ. Signed-off-by: Hans de Goede Reviewed-by: Lukas Wunner Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index af5658121601..40b9fb247010 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -303,7 +303,7 @@ static const struct bcm_set_sleep_mode default_sleep_params = { .usb_auto_sleep = 0, .usb_resume_timeout = 0, .break_to_host = 0, - .pulsed_host_wake = 0, + .pulsed_host_wake = 1, }; static int bcm_setup_sleep(struct hci_uart *hu) -- cgit From 51d4740f88affd85d49c04e3c9cd129c0e33bcb9 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Mon, 12 Mar 2018 16:20:58 -0400 Subject: net sched actions: return explicit error when tunnel_key mode is not specified If set/unset mode of the tunnel_key action is not provided, ->init() still returns 0, and the caller proceeds with bogus 'struct tc_action *' object, this results in crash: % tc actions add action tunnel_key src_ip 1.1.1.1 dst_ip 2.2.2.1 id 7 index 1 [ 35.805515] general protection fault: 0000 [#1] SMP PTI [ 35.806161] Modules linked in: act_tunnel_key kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd serio_raw [ 35.808233] CPU: 1 PID: 428 Comm: tc Not tainted 4.16.0-rc4+ #286 [ 35.808929] RIP: 0010:tcf_action_init+0x90/0x190 [ 35.809457] RSP: 0018:ffffb8edc068b9a0 EFLAGS: 00010206 [ 35.810053] RAX: 1320c000000a0003 RBX: 0000000000000001 RCX: 0000000000000000 [ 35.810866] RDX: 0000000000000070 RSI: 0000000000007965 RDI: ffffb8edc068b910 [ 35.811660] RBP: ffffb8edc068b9d0 R08: 0000000000000000 R09: ffffb8edc068b808 [ 35.812463] R10: ffffffffc02bf040 R11: 0000000000000040 R12: ffffb8edc068bb38 [ 35.813235] R13: 0000000000000000 R14: 0000000000000000 R15: ffffb8edc068b910 [ 35.814006] FS: 00007f3d0d8556c0(0000) GS:ffff91d1dbc40000(0000) knlGS:0000000000000000 [ 35.814881] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.815540] CR2: 000000000043f720 CR3: 0000000019248001 CR4: 00000000001606a0 [ 35.816457] Call Trace: [ 35.817158] tc_ctl_action+0x11a/0x220 [ 35.817795] rtnetlink_rcv_msg+0x23d/0x2e0 [ 35.818457] ? __slab_alloc+0x1c/0x30 [ 35.819079] ? __kmalloc_node_track_caller+0xb1/0x2b0 [ 35.819544] ? rtnl_calcit.isra.30+0xe0/0xe0 [ 35.820231] netlink_rcv_skb+0xce/0x100 [ 35.820744] netlink_unicast+0x164/0x220 [ 35.821500] netlink_sendmsg+0x293/0x370 [ 35.822040] sock_sendmsg+0x30/0x40 [ 35.822508] ___sys_sendmsg+0x2c5/0x2e0 [ 35.823149] ? pagecache_get_page+0x27/0x220 [ 35.823714] ? filemap_fault+0xa2/0x640 [ 35.824423] ? page_add_file_rmap+0x108/0x200 [ 35.825065] ? alloc_set_pte+0x2aa/0x530 [ 35.825585] ? finish_fault+0x4e/0x70 [ 35.826140] ? __handle_mm_fault+0xbc1/0x10d0 [ 35.826723] ? __sys_sendmsg+0x41/0x70 [ 35.827230] __sys_sendmsg+0x41/0x70 [ 35.827710] do_syscall_64+0x68/0x120 [ 35.828195] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 35.828859] RIP: 0033:0x7f3d0ca4da67 [ 35.829331] RSP: 002b:00007ffc9f284338 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 35.830304] RAX: ffffffffffffffda RBX: 00007ffc9f284460 RCX: 00007f3d0ca4da67 [ 35.831247] RDX: 0000000000000000 RSI: 00007ffc9f2843b0 RDI: 0000000000000003 [ 35.832167] RBP: 000000005aa6a7a9 R08: 0000000000000001 R09: 0000000000000000 [ 35.833075] R10: 00000000000005f1 R11: 0000000000000246 R12: 0000000000000000 [ 35.833997] R13: 00007ffc9f2884c0 R14: 0000000000000001 R15: 0000000000674640 [ 35.834923] Code: 24 30 bb 01 00 00 00 45 31 f6 eb 5e 8b 50 08 83 c2 07 83 e2 fc 83 c2 70 49 8b 07 48 8b 40 70 48 85 c0 74 10 48 89 14 24 4c 89 ff d0 48 8b 14 24 48 01 c2 49 01 d6 45 85 ed 74 05 41 83 47 2c [ 35.837442] RIP: tcf_action_init+0x90/0x190 RSP: ffffb8edc068b9a0 [ 35.838291] ---[ end trace a095c06ee4b97a26 ]--- Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key") Signed-off-by: Roman Mashak Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 0e23aac09ad6..fea772e66e62 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -153,6 +153,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; break; default: + ret = -EINVAL; goto err_out; } -- cgit From 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:27 +0900 Subject: net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off When we have a bridge with vlan_filtering on and a vlan device on top of it, packets would be corrupted in skb_vlan_untag() called from br_dev_xmit(). The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(), which makes use of skb->mac_len. In this function mac_len is meant for handling rx path with vlan devices with reorder_header disabled, but in tx path mac_len is typically 0 and cannot be used, which is the problem in this case. The current code even does not properly handle rx path (skb_vlan_untag() called from __netif_receive_skb_core()) with reorder_header off actually. In rx path single tag case, it works as follows: - Before skb_reorder_vlan_header() mac_header data v v +-------------------+-------------+------+---- | ETH | VLAN | ETH | | ADDRS | TPID | TCI | TYPE | +-------------------+-------------+------+---- <-------- mac_len ---------> <-------------> to be removed - After skb_reorder_vlan_header() mac_header data v v +-------------------+------+---- | ETH | ETH | | ADDRS | TYPE | +-------------------+------+---- <-------- mac_len ---------> This is ok, but in rx double tag case, it corrupts packets: - Before skb_reorder_vlan_header() mac_header data v v +-------------------+-------------+-------------+------+---- | ETH | VLAN | VLAN | ETH | | ADDRS | TPID | TCI | TPID | TCI | TYPE | +-------------------+-------------+-------------+------+---- <--------------- mac_len ----------------> <-------------> should be removed <---------------------------> actually will be removed - After skb_reorder_vlan_header() mac_header data v v +-------------------+------+---- | ETH | ETH | | ADDRS | TYPE | +-------------------+------+---- <--------------- mac_len ----------------> So, two of vlan tags are both removed while only inner one should be removed and mac_header (and mac_len) is broken. skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2), so use skb->data and skb->mac_header to calculate the right offset. Reported-by: Brandon Carpenter Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + net/core/skbuff.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 8bbbcb5cd94b..820de5d222d2 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -30,6 +30,7 @@ */ #define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_TLEN 2 /* Octets in ethernet type field */ #define ETH_HLEN 14 /* Total octets in header. */ #define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ #define ETH_DATA_LEN 1500 /* Max. octets in payload */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index baf990528943..b103f46ec512 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5020,13 +5020,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { + int mac_len; + if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, - 2 * ETH_ALEN); + mac_len = skb->data - skb_mac_header(skb); + memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), + mac_len - VLAN_HLEN - ETH_TLEN); skb->mac_header += VLAN_HLEN; return skb; } -- cgit From cbe7128c4b92e2004984f477fd38dfa81662f02e Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:28 +0900 Subject: vlan: Fix out of order vlan headers with reorder header off With reorder header off, received packets are untagged in skb_vlan_untag() called from within __netif_receive_skb_core(), and later the tag will be inserted back in vlan_do_receive(). This caused out of order vlan headers when we create a vlan device on top of another vlan device, because vlan_do_receive() inserts a tag as the outermost vlan tag. E.g. the outer tag is first removed in skb_vlan_untag() and inserted back in vlan_do_receive(), then the inner tag is next removed and inserted back as the outermost tag. This patch fixes the behaviour by inserting the inner tag at the right position. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 66 ++++++++++++++++++++++++++++++++++++++++--------- net/8021q/vlan_core.c | 4 +-- 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 5e6a2d4dc366..c4a1cff9c768 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -300,30 +300,34 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, } /** - * __vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_inner_tag - inner VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers * - * Inserts the VLAN tag into @skb as part of the payload + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns error if skb_cow_head failes. * * Does not change skb->protocol so this function can be used during receive. */ -static inline int __vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline int __vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci, + unsigned int mac_len) { struct vlan_ethhdr *veth; if (skb_cow_head(skb, VLAN_HLEN) < 0) return -ENOMEM; - veth = skb_push(skb, VLAN_HLEN); + skb_push(skb, VLAN_HLEN); - /* Move the mac addresses to the beginning of the new header. */ - memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); + /* Move the mac header sans proto to the beginning of the new header. */ + memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); skb->mac_header -= VLAN_HLEN; + veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); + /* first, the ethernet type */ veth->h_vlan_proto = vlan_proto; @@ -334,12 +338,30 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, } /** - * vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload + * Returns error if skb_cow_head failes. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline int __vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + +/** + * vlan_insert_inner_tag - inner VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers + * + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. * * Following the skb_unshare() example, in case of error, the calling function @@ -347,12 +369,14 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, + u16 vlan_tci, + unsigned int mac_len) { int err; - err = __vlan_insert_tag(skb, vlan_proto, vlan_tci); + err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len); if (err) { dev_kfree_skb_any(skb); return NULL; @@ -360,6 +384,26 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, return skb; } +/** + * vlan_insert_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + /** * vlan_insert_tag_set_proto - regular VLAN tag inserting * @skb: skbuff to tag diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 64aa9f755e1d..45c9bf5ff3a0 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -48,8 +48,8 @@ bool vlan_do_receive(struct sk_buff **skbp) * original position later */ skb_push(skb, offset); - skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto, - skb->vlan_tci); + skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto, + skb->vlan_tci, skb->mac_len); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); -- cgit From 2cc683e88c0c993ac3721d9b702cb0630abe2879 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 13 Mar 2018 12:01:43 -0700 Subject: kcm: lock lower socket in kcm_attach Need to lock lower socket in order to provide mutual exclusion with kcm_unattach. v2: Add Reported-by for syzbot Fixes: ab7ac4eb9832e32a09f4e804 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+ea75c0ffcd353d32515f064aaebefc5279e6161e@syzkaller.appspotmail.com Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index f297d53a11aa..34355fd19f27 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1381,24 +1381,32 @@ static int kcm_attach(struct socket *sock, struct socket *csock, .parse_msg = kcm_parse_func_strparser, .read_sock_done = kcm_read_sock_done, }; - int err; + int err = 0; csk = csock->sk; if (!csk) return -EINVAL; + lock_sock(csk); + /* Only allow TCP sockets to be attached for now */ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || - csk->sk_protocol != IPPROTO_TCP) - return -EOPNOTSUPP; + csk->sk_protocol != IPPROTO_TCP) { + err = -EOPNOTSUPP; + goto out; + } /* Don't allow listeners or closed sockets */ - if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) - return -EOPNOTSUPP; + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) { + err = -EOPNOTSUPP; + goto out; + } psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); - if (!psock) - return -ENOMEM; + if (!psock) { + err = -ENOMEM; + goto out; + } psock->mux = mux; psock->sk = csk; @@ -1407,7 +1415,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, err = strp_init(&psock->strp, csk, &cb); if (err) { kmem_cache_free(kcm_psockp, psock); - return err; + goto out; } write_lock_bh(&csk->sk_callback_lock); @@ -1419,7 +1427,8 @@ static int kcm_attach(struct socket *sock, struct socket *csock, write_unlock_bh(&csk->sk_callback_lock); strp_done(&psock->strp); kmem_cache_free(kcm_psockp, psock); - return -EALREADY; + err = -EALREADY; + goto out; } psock->save_data_ready = csk->sk_data_ready; @@ -1455,7 +1464,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock, /* Schedule RX work in case there are already bytes queued */ strp_check_rcv(&psock->strp); - return 0; +out: + release_sock(csk); + + return err; } static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) @@ -1507,6 +1519,7 @@ static void kcm_unattach(struct kcm_psock *psock) if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); + release_sock(csk); return; } -- cgit From 484d802d0f2f29c335563fcac2a8facf174a1bbc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 13 Mar 2018 14:45:07 -0700 Subject: net: systemport: Rewrite __bcm_sysport_tx_reclaim() There is no need for complex checking between the last consumed index and current consumed index, a simple subtraction will do. This also eliminates the possibility of a permanent transmit queue stall under the following conditions: - one CPU bursts ring->size worth of traffic (up to 256 buffers), to the point where we run out of free descriptors, so we stop the transmit queue at the end of bcm_sysport_xmit() - because of our locking, we have the transmit process disable interrupts which means we can be blocking the TX reclamation process - when TX reclamation finally runs, we will be computing the difference between ring->c_index (last consumed index by SW) and what the HW reports through its register - this register is masked with (ring->size - 1) = 0xff, which will lead to stripping the upper bits of the index (register is 16-bits wide) - we will be computing last_tx_cn as 0, which means there is no work to be done, and we never wake-up the transmit queue, leaving it permanently disabled A practical example is e.g: ring->c_index aka last_c_index = 12, we pushed 256 entries, HW consumer index = 268, we mask it with 0xff = 12, so last_tx_cn == 0, nothing happens. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 33 ++++++++++++++---------------- drivers/net/ethernet/broadcom/bcmsysport.h | 2 +- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f15a8fc6dfc9..3fc549b88c43 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -855,10 +855,12 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring, static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, struct bcm_sysport_tx_ring *ring) { - unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs; unsigned int pkts_compl = 0, bytes_compl = 0; struct net_device *ndev = priv->netdev; + unsigned int txbds_processed = 0; struct bcm_sysport_cb *cb; + unsigned int txbds_ready; + unsigned int c_index; u32 hw_ind; /* Clear status before servicing to reduce spurious interrupts */ @@ -871,29 +873,23 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, /* Compute how many descriptors have been processed since last call */ hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index)); c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK; - ring->p_index = (hw_ind & RING_PROD_INDEX_MASK); - - last_c_index = ring->c_index; - num_tx_cbs = ring->size; - - c_index &= (num_tx_cbs - 1); - - if (c_index >= last_c_index) - last_tx_cn = c_index - last_c_index; - else - last_tx_cn = num_tx_cbs - last_c_index + c_index; + txbds_ready = (c_index - ring->c_index) & RING_CONS_INDEX_MASK; netif_dbg(priv, tx_done, ndev, - "ring=%d c_index=%d last_tx_cn=%d last_c_index=%d\n", - ring->index, c_index, last_tx_cn, last_c_index); + "ring=%d old_c_index=%u c_index=%u txbds_ready=%u\n", + ring->index, ring->c_index, c_index, txbds_ready); - while (last_tx_cn-- > 0) { - cb = ring->cbs + last_c_index; + while (txbds_processed < txbds_ready) { + cb = &ring->cbs[ring->clean_index]; bcm_sysport_tx_reclaim_one(ring, cb, &bytes_compl, &pkts_compl); ring->desc_count++; - last_c_index++; - last_c_index &= (num_tx_cbs - 1); + txbds_processed++; + + if (likely(ring->clean_index < ring->size - 1)) + ring->clean_index++; + else + ring->clean_index = 0; } u64_stats_update_begin(&priv->syncp); @@ -1394,6 +1390,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); ring->index = index; ring->size = size; + ring->clean_index = 0; ring->alloc_size = ring->size; ring->desc_cpu = p; ring->desc_count = ring->size; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index f5a984c1c986..19c91c76e327 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -706,7 +706,7 @@ struct bcm_sysport_tx_ring { unsigned int desc_count; /* Number of descriptors */ unsigned int curr_desc; /* Current descriptor */ unsigned int c_index; /* Last consumer index */ - unsigned int p_index; /* Current producer index */ + unsigned int clean_index; /* Current clean index */ struct bcm_sysport_cb *cbs; /* Transmit control blocks */ struct dma_desc *desc_cpu; /* CPU view of the descriptor */ struct bcm_sysport_priv *priv; /* private context backpointer */ -- cgit From fa6a91e9b907231d2e38ea5ed89c537b3525df3d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 13 Mar 2018 16:50:06 +0100 Subject: net/iucv: Free memory obtained by kzalloc Free memory by calling put_device(), if afiucv_iucv_init is not successful. Signed-off-by: Arvind Yadav Reviewed-by: Cornelia Huck Signed-off-by: Ursula Braun Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1e8cc7bcbca3..9e2643ab4ccb 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2433,9 +2433,11 @@ static int afiucv_iucv_init(void) af_iucv_dev->driver = &af_iucv_driver; err = device_register(af_iucv_dev); if (err) - goto out_driver; + goto out_iucv_dev; return 0; +out_iucv_dev: + put_device(af_iucv_dev); out_driver: driver_unregister(&af_iucv_driver); out_iucv: -- cgit From 933e8c91b9f5a2f504f6da1f069c410449b9f4b9 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 14 Mar 2018 14:49:27 +0200 Subject: qed: Fix MPA unalign flow in case header is split across two packets. There is a corner case in the MPA unalign flow where a FPDU header is split over two tcp segments. The length of the first fragment in this case was not initialized properly and should be '1' Fixes: c7d1d839 ("qed: Add support for MPA header being split over two tcp packets") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index ca4a81dc1ace..fefe5277f20d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1928,8 +1928,8 @@ qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn, /* Missing lower byte is now available */ mpa_len = fpdu->fpdu_length | *mpa_data; fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len); - fpdu->mpa_frag_len = fpdu->fpdu_length; /* one byte of hdr */ + fpdu->mpa_frag_len = 1; fpdu->incomplete_bytes = fpdu->fpdu_length - 1; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, -- cgit From 16da09047d3fb991dc48af41f6d255fd578e8ca2 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 14 Mar 2018 14:49:28 +0200 Subject: qed: Fix non TCP packets should be dropped on iWARP ll2 connection FW workaround. The iWARP LL2 connection did not expect TCP packets to arrive on it's connection. The fix drops any non-tcp packets Fixes b5c29ca ("qed: iWARP CM - setup a ll2 connection for handling SYN packets") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index fefe5277f20d..d5d02be72947 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1703,6 +1703,13 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen); if (eth_type == ETH_P_IP) { + if (iph->protocol != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return -EINVAL; + } + cm_info->local_ip[0] = ntohl(iph->daddr); cm_info->remote_ip[0] = ntohl(iph->saddr); cm_info->ip_version = TCP_IPV4; @@ -1711,6 +1718,14 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, *payload_len = ntohs(iph->tot_len) - ip_hlen; } else if (eth_type == ETH_P_IPV6) { ip6h = (struct ipv6hdr *)iph; + + if (ip6h->nexthdr != IPPROTO_TCP) { + DP_NOTICE(p_hwfn, + "Unexpected ip protocol on ll2 %x\n", + iph->protocol); + return -EINVAL; + } + for (i = 0; i < 4; i++) { cm_info->local_ip[i] = ntohl(ip6h->daddr.in6_u.u6_addr32[i]); -- cgit From 4609adc27175839408359822523de7247d56c87f Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 14 Mar 2018 14:56:53 +0200 Subject: qede: Fix qedr link update Link updates were not reported to qedr correctly. Leading to cases where a link could be down, but qedr would see it as up. In addition, once qede was loaded, link state would be up, regardless of the actual link state. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 2db70eabddfe..5c28209e97d0 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2067,8 +2067,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, link_params.link_up = true; edev->ops->common->set_link(edev->cdev, &link_params); - qede_rdma_dev_event_open(edev); - edev->state = QEDE_STATE_OPEN; DP_INFO(edev, "Ending successfully qede load\n"); @@ -2169,12 +2167,14 @@ static void qede_link_update(void *dev, struct qed_link_output *link) DP_NOTICE(edev, "Link is up\n"); netif_tx_start_all_queues(edev->ndev); netif_carrier_on(edev->ndev); + qede_rdma_dev_event_open(edev); } } else { if (netif_carrier_ok(edev->ndev)) { DP_NOTICE(edev, "Link is down\n"); netif_tx_disable(edev->ndev); netif_carrier_off(edev->ndev); + qede_rdma_dev_event_close(edev); } } } -- cgit From 02a2385f37a7c6594c9d89b64c4a1451276f08eb Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Mar 2018 21:10:23 +0100 Subject: netlink: avoid a double skb free in genlmsg_mcast() nlmsg_multicast() consumes always the skb, thus the original skb must be freed only when this function is called with a clone. Fixes: cb9f7a9a5c96 ("netlink: ensure to loop over all netns in genlmsg_multicast_allns()") Reported-by: Ben Hutchings Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6f02499ef007..b9ce82c9440f 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1106,7 +1106,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, if (!err) delivered = true; else if (err != -ESRCH) - goto error; + return err; return delivered ? 0 : -ESRCH; error: kfree_skb(skb); -- cgit From 6e5d58fdc9bedd0255a8781b258f10bbdc63e975 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 14 Mar 2018 13:32:09 -0700 Subject: skbuff: Fix not waking applications when errors are enqueued When errors are enqueued to the error queue via sock_queue_err_skb() function, it is possible that the waiting application is not notified. Calling 'sk->sk_data_ready()' would not notify applications that selected only POLLERR events in poll() (for example). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Randy E. Witt Reviewed-by: Eric Dumazet Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b103f46ec512..1e7acdc30732 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4179,7 +4179,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + sk->sk_error_report(sk); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); -- cgit From 7b7ef57c0dd2bb367ad41a77e5136c5579ae4026 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 16 Mar 2018 18:03:46 +0100 Subject: net: dsa: mv88e6xxx: Fix binding documentation for MDIO busses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MDIO busses are switch properties and so should be inside the switch node. Fix the examples in the binding document. Reported-by: 尤晓杰 Signed-off-by: Andrew Lunn Fixes: a3c53be55c95 ("net: dsa: mv88e6xxx: Support multiple MDIO busses") Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/marvell.txt | 48 +++++++++++----------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt index 1d4d0f49c9d0..8c033d48e2ba 100644 --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt +++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt @@ -50,14 +50,15 @@ Example: compatible = "marvell,mv88e6085"; reg = <0>; reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; - }; - mdio { - #address-cells = <1>; - #size-cells = <0>; - switch1phy0: switch1phy0@0 { - reg = <0>; - interrupt-parent = <&switch0>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + interrupt-parent = <&switch0>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; }; }; }; @@ -74,23 +75,24 @@ Example: compatible = "marvell,mv88e6390"; reg = <0>; reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; - }; - mdio { - #address-cells = <1>; - #size-cells = <0>; - switch1phy0: switch1phy0@0 { - reg = <0>; - interrupt-parent = <&switch0>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + interrupt-parent = <&switch0>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; }; - }; - mdio1 { - compatible = "marvell,mv88e6xxx-mdio-external"; - #address-cells = <1>; - #size-cells = <0>; - switch1phy9: switch1phy0@9 { - reg = <9>; + mdio1 { + compatible = "marvell,mv88e6xxx-mdio-external"; + #address-cells = <1>; + #size-cells = <0>; + switch1phy9: switch1phy0@9 { + reg = <9>; + }; }; }; }; -- cgit From bc44b78157f621ff2a2618fe287a827bcb094ac4 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 16 Mar 2018 11:29:09 +0100 Subject: batman-adv: update data pointers after skb_cow() batadv_check_unicast_ttvn() calls skb_cow(), so pointers into the SKB data must be (re)set after calling it. The ethhdr variable is dropped altogether. Fixes: 7cdcf6dddc42 ("batman-adv: add UNICAST_4ADDR packet type") Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b6891e8b741c..6a9242658c8d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -968,14 +968,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - struct ethhdr *ethhdr; int ret = NET_RX_DROP; bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - ethhdr = eth_hdr(skb); - is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) @@ -995,12 +991,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) goto free_skb; + unicast_packet = (struct batadv_unicast_packet *)skb->data; + /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { /* If this is a unicast packet from another backgone gw, * drop it. */ - orig_addr_gw = ethhdr->h_source; + orig_addr_gw = eth_hdr(skb)->h_source; orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); if (orig_node_gw) { is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, @@ -1015,6 +1013,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, } if (is4addr) { + unicast_4addr_packet = + (struct batadv_unicast_4addr_packet *)skb->data; subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); -- cgit From 6f27d2c2a8c236d296201c19abb8533ec20d212b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 16 Mar 2018 11:29:10 +0100 Subject: batman-adv: fix header size check in batadv_dbg_arp() Checking for 0 is insufficient: when an SKB without a batadv header, but with a VLAN header is received, hdr_size will be 4, making the following code interpret the Ethernet header as a batadv header. Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware") Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 9703c791ffc5..87cd962d28d5 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -393,7 +393,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, batadv_arp_hw_src(skb, hdr_size), &ip_src, batadv_arp_hw_dst(skb, hdr_size), &ip_dst); - if (hdr_size == 0) + if (hdr_size < sizeof(struct batadv_unicast_packet)) return; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; -- cgit From cce6294cc2eaa083482e1d85d8db5845c82a7e14 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2018 18:53:00 -0700 Subject: net: sched: fix uses after free syzbot reported one use-after-free in pfifo_fast_enqueue() [1] Issue here is that we can not reuse skb after a successful skb_array_produce() since another cpu might have consumed it already. I believe a similar problem exists in try_bulk_dequeue_skb_slow() in case we put an skb into qdisc_enqueue_skb_bad_txq() for lockless qdisc. [1] BUG: KASAN: use-after-free in qdisc_pkt_len include/net/sch_generic.h:610 [inline] BUG: KASAN: use-after-free in qdisc_qstats_cpu_backlog_inc include/net/sch_generic.h:712 [inline] BUG: KASAN: use-after-free in pfifo_fast_enqueue+0x4bc/0x5e0 net/sched/sch_generic.c:639 Read of size 4 at addr ffff8801cede37e8 by task syzkaller717588/5543 CPU: 1 PID: 5543 Comm: syzkaller717588 Not tainted 4.16.0-rc4+ #265 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report+0x23c/0x360 mm/kasan/report.c:412 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:432 qdisc_pkt_len include/net/sch_generic.h:610 [inline] qdisc_qstats_cpu_backlog_inc include/net/sch_generic.h:712 [inline] pfifo_fast_enqueue+0x4bc/0x5e0 net/sched/sch_generic.c:639 __dev_xmit_skb net/core/dev.c:3216 [inline] Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array") Signed-off-by: Eric Dumazet Reported-by: syzbot+ed43b6903ab968b16f54@syzkaller.appspotmail.com Cc: John Fastabend Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 190570f21b20..7e3fbe9cc936 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -106,6 +106,14 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, __skb_queue_tail(&q->skb_bad_txq, skb); + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_backlog_inc(q, skb); + qdisc_qstats_cpu_qlen_inc(q); + } else { + qdisc_qstats_backlog_inc(q, skb); + q->q.qlen++; + } + if (lock) spin_unlock(lock); } @@ -196,14 +204,6 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, break; if (unlikely(skb_get_queue_mapping(nskb) != mapping)) { qdisc_enqueue_skb_bad_txq(q, nskb); - - if (qdisc_is_percpu_stats(q)) { - qdisc_qstats_cpu_backlog_inc(q, nskb); - qdisc_qstats_cpu_qlen_inc(q); - } else { - qdisc_qstats_backlog_inc(q, nskb); - q->q.qlen++; - } break; } skb->next = nskb; @@ -628,6 +628,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct skb_array *q = band2list(priv, band); + unsigned int pkt_len = qdisc_pkt_len(skb); int err; err = skb_array_produce(q, skb); @@ -636,7 +637,10 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, return qdisc_drop_cpu(skb, qdisc, to_free); qdisc_qstats_cpu_qlen_inc(qdisc); - qdisc_qstats_cpu_backlog_inc(qdisc, skb); + /* Note: skb can not be used after skb_array_produce(), + * so we better not use qdisc_qstats_cpu_backlog_inc() + */ + this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len); return NET_XMIT_SUCCESS; } -- cgit From bcdd5de80a2275f7879dc278bfc747f1caf94442 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 15 Mar 2018 14:49:56 +0200 Subject: mlxsw: spectrum_buffers: Set a minimum quota for CPU port traffic In commit 9ffcc3725f09 ("mlxsw: spectrum: Allow packets to be trapped from any PG") I fixed a problem where packets could not be trapped to the CPU due to exceeded shared buffer quotas. The mentioned commit explains the problem in detail. The problem was fixed by assigning a minimum quota for the CPU port and the traffic class used for scheduling traffic to the CPU. However, commit 117b0dad2d54 ("mlxsw: Create a different trap group list for each device") assigned different traffic classes to different packet types and rendered the fix useless. Fix the problem by assigning a minimum quota for the CPU port and all the traffic classes that are currently in use. Fixes: 117b0dad2d54 ("mlxsw: Create a different trap group list for each device") Signed-off-by: Ido Schimmel Reported-by: Eddie Shklaer Tested-by: Eddie Shklaer Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 93728c694e6d..0a9adc5962fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -385,13 +385,13 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(10000, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, -- cgit From d61d263c8d82db7c4404a29ebc29674b1c0c05c9 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Thu, 15 Mar 2018 17:54:20 +0100 Subject: net: hns: Fix ethtool private flags The driver implementation returns support for private flags, while no private flags are present. When asked for the number of private flags it returns the number of statistic flag names. Fix this by returning EOPNOTSUPP for not implemented ethtool flags. Signed-off-by: Matthias Brugger Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 86944bc3b273..74bd260ca02a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -666,7 +666,7 @@ static void hns_gmac_get_strings(u32 stringset, u8 *data) static int hns_gmac_get_sset_count(int stringset) { - if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS) + if (stringset == ETH_SS_STATS) return ARRAY_SIZE(g_gmac_stats_string); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index b62816c1574e..93e71e27401b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -422,7 +422,7 @@ void hns_ppe_update_stats(struct hns_ppe_cb *ppe_cb) int hns_ppe_get_sset_count(int stringset) { - if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS) + if (stringset == ETH_SS_STATS) return ETH_PPE_STATIC_NUM; return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 6f3570cfb501..e2e28532e4dc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -876,7 +876,7 @@ void hns_rcb_get_stats(struct hnae_queue *queue, u64 *data) */ int hns_rcb_get_ring_sset_count(int stringset) { - if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS) + if (stringset == ETH_SS_STATS) return HNS_RING_STATIC_REG_NUM; return 0; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 7ea7f8a4aa2a..2e14a3ae1d8b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -993,8 +993,10 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) cnt--; return cnt; - } else { + } else if (stringset == ETH_SS_STATS) { return (HNS_NET_STATS_CNT + ops->get_sset_count(h, stringset)); + } else { + return -EOPNOTSUPP; } } -- cgit From f9db50691db4a7d860fce985f080bb3fc23a7ede Mon Sep 17 00:00:00 2001 From: "SZ Lin (林上智)" Date: Fri, 16 Mar 2018 00:56:01 +0800 Subject: net: ethernet: ti: cpsw: add check for in-band mode setting with RGMII PHY interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to AM335x TRM[1] 14.3.6.2, AM437x TRM[2] 15.3.6.2 and DRA7 TRM[3] 24.11.4.8.7.3.3, in-band mode in EXT_EN(bit18) register is only available when PHY is configured in RGMII mode with 10Mbps speed. It will cause some networking issues without RGMII mode, such as carrier sense errors and low throughput. TI also mentioned this issue in their forum[4]. This patch adds the check mechanism for PHY interface with RGMII interface type, the in-band mode can only be set in RGMII mode with 10Mbps speed. References: [1]: https://www.ti.com/lit/ug/spruh73p/spruh73p.pdf [2]: http://www.ti.com/lit/ug/spruhl7h/spruhl7h.pdf [3]: http://www.ti.com/lit/ug/spruic2b/spruic2b.pdf [4]: https://e2e.ti.com/support/arm/sitara_arm/f/791/p/640765/2392155 Suggested-by: Holsety Chen (陳憲輝) Signed-off-by: SZ Lin (林上智) Signed-off-by: Schuyler Patton Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1b1b78fdc138..b2b30c9df037 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1014,7 +1014,8 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, /* set speed_in input in case RMII mode is used in 100Mbps */ if (phy->speed == 100) mac_control |= BIT(15); - else if (phy->speed == 10) + /* in band mode only works in 10Mbps RGMII mode */ + else if ((phy->speed == 10) && phy_interface_is_rgmii(phy)) mac_control |= BIT(18); /* In Band mode */ if (priv->rx_pause) -- cgit From 1edf8abe04090c4f41a85e42c66638be1ee69156 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:53 +0100 Subject: net/sched: fix NULL dereference in the error path of tcf_vlan_init() when the following command # tc actions replace action vlan pop index 100 is run for the first time, and tcf_vlan_init() fails allocating struct tcf_vlan_params, tcf_vlan_cleanup() calls kfree_rcu(NULL, ...). This causes the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: __call_rcu+0x23/0x2b0 PGD 80000000760a2067 P4D 80000000760a2067 PUD 742c1067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_vlan(E) ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel mbcache snd_hda_codec jbd2 snd_hda_core crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer glue_helper snd cryptd joydev soundcore virtio_balloon pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_console virtio_blk virtio_net ata_piix crc32c_intel libata virtio_pci i2c_core virtio_ring serio_raw virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_vlan] CPU: 3 PID: 3119 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__call_rcu+0x23/0x2b0 RSP: 0018:ffffaac3005fb798 EFLAGS: 00010246 RAX: ffffffffc0704080 RBX: ffff97f2b4bbe900 RCX: 00000000ffffffff RDX: ffffffffabca5f00 RSI: 0000000000000010 RDI: 0000000000000010 RBP: 0000000000000010 R08: 0000000000000001 R09: 0000000000000044 R10: 00000000fd003000 R11: ffff97f2faab5b91 R12: 0000000000000000 R13: ffffffffabca5f00 R14: ffff97f2fb80202c R15: 00000000fffffff4 FS: 00007f68f75b4740(0000) GS:ffff97f2ffd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000072b52001 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_vlan_init+0x168/0x270 [act_vlan] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7f68f69c5ba0 RSP: 002b:00007fffd79c1118 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fffd79c1240 RCX: 00007f68f69c5ba0 RDX: 0000000000000000 RSI: 00007fffd79c1190 RDI: 0000000000000003 RBP: 000000005aaa708e R08: 0000000000000002 R09: 0000000000000000 R10: 00007fffd79c0ba0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffd79c1254 R14: 0000000000000001 R15: 0000000000669f60 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89 RIP: __call_rcu+0x23/0x2b0 RSP: ffffaac3005fb798 CR2: 0000000000000018 fix this in tcf_vlan_cleanup(), ensuring that kfree_rcu(p, ...) is called only when p is not NULL. Fixes: 4c5b9d9642c8 ("act_vlan: VLAN action rewrite to use RCU lock/unlock and update") Acked-by: Jiri Pirko Acked-by: Manish Kurup Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index e1a1b3f3983a..c2914e9a4a6f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -225,7 +225,8 @@ static void tcf_vlan_cleanup(struct tc_action *a) struct tcf_vlan_params *p; p = rcu_dereference_protected(v->vlan_p, 1); - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, -- cgit From aab378a7eda21941c4b678b21aea8867b6817f59 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:54 +0100 Subject: net/sched: fix NULL dereference in the error path of tcf_csum_init() when the following command # tc action add action csum udp continue index 100 is run for the first time, and tcf_csum_init() fails allocating struct tcf_csum, tcf_csum_cleanup() calls kfree_rcu(NULL,...). This causes the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: __call_rcu+0x23/0x2b0 PGD 80000000740b4067 P4D 80000000740b4067 PUD 32e7f067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_csum(E) act_vlan ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic pcbc snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer aesni_intel crypto_simd glue_helper cryptd snd joydev pcspkr virtio_balloon i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_blk drm virtio_net virtio_console ata_piix crc32c_intel libata virtio_pci serio_raw i2c_core virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_vlan] CPU: 2 PID: 5763 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__call_rcu+0x23/0x2b0 RSP: 0018:ffffb275803e77c0 EFLAGS: 00010246 RAX: ffffffffc057b080 RBX: ffff9674bc6f5240 RCX: 00000000ffffffff RDX: ffffffff928a5f00 RSI: 0000000000000008 RDI: 0000000000000008 RBP: 0000000000000008 R08: 0000000000000001 R09: 0000000000000044 R10: 0000000000000220 R11: ffff9674b9ab4821 R12: 0000000000000000 R13: ffffffff928a5f00 R14: 0000000000000000 R15: 0000000000000001 FS: 00007fa6368d8740(0000) GS:ffff9674bfd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000073dec001 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_csum_init+0xfb/0x180 [act_csum] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7fa635ce9ba0 RSP: 002b:00007ffc185b0fc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffc185b10f0 RCX: 00007fa635ce9ba0 RDX: 0000000000000000 RSI: 00007ffc185b1040 RDI: 0000000000000003 RBP: 000000005aaa85e0 R08: 0000000000000002 R09: 0000000000000000 R10: 00007ffc185b0a20 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc185b1104 R14: 0000000000000001 R15: 0000000000669f60 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89 RIP: __call_rcu+0x23/0x2b0 RSP: ffffb275803e77c0 CR2: 0000000000000010 fix this in tcf_csum_cleanup(), ensuring that kfree_rcu(param, ...) is called only when param is not NULL. Fixes: 9c5f69bbd75a ("net/sched: act_csum: don't use spinlock in the fast path") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_csum.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 24b2e8e681cf..2a5c8fd860cf 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -626,7 +626,8 @@ static void tcf_csum_cleanup(struct tc_action *a) struct tcf_csum_params *params; params = rcu_dereference_protected(p->params, 1); - kfree_rcu(params, rcu); + if (params) + kfree_rcu(params, rcu); } static int tcf_csum_walker(struct net *net, struct sk_buff *skb, -- cgit From abdadd3cfd3e7ea3da61ac774f84777d1f702058 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:55 +0100 Subject: net/sched: fix NULL dereference in the error path of tunnel_key_init() when the following command # tc action add action tunnel_key unset index 100 is run for the first time, and tunnel_key_init() fails to allocate struct tcf_tunnel_key_params, tunnel_key_release() dereferences NULL pointers. This causes the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: tunnel_key_release+0xd/0x40 [act_tunnel_key] PGD 8000000033787067 P4D 8000000033787067 PUD 74646067 PMD 0 Oops: 0000 [#1] SMP PTI Modules linked in: act_tunnel_key(E) act_csum ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel pcbc snd_hda_codec snd_hda_core snd_hwdep snd_seq aesni_intel snd_seq_device crypto_simd glue_helper snd_pcm cryptd joydev snd_timer pcspkr virtio_balloon snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_net virtio_blk drm virtio_console crc32c_intel ata_piix serio_raw i2c_core virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CPU: 2 PID: 3101 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tunnel_key_release+0xd/0x40 [act_tunnel_key] RSP: 0018:ffffba46803b7768 EFLAGS: 00010286 RAX: ffffffffc09010a0 RBX: 0000000000000000 RCX: 0000000000000024 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff99ee336d7480 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000044 R10: 0000000000000220 R11: ffff99ee79d73131 R12: 0000000000000000 R13: ffff99ee32d67610 R14: ffff99ee7671dc38 R15: 00000000fffffff4 FS: 00007febcb2cd740(0000) GS:ffff99ee7fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000007c8e4005 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tunnel_key_init+0xd9/0x460 [act_tunnel_key] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7febca6deba0 RSP: 002b:00007ffe7b0dd128 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffe7b0dd250 RCX: 00007febca6deba0 RDX: 0000000000000000 RSI: 00007ffe7b0dd1a0 RDI: 0000000000000003 RBP: 000000005aaa90cb R08: 0000000000000002 R09: 0000000000000000 R10: 00007ffe7b0dcba0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe7b0dd264 R14: 0000000000000001 R15: 0000000000669f60 Code: 44 00 00 8b 0d b5 23 00 00 48 8b 87 48 10 00 00 48 8b 3c c8 e9 a5 e5 d8 c3 0f 1f 44 00 00 0f 1f 44 00 00 53 48 8b 9f b0 00 00 00 <83> 7b 10 01 74 0b 48 89 df 31 f6 5b e9 f2 fa 7f c3 48 8b 7b 18 RIP: tunnel_key_release+0xd/0x40 [act_tunnel_key] RSP: ffffba46803b7768 CR2: 0000000000000010 Fix this in tunnel_key_release(), ensuring 'param' is not NULL before dereferencing it. Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fea772e66e62..1281ca463727 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -208,11 +208,12 @@ static void tunnel_key_release(struct tc_action *a) struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); + if (params) { + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(¶ms->tcft_enc_metadata->dst); - if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) - dst_release(¶ms->tcft_enc_metadata->dst); - - kfree_rcu(params, rcu); + kfree_rcu(params, rcu); + } } static int tunnel_key_dump_addresses(struct sk_buff *skb, -- cgit From 1f110e7cae09e6c6a144616480d1a9dd99c5208a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:56 +0100 Subject: net/sched: fix NULL dereference in the error path of tcf_sample_init() when the following command # tc action add action sample rate 100 group 100 index 100 is run for the first time, and psample_group_get(100) fails to create a new group, tcf_sample_cleanup() calls psample_group_put(NULL), thus causing the following error: BUG: unable to handle kernel NULL pointer dereference at 000000000000001c IP: psample_group_put+0x15/0x71 [psample] PGD 8000000075775067 P4D 8000000075775067 PUD 7453c067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_sample(E) psample ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core mbcache jbd2 crct10dif_pclmul snd_hwdep crc32_pclmul snd_seq ghash_clmulni_intel pcbc snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer glue_helper snd cryptd joydev pcspkr i2c_piix4 soundcore virtio_balloon nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_net ata_piix virtio_console virtio_blk libata serio_raw crc32c_intel virtio_pci i2c_core virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_tunnel_key] CPU: 2 PID: 5740 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:psample_group_put+0x15/0x71 [psample] RSP: 0018:ffffb8a80032f7d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000024 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffffffffc06d93c0 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000044 R10: 00000000bd003000 R11: ffff979fba04aa59 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff979fbba3f22c FS: 00007f7638112740(0000) GS:ffff979fbfd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000001c CR3: 00000000734ea001 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_sample_init+0x125/0x1d0 [act_sample] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7f7637523ba0 RSP: 002b:00007fff0473ef58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fff0473f080 RCX: 00007f7637523ba0 RDX: 0000000000000000 RSI: 00007fff0473efd0 RDI: 0000000000000003 RBP: 000000005aaaac80 R08: 0000000000000002 R09: 0000000000000000 R10: 00007fff0473e9e0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff0473f094 R14: 0000000000000001 R15: 0000000000669f60 Code: be 02 00 00 00 48 89 df e8 a9 fe ff ff e9 7c ff ff ff 0f 1f 40 00 0f 1f 44 00 00 53 48 89 fb 48 c7 c7 c0 93 6d c0 e8 db 20 8c ef <83> 6b 1c 01 74 10 48 c7 c7 c0 93 6d c0 ff 14 25 e8 83 83 b0 5b RIP: psample_group_put+0x15/0x71 [psample] RSP: ffffb8a80032f7d0 CR2: 000000000000001c Fix it in tcf_sample_cleanup(), ensuring that calls to psample_group_put(p) are done only when p is not NULL. Fixes: cadb9c9fdbc6 ("net/sched: act_sample: Fix error path in init") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_sample.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 1ba0df238756..74c5d7e6a0fa 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -103,7 +103,8 @@ static void tcf_sample_cleanup(struct tc_action *a) psample_group = rtnl_dereference(s->psample_group); RCU_INIT_POINTER(s->psample_group, NULL); - psample_group_put(psample_group); + if (psample_group) + psample_group_put(psample_group); } static bool tcf_sample_dev_ok_push(struct net_device *dev) -- cgit From 2d433610176d6569e8b3a28f67bc72235bf69efc Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 16 Mar 2018 00:00:57 +0100 Subject: net/sched: fix NULL dereference on the error path of tcf_skbmod_init() when the following command # tc action replace action skbmod swap mac index 100 is run for the first time, and tcf_skbmod_init() fails to allocate struct tcf_skbmod_params, tcf_skbmod_cleanup() calls kfree_rcu(NULL), thus causing the following error: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: __call_rcu+0x23/0x2b0 PGD 8000000034057067 P4D 8000000034057067 PUD 74937067 PMD 0 Oops: 0002 [#1] SMP PTI Modules linked in: act_skbmod(E) psample ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec crct10dif_pclmul mbcache jbd2 crc32_pclmul snd_hda_core ghash_clmulni_intel snd_hwdep pcbc snd_seq snd_seq_device snd_pcm aesni_intel snd_timer crypto_simd glue_helper snd cryptd virtio_balloon joydev soundcore pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_console virtio_net virtio_blk ata_piix libata crc32c_intel virtio_pci serio_raw virtio_ring virtio i2c_core floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_skbmod] CPU: 3 PID: 3144 Comm: tc Tainted: G E 4.16.0-rc4.act_vlan.orig+ #403 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__call_rcu+0x23/0x2b0 RSP: 0018:ffffbd2e403e7798 EFLAGS: 00010246 RAX: ffffffffc0872080 RBX: ffff981d34bff780 RCX: 00000000ffffffff RDX: ffffffff922a5f00 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000021f R10: 000000003d003000 R11: 0000000000aaaaaa R12: 0000000000000000 R13: ffffffff922a5f00 R14: 0000000000000001 R15: ffff981d3b698c2c FS: 00007f3678292740(0000) GS:ffff981d3fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000007c57a006 CR4: 00000000001606e0 Call Trace: __tcf_idr_release+0x79/0xf0 tcf_skbmod_init+0x1d1/0x210 [act_skbmod] tcf_action_init_1+0x2cc/0x430 tcf_action_init+0xd3/0x1b0 tc_ctl_action+0x18b/0x240 rtnetlink_rcv_msg+0x29c/0x310 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1b9/0x270 ? rtnl_calcit.isra.28+0x100/0x100 netlink_rcv_skb+0xd2/0x110 netlink_unicast+0x17c/0x230 netlink_sendmsg+0x2cd/0x3c0 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x27a/0x290 ? filemap_map_pages+0x34a/0x3a0 ? __handle_mm_fault+0xbfd/0xe20 __sys_sendmsg+0x51/0x90 do_syscall_64+0x6e/0x1a0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7f36776a3ba0 RSP: 002b:00007fff4703b618 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fff4703b740 RCX: 00007f36776a3ba0 RDX: 0000000000000000 RSI: 00007fff4703b690 RDI: 0000000000000003 RBP: 000000005aaaba36 R08: 0000000000000002 R09: 0000000000000000 R10: 00007fff4703b0a0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff4703b754 R14: 0000000000000001 R15: 0000000000669f60 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89 RIP: __call_rcu+0x23/0x2b0 RSP: ffffbd2e403e7798 CR2: 0000000000000008 Fix it in tcf_skbmod_cleanup(), ensuring that kfree_rcu(p, ...) is called only when p is not NULL. Fixes: 86da71b57383 ("net_sched: Introduce skbmod action") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index fa975262dbac..d09565d6433e 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -190,7 +190,8 @@ static void tcf_skbmod_cleanup(struct tc_action *a) struct tcf_skbmod_params *p; p = rcu_dereference_protected(d->skbmod_p, 1); - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, -- cgit From 7a4c003d6921e2af215f4790aa43a292bdc78be0 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Fri, 16 Mar 2018 14:47:54 -0700 Subject: vmxnet3: avoid xmit reset due to a race in vmxnet3 The field txNumDeferred is used by the driver to keep track of the number of packets it has pushed to the emulation. The driver increments it on pushing the packet to the emulation and the emulation resets it to 0 at the end of the transmit. There is a possibility of a race either when (a) ESX is under heavy load or (b) workload inside VM is of low packet rate. This race results in xmit hangs when network coalescing is disabled. This change creates a local copy of txNumDeferred and uses it to perform ring arithmetic. Reported-by: Noriho Tanaka Signed-off-by: Ronak Doshi Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 13 ++++++++----- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 8b39c160743d..b466a422b72d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -977,6 +977,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, { int ret; u32 count; + int num_pkts; + int tx_num_deferred; unsigned long flags; struct vmxnet3_tx_ctx ctx; union Vmxnet3_GenericDesc *gdesc; @@ -1075,12 +1077,12 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, #else gdesc = ctx.sop_txd; #endif + tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred); if (ctx.mss) { gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size; gdesc->txd.om = VMXNET3_OM_TSO; gdesc->txd.msscof = ctx.mss; - le32_add_cpu(&tq->shared->txNumDeferred, (skb->len - - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss); + num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss; } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { gdesc->txd.hlen = ctx.eth_ip_hdr_size; @@ -1091,8 +1093,10 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, gdesc->txd.om = 0; gdesc->txd.msscof = 0; } - le32_add_cpu(&tq->shared->txNumDeferred, 1); + num_pkts = 1; } + le32_add_cpu(&tq->shared->txNumDeferred, num_pkts); + tx_num_deferred += num_pkts; if (skb_vlan_tag_present(skb)) { gdesc->txd.ti = 1; @@ -1118,8 +1122,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, spin_unlock_irqrestore(&tq->tx_lock, flags); - if (le32_to_cpu(tq->shared->txNumDeferred) >= - le32_to_cpu(tq->shared->txThreshold)) { + if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) { tq->shared->txNumDeferred = 0; VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_TXPROD + tq->qid * 8, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 5ba222920e80..b94fdfd0b6f1 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.11.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.12.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040b00 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040c00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ -- cgit From 034f405793897a3c8f642935f5494b86c340cde7 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Fri, 16 Mar 2018 14:49:19 -0700 Subject: vmxnet3: use correct flag to indicate LRO feature 'Commit 45dac1d6ea04 ("vmxnet3: Changes for vmxnet3 adapter version 2 (fwd)")' introduced a flag "lro" in structure vmxnet3_adapter which is used to indicate whether LRO is enabled or not. However, the patch did not set the flag and hence it was never exercised. So, when LRO is enabled, it resulted in poor TCP performance due to delayed acks. This issue is seen with packets which are larger than the mss getting a delayed ack rather than an immediate ack, thus resulting in high latency. This patch removes the lro flag and directly uses device features against NETIF_F_LRO to check if lro is enabled. Fixes: 45dac1d6ea04 ("vmxnet3: Changes for vmxnet3 adapter version 2 (fwd)") Reported-by: Rachel Lunnon Signed-off-by: Ronak Doshi Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 3 ++- drivers/net/vmxnet3/vmxnet3_int.h | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b466a422b72d..e04937f44f33 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1473,7 +1473,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); - if (!rcd->tcp || !adapter->lro) + if (!rcd->tcp || + !(adapter->netdev->features & NETIF_F_LRO)) goto not_lro; if (segCnt != 0 && mss != 0) { diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index b94fdfd0b6f1..99387a4a20a8 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.12.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.13.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040c00 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040d00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ @@ -343,7 +343,6 @@ struct vmxnet3_adapter { u8 version; bool rxcsum; - bool lro; #ifdef VMXNET3_RSS struct UPT1_RSSConf *rss_conf; -- cgit From 48881ed56b395bdf2cff6c102039016223ca4da6 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 18 Mar 2018 09:48:03 +0100 Subject: batman-adv: Add missing include for EPOLL* constants Fixes: a9a08845e9ac ("vfs: do bulk POLL* -> EPOLL* replacement") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/icmp_socket.c | 1 + net/batman-adv/log.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index e91f29c7c638..5daa3d50da17 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index dc9fa37ddd14..cdbe0e5e208b 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include -- cgit From fc04fdb2c8a894283259f5621d31d75610701091 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 16 Mar 2018 21:14:32 +0100 Subject: batman-adv: Fix skbuff rcsum on packet reroute batadv_check_unicast_ttvn may redirect a packet to itself or another originator. This involves rewriting the ttvn and the destination address in the batadv unicast header. These field were not yet pulled (with skb rcsum update) and thus any change to them also requires a change in the receive checksum. Reported-by: Matthias Schiffer Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 6a9242658c8d..e61dc1293bb5 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -759,6 +759,7 @@ free_skb: /** * batadv_reroute_unicast_packet() - update the unicast header for re-routing * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast packet to process * @unicast_packet: the unicast header to be updated * @dst_addr: the payload destination * @vid: VLAN identifier @@ -770,7 +771,7 @@ free_skb: * Return: true if the packet header has been updated, false otherwise */ static bool -batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, +batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_unicast_packet *unicast_packet, u8 *dst_addr, unsigned short vid) { @@ -799,8 +800,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, } /* update the packet header */ + skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); ether_addr_copy(unicast_packet->dest, orig_addr); unicast_packet->ttvn = orig_ttvn; + skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); ret = true; out: @@ -841,7 +844,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * the packet to */ if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) { - if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, + if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet, ethhdr->h_dest, vid)) batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv, @@ -887,7 +890,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * destination can possibly be updated and forwarded towards the new * target host */ - if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, + if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet, ethhdr->h_dest, vid)) { batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv, "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n", @@ -910,12 +913,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, if (!primary_if) return false; + /* update the packet header */ + skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr); + unicast_packet->ttvn = curr_ttvn; + skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet)); batadv_hardif_put(primary_if); - unicast_packet->ttvn = curr_ttvn; - return true; } -- cgit From a069215cf5985f3aa1bba550264907d6bd05c5f7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 18 Mar 2018 12:49:51 -0700 Subject: net: fec: Fix unbalanced PM runtime calls When unbinding/removing the driver, we will run into the following warnings: [ 259.655198] fec 400d1000.ethernet: 400d1000.ethernet supply phy not found, using dummy regulator [ 259.665065] fec 400d1000.ethernet: Unbalanced pm_runtime_enable! [ 259.672770] fec 400d1000.ethernet (unnamed net_device) (uninitialized): Invalid MAC address: 00:00:00:00:00:00 [ 259.683062] fec 400d1000.ethernet (unnamed net_device) (uninitialized): Using random MAC address: f2:3e:93:b7:29:c1 [ 259.696239] libphy: fec_enet_mii_bus: probed Avoid these warnings by balancing the runtime PM calls during fec_drv_remove(). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 7a7f3a42b2aa..d4604bc8eb5b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3600,6 +3600,8 @@ fec_drv_remove(struct platform_device *pdev) fec_enet_mii_remove(fep); if (fep->reg_phy) regulator_disable(fep->reg_phy); + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); -- cgit From 9ffd7503944ec7c0ef41c3245d1306c221aef2be Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Thu, 15 Mar 2018 18:23:17 +0000 Subject: can: cc770: Fix use after free in cc770_tx_interrupt() This fixes use after free introduced by the last cc770 patch. Signed-off-by: Andri Yngvason Fixes: 746201235b3f ("can: cc770: Fix queue stall & dropped RTR reply") Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 2743d82d4424..6da69af103e6 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -706,13 +706,12 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) return; } - can_put_echo_skb(priv->tx_skb, dev, 0); - can_get_echo_skb(dev, 0); - cf = (struct can_frame *)priv->tx_skb->data; stats->tx_bytes += cf->can_dlc; stats->tx_packets++; + can_put_echo_skb(priv->tx_skb, dev, 0); + can_get_echo_skb(dev, 0); priv->tx_skb = NULL; netif_wake_queue(dev); -- cgit From 2399ac42e762ab25c58420e25359b2921afdc55f Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 16 Mar 2018 17:08:34 -0500 Subject: sysfs: symlink: export sysfs_create_link_nowarn() The sysfs_create_link_nowarn() is going to be used in phylib framework in subsequent patch which can be built as module. Hence, export sysfs_create_link_nowarn() to avoid build errors. Cc: Florian Fainelli Cc: Andrew Lunn Fixes: a3995460491d ("net: phy: Relax error checking on sysfs_create_link()") Signed-off-by: Grygorii Strashko Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- fs/sysfs/symlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 8664db25a9a6..215c225b2ca1 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -106,6 +106,7 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, { return sysfs_do_create_link(kobj, target, name, 0); } +EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn); /** * sysfs_delete_link - remove symlink in object's directory. -- cgit From 4414b3ed74be0e205e04e12cd83542a727d88255 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 16 Mar 2018 17:08:35 -0500 Subject: net: phy: relax error checking when creating sysfs link netdev->phydev Some ethernet drivers (like TI CPSW) may connect and manage >1 Net PHYs per one netdevice, as result such drivers will produce warning during system boot and fail to connect second phy to netdevice when PHYLIB framework will try to create sysfs link netdev->phydev for second PHY in phy_attach_direct(), because sysfs link with the same name has been created already for the first PHY. As result, second CPSW external port will became unusable. Fix it by relaxing error checking when PHYLIB framework is creating sysfs link netdev->phydev in phy_attach_direct(), suppressing warning by using sysfs_create_link_nowarn() and adding error message instead. After this change links (phy->netdev and netdev->phy) creation failure is not fatal any more and system can continue working, which fixes TI CPSW issue. Cc: Florian Fainelli Cc: Andrew Lunn Fixes: a3995460491d ("net: phy: Relax error checking on sysfs_create_link()") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 478405e544cc..fe16f5894092 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1012,10 +1012,17 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj, "attached_dev"); if (!err) { - err = sysfs_create_link(&dev->dev.kobj, &phydev->mdio.dev.kobj, - "phydev"); - if (err) - goto error; + err = sysfs_create_link_nowarn(&dev->dev.kobj, + &phydev->mdio.dev.kobj, + "phydev"); + if (err) { + dev_err(&dev->dev, "could not add device link to %s err %d\n", + kobject_name(&phydev->mdio.dev.kobj), + err); + /* non-fatal - some net drivers can use one netdevice + * with more then one phy + */ + } phydev->sysfs_links = true; } -- cgit From 8137a8e2190975ed9060111ce13000792360aba3 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Sat, 17 Mar 2018 18:17:58 -0700 Subject: vmxnet3: remove unused flag "rxcsum" from struct vmxnet3_adapter Signed-off-by: Igor Pylypiv Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_int.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 99387a4a20a8..59ec34052a65 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -342,8 +342,6 @@ struct vmxnet3_adapter { u8 __iomem *hw_addr1; /* for BAR 1 */ u8 version; - bool rxcsum; - #ifdef VMXNET3_RSS struct UPT1_RSSConf *rss_conf; bool rss; -- cgit From 7fe4d6dcbcb43fe0282d4213fc52be178bb30e91 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 18 Mar 2018 17:37:22 +0200 Subject: devlink: Remove redundant free on error path The current code performs unneeded free. Remove the redundant skb freeing during the error path. Fixes: 1555d204e743 ("devlink: Support for pipeline debug (dpipe)") Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 2f2307d94787..effd4848c2b4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1798,7 +1798,7 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } @@ -1807,7 +1807,6 @@ send_done: nla_put_failure: err = -EMSGSIZE; err_table_put: -err_skb_send_alloc: genlmsg_cancel(skb, hdr); nlmsg_free(skb); return err; @@ -2073,7 +2072,7 @@ static int devlink_dpipe_entries_fill(struct genl_info *info, table->counters_enabled, &dump_ctx); if (err) - goto err_entries_dump; + return err; send_done: nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq, @@ -2081,16 +2080,10 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(dump_ctx.skb, info); - -err_entries_dump: -err_skb_send_alloc: - genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr); - nlmsg_free(dump_ctx.skb); - return err; } static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, @@ -2229,7 +2222,7 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(skb, info); @@ -2237,7 +2230,6 @@ send_done: nla_put_failure: err = -EMSGSIZE; err_table_put: -err_skb_send_alloc: genlmsg_cancel(skb, hdr); nlmsg_free(skb); return err; -- cgit From 00777fac28ba3e126b9e63e789a613e8bd2cab25 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 18 Mar 2018 23:59:36 +0100 Subject: net: ethernet: arc: Fix a potential memory leak if an optional regulator is deferred If the optional regulator is deferred, we must release some resources. They will be re-allocated when the probe function will be called again. Fixes: 6eacf31139bf ("ethernet: arc: Add support for Rockchip SoC layer device tree bindings") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_rockchip.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index 16f9bee992fe..0f6576802607 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -169,8 +169,10 @@ static int emac_rockchip_probe(struct platform_device *pdev) /* Optional regulator for PHY */ priv->regulator = devm_regulator_get_optional(dev, "phy"); if (IS_ERR(priv->regulator)) { - if (PTR_ERR(priv->regulator) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(priv->regulator) == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto out_clk_disable; + } dev_err(dev, "no regulator found\n"); priv->regulator = NULL; } -- cgit From 44caebd368a5174f8aa7b80076350385eb2c2f42 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Sun, 18 Mar 2018 23:40:51 -0700 Subject: net: gemini: fix memory leak cppcheck report: [drivers/net/ethernet/cortina/gemini.c:543]: (error) Memory leak: skb_tab Signed-off-by: Igor Pylypiv Acked-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 5eb999af2c40..bd3f6e4d1341 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -540,6 +540,7 @@ static int gmac_setup_txqs(struct net_device *netdev) if (port->txq_dma_base & ~DMA_Q_BASE_MASK) { dev_warn(geth->dev, "TX queue base it not aligned\n"); + kfree(skb_tab); return -ENOMEM; } -- cgit From 5f2fb802eee1df0810b47ea251942fe3fd36589a Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 19 Mar 2018 11:24:58 +0100 Subject: ipv6: old_dport should be a __be16 in __ip6_datagram_connect() Fixes: 2f987a76a977 ("net: ipv6: keep sk status consistent after datagram connect failure") Signed-off-by: Stefano Brivio Acked-by: Paolo Abeni Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8a9ac2d0f5d3..a9f7eca0b6a3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -149,7 +149,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, struct in6_addr *daddr, old_daddr; __be32 fl6_flowlabel = 0; __be32 old_fl6_flowlabel; - __be32 old_dport; + __be16 old_dport; int addr_type; int err; -- cgit From 52fda36d63bfc8c8e8ae5eda8eb5ac6f52cd67ed Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 20 Mar 2018 09:58:51 -0300 Subject: test_bpf: Fix testing with CONFIG_BPF_JIT_ALWAYS_ON=y on other arches Function bpf_fill_maxinsns11 is designed to not be able to be JITed on x86_64. So, it fails when CONFIG_BPF_JIT_ALWAYS_ON=y, and commit 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") makes sure that failure is detected on that case. However, it does not fail on other architectures, which have a different JIT compiler design. So, test_bpf has started to fail to load on those. After this fix, test_bpf loads fine on both x86_64 and ppc64el. Fixes: 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Yonghong Song Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2efb213716fa..3e9335493fe4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5467,7 +5467,7 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, -#ifdef CONFIG_BPF_JIT_ALWAYS_ON +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86) CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, #else CLASSIC | FLAG_NO_DATA, -- cgit From f005afede992e265bb98534b86912bb669ccd0d2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 20 Mar 2018 11:19:17 -0700 Subject: trace/bpf: remove helper bpf_perf_prog_read_value from tracepoint type programs Commit 4bebdc7a85aa ("bpf: add helper bpf_perf_prog_read_value") added helper bpf_perf_prog_read_value so that perf_event type program can read event counter and enabled/running time. This commit, however, introduced a bug which allows this helper for tracepoint type programs. This is incorrect as bpf_perf_prog_read_value needs to access perf_event through its bpf_perf_event_data_kern type context, which is not available for tracepoint type program. This patch fixed the issue by separating bpf_func_proto between tracepoint and perf_event type programs and removed bpf_perf_prog_read_value from tracepoint func prototype. Fixes: 4bebdc7a85aa ("bpf: add helper bpf_perf_prog_read_value") Reported-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 68 ++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index c0a9e310d715..01e6b3a38871 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -661,7 +661,41 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = { .arg3_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx, +static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto_tp; + case BPF_FUNC_get_stackid: + return &bpf_get_stackid_proto_tp; + default: + return tracing_func_proto(func_id); + } +} + +static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + + BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); + return true; +} + +const struct bpf_verifier_ops tracepoint_verifier_ops = { + .get_func_proto = tp_prog_func_proto, + .is_valid_access = tp_prog_is_valid_access, +}; + +const struct bpf_prog_ops tracepoint_prog_ops = { +}; + +BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx, struct bpf_perf_event_value *, buf, u32, size) { int err = -EINVAL; @@ -678,8 +712,8 @@ clear: return err; } -static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = { - .func = bpf_perf_prog_read_value_tp, +static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { + .func = bpf_perf_prog_read_value, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -687,7 +721,7 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = { .arg3_type = ARG_CONST_SIZE, }; -static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) +static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_perf_event_output: @@ -695,34 +729,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_tp; case BPF_FUNC_perf_prog_read_value: - return &bpf_perf_prog_read_value_proto_tp; + return &bpf_perf_prog_read_value_proto; default: return tracing_func_proto(func_id); } } -static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ - if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - - BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); - return true; -} - -const struct bpf_verifier_ops tracepoint_verifier_ops = { - .get_func_proto = tp_prog_func_proto, - .is_valid_access = tp_prog_is_valid_access, -}; - -const struct bpf_prog_ops tracepoint_prog_ops = { -}; - static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -779,7 +791,7 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, } const struct bpf_verifier_ops perf_event_verifier_ops = { - .get_func_proto = tp_prog_func_proto, + .get_func_proto = pe_prog_func_proto, .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; -- cgit From 0fa4fe85f4724fff89b09741c437cbee9cf8b008 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Mon, 19 Mar 2018 17:57:27 -0700 Subject: bpf: skip unnecessary capability check The current check statement in BPF syscall will do a capability check for CAP_SYS_ADMIN before checking sysctl_unprivileged_bpf_disabled. This code path will trigger unnecessary security hooks on capability checking and cause false alarms on unprivileged process trying to get CAP_SYS_ADMIN access. This can be resolved by simply switch the order of the statement and CAP_SYS_ADMIN is not required anyway if unprivileged bpf syscall is allowed. Signed-off-by: Chenbo Feng Acked-by: Lorenzo Colitti Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e24aa3241387..43f95d190eea 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1845,7 +1845,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz union bpf_attr attr = {}; int err; - if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) + if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) return -EPERM; err = check_uarg_tail_zero(uattr, sizeof(attr), size); -- cgit From 87e0d4f0f37fb0c8c4aeeac46fff5e957738df79 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Mar 2018 01:18:24 +0100 Subject: kbuild: disable clang's default use of -fmerge-all-constants Prasad reported that he has seen crashes in BPF subsystem with netd on Android with arm64 in the form of (note, the taint is unrelated): [ 4134.721483] Unable to handle kernel paging request at virtual address 800000001 [ 4134.820925] Mem abort info: [ 4134.901283] Exception class = DABT (current EL), IL = 32 bits [ 4135.016736] SET = 0, FnV = 0 [ 4135.119820] EA = 0, S1PTW = 0 [ 4135.201431] Data abort info: [ 4135.301388] ISV = 0, ISS = 0x00000021 [ 4135.359599] CM = 0, WnR = 0 [ 4135.470873] user pgtable: 4k pages, 39-bit VAs, pgd = ffffffe39b946000 [ 4135.499757] [0000000800000001] *pgd=0000000000000000, *pud=0000000000000000 [ 4135.660725] Internal error: Oops: 96000021 [#1] PREEMPT SMP [ 4135.674610] Modules linked in: [ 4135.682883] CPU: 5 PID: 1260 Comm: netd Tainted: G S W 4.14.19+ #1 [ 4135.716188] task: ffffffe39f4aa380 task.stack: ffffff801d4e0000 [ 4135.731599] PC is at bpf_prog_add+0x20/0x68 [ 4135.741746] LR is at bpf_prog_inc+0x20/0x2c [ 4135.751788] pc : [] lr : [] pstate: 60400145 [ 4135.769062] sp : ffffff801d4e3ce0 [...] [ 4136.258315] Process netd (pid: 1260, stack limit = 0xffffff801d4e0000) [ 4136.273746] Call trace: [...] [ 4136.442494] 3ca0: ffffff94ab7ad584 0000000060400145 ffffffe3a01bf8f8 0000000000000006 [ 4136.460936] 3cc0: 0000008000000000 ffffff94ab844204 ffffff801d4e3cf0 ffffff94ab7ad584 [ 4136.479241] [] bpf_prog_add+0x20/0x68 [ 4136.491767] [] bpf_prog_inc+0x20/0x2c [ 4136.504536] [] bpf_obj_get_user+0x204/0x22c [ 4136.518746] [] SyS_bpf+0x5a8/0x1a88 Android's netd was basically pinning the uid cookie BPF map in BPF fs (/sys/fs/bpf/traffic_cookie_uid_map) and later on retrieving it again resulting in above panic. Issue is that the map was wrongly identified as a prog! Above kernel was compiled with clang 4.0, and it turns out that clang decided to merge the bpf_prog_iops and bpf_map_iops into a single memory location, such that the two i_ops could then not be distinguished anymore. Reason for this miscompilation is that clang has the more aggressive -fmerge-all-constants enabled by default. In fact, clang source code has a comment about it in lib/AST/ExprConstant.cpp on why it is okay to do so: Pointers with different bases cannot represent the same object. (Note that clang defaults to -fmerge-all-constants, which can lead to inconsistent results for comparisons involving the address of a constant; this generally doesn't matter in practice.) The issue never appeared with gcc however, since gcc does not enable -fmerge-all-constants by default and even *explicitly* states in it's option description that using this flag results in non-conforming behavior, quote from man gcc: Languages like C or C++ require each variable, including multiple instances of the same variable in recursive calls, to have distinct locations, so using this option results in non-conforming behavior. There are also various clang bug reports open on that matter [1], where clang developers acknowledge the non-conforming behavior, and refer to disabling it with -fno-merge-all-constants. But even if this gets fixed in clang today, there are already users out there that triggered this. Thus, fix this issue by explicitly adding -fno-merge-all-constants to the kernel's Makefile to generically disable this optimization, since potentially other places in the kernel could subtly break as well. Note, there is also a flag called -fmerge-constants (not supported by clang), which is more conservative and only applies to strings and it's enabled in gcc's -O/-O2/-O3/-Os optimization levels. In gcc's code, the two flags -fmerge-{all-,}constants share the same variable internally, so when disabling it via -fno-merge-all-constants, then we really don't merge any const data (e.g. strings), and text size increases with gcc (14,927,214 -> 14,942,646 for vmlinux.o). $ gcc -fverbose-asm -O2 foo.c -S -o foo.S -> foo.S lists -fmerge-constants under options enabled $ gcc -fverbose-asm -O2 -fno-merge-all-constants foo.c -S -o foo.S -> foo.S doesn't list -fmerge-constants under options enabled $ gcc -fverbose-asm -O2 -fno-merge-all-constants -fmerge-constants foo.c -S -o foo.S -> foo.S lists -fmerge-constants under options enabled Thus, as a workaround we need to set both -fno-merge-all-constants *and* -fmerge-constants in the Makefile in order for text size to stay as is. [1] https://bugs.llvm.org/show_bug.cgi?id=18538 Reported-by: Prasad Sodagudi Signed-off-by: Daniel Borkmann Cc: Linus Torvalds Cc: Chenbo Feng Cc: Richard Smith Cc: Chandler Carruth Cc: linux-kernel@vger.kernel.org Tested-by: Prasad Sodagudi Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Makefile b/Makefile index c4322dea3ca2..af07bf5bb238 100644 --- a/Makefile +++ b/Makefile @@ -826,6 +826,15 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# clang sets -fmerge-all-constants by default as optimization, but this +# is non-conforming behavior for C and in fact breaks the kernel, so we +# need to disable it here generally. +KBUILD_CFLAGS += $(call cc-option,-fno-merge-all-constants) + +# for gcc -fno-merge-all-constants disables everything, but it is fine +# to have actual conforming behavior enabled. +KBUILD_CFLAGS += $(call cc-option,-fmerge-constants) + # Make sure -fstack-check isn't enabled (like gentoo apparently did) KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) -- cgit From 7c181f4fcdc62e5dc7a87fd33387d322262c3b52 Mon Sep 17 00:00:00 2001 From: Ben Caradoc-Davies Date: Mon, 19 Mar 2018 12:57:44 +1300 Subject: mac80211: add ieee80211_hw flag for QoS NDP support Commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing") added an argument qos_ok to ieee80211_nullfunc_get to support QoS NDP. Despite the claim in the commit log "Change all the drivers to *not* allow QoS NDP for now, even though it looks like most of them should be OK with that", this commit enables QoS NDP in response to beacons (see change to mlme.c:ieee80211_send_nullfunc), causing ath9k_htc to lose IP connectivity. See: https://patchwork.kernel.org/patch/10241109/ https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=891060 Introduce a hardware flag to allow such buggy drivers to override the correct default behaviour of mac80211 of sending QoS NDP packets. Signed-off-by: Ben Caradoc-Davies Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/debugfs.c | 1 + net/mac80211/mlme.c | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c96511fa9198..2b581bd93812 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2063,6 +2063,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on * TDLS links. * + * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't + * support QoS NDP for AP probing - that's most likely a driver bug. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2106,6 +2109,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, + IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1f466d12a6bc..94c7ee9df33b 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -212,6 +212,7 @@ static const char *hw_flag_names[] = { FLAG(REPORTS_LOW_ACK), FLAG(SUPPORTS_TX_FRAG), FLAG(SUPPORTS_TDLS_BUFFER_STA), + FLAG(DOESNT_SUPPORT_QOS_NDP), #undef FLAG }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 39b660b9a908..5f303abac5ad 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -896,7 +896,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, + !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP)); if (!skb) return; -- cgit From 4ea5aca27eab1c495985eb2a30b65b78cbf99404 Mon Sep 17 00:00:00 2001 From: Andrew Zaborowski Date: Wed, 21 Mar 2018 08:05:18 +0100 Subject: mac80211_hwsim: Set wmediumd for new radios Set the wmediumd to the net's wmediumd when the radio gets created. Radios created after HWSIM_CMD_REGISTER don't currently get their data->wmediumd set and the userspace would need to reconnect to netlink to be able to call HWSIM_CMD_REGISTER again. Alternatively I think data->netgroup and data->wmedium could be replaced with a pointer to hwsim_net. Signed-off-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6e0af815f25e..35b21f8152bb 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2727,6 +2727,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, mutex_init(&data->mutex); data->netgroup = hwsim_net_get_netgroup(net); + data->wmediumd = hwsim_net_get_wmediumd(net); /* Enable frame retransmissions for lossy channels */ hw->max_rates = 4; -- cgit From 60b01bcce97191f473fa869df2713143936d6ef4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 21 Mar 2018 11:00:14 +0100 Subject: ath9k_htc: use non-QoS NDP for AP probing When switching mac80211 to use QoS NDP, it turned out that ath9k_htc is somehow broken by this, e.g. see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=891060. Fix this by using the new mac80211 flag to go back to the old, incorrect, behaviour for this driver. Fixes: 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing") Reported-by: Ben Caradoc-Davies Acked-by: Kalle Valo Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index e89e5ef2c2a4..f246e9ed4a81 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -729,6 +729,7 @@ static void ath9k_set_hw_capab(struct ath9k_htc_priv *priv, ieee80211_hw_set(hw, SPECTRUM_MGMT); ieee80211_hw_set(hw, SIGNAL_DBM); ieee80211_hw_set(hw, AMPDU_AGGREGATION); + ieee80211_hw_set(hw, DOESNT_SUPPORT_QOS_NDP); if (ath9k_ps_enable) ieee80211_hw_set(hw, SUPPORTS_PS); -- cgit From 924613d3a87feaa886dd1b18496463c5359e7965 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 19 Mar 2018 14:32:59 +0000 Subject: bnx2x: fix spelling mistake: "registeration" -> "registration" Trivial fix to spelling mistake in BNX2X_ERR error message text Signed-off-by: Colin Ian King Acked-by: Sudarsana Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 74fc9af4aadb..b8388e93520a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13913,7 +13913,7 @@ static void bnx2x_register_phc(struct bnx2x *bp) bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &bp->pdev->dev); if (IS_ERR(bp->ptp_clock)) { bp->ptp_clock = NULL; - BNX2X_ERR("PTP clock registeration failed\n"); + BNX2X_ERR("PTP clock registration failed\n"); } } -- cgit From 3f2176dd7fe9e4eb1444766741fe4ea6535eb6d8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 19 Mar 2018 14:57:11 +0000 Subject: qede: fix spelling mistake: "registeration" -> "registration" Trivial fix to spelling mistakes in DP_ERR error message text and comments Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++-- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5c28209e97d0..a01e7d6e5442 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -288,7 +288,7 @@ int __init qede_init(void) } /* Must register notifier before pci ops, since we might miss - * interface rename after pci probe and netdev registeration. + * interface rename after pci probe and netdev registration. */ ret = register_netdevice_notifier(&qede_netdev_notifier); if (ret) { @@ -988,7 +988,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, if (rc) goto err3; - /* Prepare the lock prior to the registeration of the netdev, + /* Prepare the lock prior to the registration of the netdev, * as once it's registered we might reach flows requiring it * [it's even possible to reach a flow needing it directly * from there, although it's unlikely]. diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 9b2280badaf7..02adb513f475 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -485,7 +485,7 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); if (IS_ERR(ptp->clock)) { rc = -EINVAL; - DP_ERR(edev, "PTP clock registeration failed\n"); + DP_ERR(edev, "PTP clock registration failed\n"); goto err2; } -- cgit From bbc09e7842a5023ba5bc0f8d559b9dd464e44006 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:22 +0100 Subject: net/sched: fix idr leak on the error path of tcf_bpf_init() when the following command sequence is entered # tc action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100 RTNETLINK answers: Invalid argument We have an error talking to the kernel # tc action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel act_bpf correctly refuses to install the first TC rule, because 31 is not a valid instruction. However, it refuses to install the second TC rule, even if the BPF code is correct. Furthermore, it's no more possible to install any other rule having the same value of 'index' until act_bpf module is unloaded/inserted again. After the idr has been reserved, call tcf_idr_release() instead of tcf_idr_cleanup(), to fix this issue. Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b3f2c15affa7..9d2cabf1dc7e 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -352,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return res; out: if (res == ACT_P_CREATED) - tcf_idr_cleanup(*act, est); + tcf_idr_release(*act, bind); return ret; } -- cgit From 60e10b3adc3bac0f6a894c28e0eb1f2d13607362 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:23 +0100 Subject: net/sched: fix idr leak in the error path of tcf_simp_init() if the kernel fails to duplicate 'sdata', creation of a new action fails with -ENOMEM. However, subsequent attempts to install the same action using the same value of 'index' systematically fail with -ENOSPC, and that value of 'index' will no more be usable by act_simple, until rmmod / insmod of act_simple.ko is done: # tc actions add action simple sdata hello index 100 # tc actions list action simple action order 0: Simple index 100 ref 1 bind 0 # tc actions flush action simple # tc actions add action simple sdata hello index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc actions flush action simple # tc actions add action simple sdata hello index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc actions add action simple sdata hello index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in the error path of tcf_simp_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Suggested-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_simple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 425eac11f6da..b1f38063ada0 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -121,7 +121,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, d = to_defact(*a); ret = alloc_defdata(d, defdata); if (ret < 0) { - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return ret; } d->tcf_action = parm->action; -- cgit From 5bf7f8185f7c7112decdfe3d3e5c5d5e67f099a1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:24 +0100 Subject: net/sched: fix idr leak in the error path of tcf_act_police_init() tcf_act_police_init() can fail after the idr has been successfully reserved (e.g., qdisc_get_rtab() may return NULL). When this happens, subsequent attempts to configure a police rule using the same idr value systematiclly fail with -ENOSPC: # tc action add action police rate 1000 burst 1000 drop index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action police rate 1000 burst 1000 drop index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action police rate 1000 burst 1000 drop index 100 RTNETLINK answers: No space left on device ... Fix this in the error path of tcf_act_police_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_police.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 95d3c9097b25..faebf82b99f1 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -194,7 +194,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return err; } -- cgit From 94fa3f929ec0c048b1f3658cc335b940df4f6d22 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:25 +0100 Subject: net/sched: fix idr leak in the error path of tcp_pedit_init() tcf_pedit_init() can fail to allocate 'keys' after the idr has been successfully reserved. When this happens, subsequent attempts to configure a pedit rule using the same idr value systematically fail with -ENOSPC: # tc action add action pedit munge ip ttl set 63 index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action pedit munge ip ttl set 63 index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action pedit munge ip ttl set 63 index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in the error path of tcf_act_pedit_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 349beaffb29e..fef08835f26d 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -176,7 +176,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); kfree(keys_ex); return -ENOMEM; } -- cgit From 1e46ef1762bb2e52f0f996131a4d16ed4e9fd065 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:26 +0100 Subject: net/sched: fix idr leak in the error path of __tcf_ipt_init() __tcf_ipt_init() can fail after the idr has been successfully reserved. When this happens, subsequent attempts to configure xt/ipt rules using the same idr value systematically fail with -ENOSPC: # tc action add action xt -j LOG --log-prefix test1 index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "test1" index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel Command "(null)" is unknown, try "tc actions help". # tc action add action xt -j LOG --log-prefix test1 index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "test1" index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel Command "(null)" is unknown, try "tc actions help". # tc action add action xt -j LOG --log-prefix test1 index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "test1" index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in the error path of __tcf_ipt_init(), calling tcf_idr_release() in place of tcf_idr_cleanup(). Since tcf_ipt_release() can now be called when tcfi_t is NULL, we also need to protect calls to ipt_destroy_target() to avoid NULL pointer dereference. Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 06e380ae0928..7e06b9b62613 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -80,9 +80,12 @@ static void ipt_destroy_target(struct xt_entry_target *t) static void tcf_ipt_release(struct tc_action *a) { struct tcf_ipt *ipt = to_ipt(a); - ipt_destroy_target(ipt->tcfi_t); + + if (ipt->tcfi_t) { + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_t); + } kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); } static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { @@ -187,7 +190,7 @@ err2: kfree(tname); err1: if (ret == ACT_P_CREATED) - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return err; } -- cgit From d7f20015736048f494988093b59fe513dc232ce9 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:27 +0100 Subject: net/sched: fix idr leak in the error path of tcf_vlan_init() tcf_vlan_init() can fail after the idr has been successfully reserved. When this happens, every subsequent attempt to configure vlan rules using the same idr value will systematically fail with -ENOSPC, unless the first attempt was done using the 'replace' keyword. # tc action add action vlan pop index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action vlan pop index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action vlan pop index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in tcf_vlan_init(), ensuring that tcf_idr_release() is called on the error path when the idr has been reserved, but not yet inserted. Also, don't test 'ovr' in the error path, to avoid a 'replace' failure implicitly become a 'delete' that leaks refcount in act_vlan module: # rmmod act_vlan; modprobe act_vlan # tc action add action vlan push id 5 index 100 # tc action replace action vlan push id 7 index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action list action vlan # # rmmod act_vlan rmmod: ERROR: Module act_vlan is in use Fixes: 4c5b9d9642c8 ("act_vlan: VLAN action rewrite to use RCU lock/unlock and update") Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index c2914e9a4a6f..c49cb61adedf 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -195,7 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, ASSERT_RTNL(); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { - if (ovr) + if (ret == ACT_P_CREATED) tcf_idr_release(*a, bind); return -ENOMEM; } -- cgit From f29cdfbe33d6915ba8056179b0041279a67e3647 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 19 Mar 2018 15:31:28 +0100 Subject: net/sched: fix idr leak in the error path of tcf_skbmod_init() tcf_skbmod_init() can fail after the idr has been successfully reserved. When this happens, every subsequent attempt to configure skbmod rules using the same idr value will systematically fail with -ENOSPC, unless the first attempt was done using the 'replace' keyword: # tc action add action skbmod swap mac index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action add action skbmod swap mac index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel # tc action add action skbmod swap mac index 100 RTNETLINK answers: No space left on device We have an error talking to the kernel ... Fix this in tcf_skbmod_init(), ensuring that tcf_idr_release() is called on the error path when the idr has been reserved, but not yet inserted. Also, don't test 'ovr' in the error path, to avoid a 'replace' failure implicitly become a 'delete' that leaks refcount in act_skbmod module: # rmmod act_skbmod; modprobe act_skbmod # tc action add action skbmod swap mac index 100 # tc action add action skbmod swap mac continue index 100 RTNETLINK answers: File exists We have an error talking to the kernel # tc action replace action skbmod swap mac continue index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action list action skbmod # # rmmod act_skbmod rmmod: ERROR: Module act_skbmod is in use Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d09565d6433e..7b0700f52b50 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -152,7 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, ASSERT_RTNL(); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { - if (ovr) + if (ret == ACT_P_CREATED) tcf_idr_release(*a, bind); return -ENOMEM; } -- cgit From 5df7af85ecd88e8b5f1f31d6456c3cf38a8bbdda Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:52 +0800 Subject: net: phy: Add general dummy stubs for MMD register access For some phy devices, even though they don't support the MMD extended register access, it does have some side effect if we are trying to read/write the MMD registers via indirect method. So introduce general dummy stubs for MMD register access which these devices can use to avoid such side effect. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 17 +++++++++++++++++ include/linux/phy.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index fe16f5894092..74664a6c0cdc 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1673,6 +1673,23 @@ int genphy_config_init(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_config_init); +/* This is used for the phy device which doesn't support the MMD extended + * register access, but it does have side effect when we are trying to access + * the MMD register via indirect method. + */ +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_read_mmd_unsupported); + +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_write_mmd_unsupported); + int genphy_suspend(struct phy_device *phydev) { return phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN); diff --git a/include/linux/phy.h b/include/linux/phy.h index b260fb336b25..7c4c2379e010 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -984,6 +984,10 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev) { return 0; } +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, + u16 regnum); +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); -- cgit From 0231b1a074c672f8c00da00a57144072890d816b Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:53 +0800 Subject: net: phy: realtek: Use the dummy stubs for MMD register access for rtl8211b The Ethernet on mpc8315erdb is broken since commit b6b5e8a69118 ("gianfar: Disable EEE autoneg by default"). The reason is that even though the rtl8211b doesn't support the MMD extended registers access, it does return some random values if we trying to access the MMD register via indirect method. This makes it seem that the EEE is supported by this phy device. And the subsequent writing to the MMD registers does cause the phy malfunction. So use the dummy stubs for the MMD register access to fix this issue. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index ee3ca4a2f12b..9f48ecf9c627 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -172,6 +172,8 @@ static struct phy_driver realtek_drvs[] = { .flags = PHY_HAS_INTERRUPT, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211b_config_intr, + .read_mmd = &genphy_read_mmd_unsupported, + .write_mmd = &genphy_write_mmd_unsupported, }, { .phy_id = 0x001cc914, .name = "RTL8211DN Gigabit Ethernet", -- cgit From c846a2b7bd0e6900a726afb7c0a066f3a93617cf Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:54 +0800 Subject: net: phy: micrel: Use the general dummy stubs for MMD register access The new general dummy stubs for MMD register access were introduced. Use that for the codes reuse. Signed-off-by: Kevin Hao Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 49be85afbea9..f41b224a9cdb 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -635,25 +635,6 @@ static int ksz8873mll_config_aneg(struct phy_device *phydev) return 0; } -/* This routine returns -1 as an indication to the caller that the - * Micrel ksz9021 10/100/1000 PHY does not support standard IEEE - * MMD extended PHY registers. - */ -static int -ksz9021_rd_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum) -{ - return -1; -} - -/* This routine does nothing since the Micrel ksz9021 does not support - * standard IEEE MMD extended PHY registers. - */ -static int -ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum, u16 val) -{ - return -1; -} - static int kszphy_get_sset_count(struct phy_device *phydev) { return ARRAY_SIZE(kszphy_hw_stats); @@ -946,8 +927,8 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, - .read_mmd = ksz9021_rd_mmd_phyreg, - .write_mmd = ksz9021_wr_mmd_phyreg, + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, }, { .phy_id = PHY_ID_KSZ9031, .phy_id_mask = MICREL_PHY_ID_MASK, -- cgit From 6be687395b3124f002a653c1a50b3260222b3cd7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:12 +0100 Subject: s390/qeth: free netdevice when removing a card On removal, a qeth card's netdevice is currently not properly freed because the call chain looks as follows: qeth_core_remove_device(card) lx_remove_device(card) unregister_netdev(card->dev) card->dev = NULL !!! qeth_core_free_card(card) if (card->dev) !!! free_netdev(card->dev) Fix it by free'ing the netdev straight after unregistering. This also fixes the sysfs-driven layer switch case (qeth_dev_layer2_store()), where the need to free the current netdevice was not considered at all. Note that free_netdev() takes care of the netif_napi_del() for us too. Fixes: 4a71df50047f ("qeth: new qeth device driver") Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 -- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index c8b308cfabf1..4a820afececd 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5087,8 +5087,6 @@ static void qeth_core_free_card(struct qeth_card *card) QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *)); qeth_clean_channel(&card->read); qeth_clean_channel(&card->write); - if (card->dev) - free_netdev(card->dev); qeth_free_qdio_buffers(card); unregister_service_level(&card->qeth_service_level); kfree(card); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7f236440483f..5ef4c978ad19 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -915,8 +915,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_l2_set_offline(cgdev); if (card->dev) { - netif_napi_del(&card->napi); unregister_netdev(card->dev); + free_netdev(card->dev); card->dev = NULL; } return; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 962a04b68dd2..b6b12220da71 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2865,8 +2865,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_l3_set_offline(cgdev); if (card->dev) { - netif_napi_del(&card->napi); unregister_netdev(card->dev); + free_netdev(card->dev); card->dev = NULL; } -- cgit From 1063e432bb45be209427ed3f1ca3908e4aa3c7d7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:13 +0100 Subject: s390/qeth: when thread completes, wake up all waiters qeth_wait_for_threads() is potentially called by multiple users, make sure to notify all of them after qeth_clear_thread_running_bit() adjusted the thread_running_mask. With no timeout, callers would otherwise stall. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4a820afececd..42ee8806fa53 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -960,7 +960,7 @@ void qeth_clear_thread_running_bit(struct qeth_card *card, unsigned long thread) spin_lock_irqsave(&card->thread_mask_lock, flags); card->thread_running_mask &= ~thread; spin_unlock_irqrestore(&card->thread_mask_lock, flags); - wake_up(&card->wait_q); + wake_up_all(&card->wait_q); } EXPORT_SYMBOL_GPL(qeth_clear_thread_running_bit); -- cgit From 17bf8c9b3d499d5168537c98b61eb7a1fcbca6c2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:14 +0100 Subject: s390/qeth: lock read device while queueing next buffer For calling ccw_device_start(), issue_next_read() needs to hold the device's ccwlock. This is satisfied for the IRQ handler path (where qeth_irq() gets called under the ccwlock), but we need explicit locking for the initial call by the MPC initialization. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 42ee8806fa53..2a9afaf8f264 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -527,8 +527,7 @@ static inline int qeth_is_cq(struct qeth_card *card, unsigned int queue) queue == card->qdio.no_in_queues - 1; } - -static int qeth_issue_next_read(struct qeth_card *card) +static int __qeth_issue_next_read(struct qeth_card *card) { int rc; struct qeth_cmd_buffer *iob; @@ -559,6 +558,17 @@ static int qeth_issue_next_read(struct qeth_card *card) return rc; } +static int qeth_issue_next_read(struct qeth_card *card) +{ + int ret; + + spin_lock_irq(get_ccwdev_lock(CARD_RDEV(card))); + ret = __qeth_issue_next_read(card); + spin_unlock_irq(get_ccwdev_lock(CARD_RDEV(card))); + + return ret; +} + static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card) { struct qeth_reply *reply; @@ -1182,7 +1192,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, return; if (channel == &card->read && channel->state == CH_STATE_UP) - qeth_issue_next_read(card); + __qeth_issue_next_read(card); iob = channel->iob; index = channel->buf_no; -- cgit From a6c3d93963e4b333c764fde69802c3ea9eaa9d5c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 20 Mar 2018 07:59:15 +0100 Subject: s390/qeth: on channel error, reject further cmd requests When the IRQ handler determines that one of the cmd IO channels has failed and schedules recovery, block any further cmd requests from being submitted. The request would inevitably stall, and prevent the recovery from making progress until the request times out. This sort of error was observed after Live Guest Relocation, where the pending IO on the READ channel intentionally gets terminated to kick-start recovery. Simultaneously the guest executed SIOCETHTOOL, triggering qeth to issue a QUERY CARD INFO command. The command then stalled in the inoperabel WRITE channel. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 2a9afaf8f264..3653bea38470 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1174,6 +1174,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } rc = qeth_get_problem(cdev, irb); if (rc) { + card->read_or_write_problem = 1; qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; -- cgit From 1bf9a7520fadaebfb8891284b046dd3fa6a2dc32 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:31 +0300 Subject: net: aquantia: Fix hardware reset when SPI may rarely hangup Under some circumstances (notably using thunderbolt interface) SPI on chip reset may be in active transaction. Here we forcibly cleanup SPI to prevent possible hangups. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 967f0fd07fcf..fcb3279ff9c7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -79,16 +79,15 @@ int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) { + u32 gsr, val; int k = 0; - u32 gsr; aq_hw_write_reg(self, 0x404, 0x40e1); AQ_HW_SLEEP(50); /* Cleanup SPI */ - aq_hw_write_reg(self, 0x534, 0xA0); - aq_hw_write_reg(self, 0x100, 0x9F); - aq_hw_write_reg(self, 0x100, 0x809F); + val = aq_hw_read_reg(self, 0x53C); + aq_hw_write_reg(self, 0x53C, val | 0x10); gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR); aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000); @@ -97,7 +96,14 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) aq_hw_write_reg(self, 0x404, 0x80e0); aq_hw_write_reg(self, 0x32a8, 0x0); aq_hw_write_reg(self, 0x520, 0x1); + + /* Reset SPI again because of possible interrupted SPI burst */ + val = aq_hw_read_reg(self, 0x53C); + aq_hw_write_reg(self, 0x53C, val | 0x10); AQ_HW_SLEEP(10); + /* Clear SPI reset state */ + aq_hw_write_reg(self, 0x53C, val & ~0x10); + aq_hw_write_reg(self, 0x404, 0x180e0); for (k = 0; k < 1000; k++) { @@ -147,7 +153,7 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self) { - u32 gsr, rbl_status; + u32 gsr, val, rbl_status; int k; aq_hw_write_reg(self, 0x404, 0x40e1); @@ -157,6 +163,10 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self) /* Alter RBL status */ aq_hw_write_reg(self, 0x388, 0xDEAD); + /* Cleanup SPI */ + val = aq_hw_read_reg(self, 0x53C); + aq_hw_write_reg(self, 0x53C, val | 0x10); + /* Global software reset*/ hw_atl_rx_rx_reg_res_dis_set(self, 0U); hw_atl_tx_tx_reg_res_dis_set(self, 0U); -- cgit From d0f0fb25d6c7a7c299d9bdaa2a11e96e4102e944 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:32 +0300 Subject: net: aquantia: Fix a regression with reset on old firmware FW 1.5.58 and below needs a fixed delay even after 0x18 register is filled. Otherwise, setting MPI_INIT state too fast causes traffic hang. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index fcb3279ff9c7..dcb27bc7e97c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -147,6 +147,8 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) aq_pr_err("FW kickstart failed\n"); return -EIO; } + /* Old FW requires fixed delay after init */ + AQ_HW_SLEEP(15); return 0; } @@ -214,6 +216,8 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self) aq_pr_err("FW kickstart failed\n"); return -EIO; } + /* Old FW requires fixed delay after init */ + AQ_HW_SLEEP(15); return 0; } -- cgit From 47203b3426a6d0ac8b7a96259ed6784158b6d74b Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:33 +0300 Subject: net: aquantia: Change inefficient wait loop on fw data reads B1 hardware changes behavior of mailbox interface, it has busy bit always raised. Data ready condition should be detected by increment of address register. Old code has empty `for` loop, and that caused cpu overloads on B1 hardware. aq_nic_service_timer_cb consumed ~100ms because of that. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 42 ++++++++++++++-------- .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 1 + 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index dcb27bc7e97c..d3b847ec7465 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -21,6 +21,10 @@ #define HW_ATL_UCP_0X370_REG 0x0370U +#define HW_ATL_MIF_CMD 0x0200U +#define HW_ATL_MIF_ADDR 0x0208U +#define HW_ATL_MIF_VAL 0x020CU + #define HW_ATL_FW_SM_RAM 0x2U #define HW_ATL_MPI_FW_VERSION 0x18 #define HW_ATL_MPI_CONTROL_ADR 0x0368U @@ -269,18 +273,22 @@ int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a, } } - aq_hw_write_reg(self, 0x00000208U, a); - - for (++cnt; --cnt;) { - u32 i = 0U; + aq_hw_write_reg(self, HW_ATL_MIF_ADDR, a); - aq_hw_write_reg(self, 0x00000200U, 0x00008000U); + for (++cnt; --cnt && !err;) { + aq_hw_write_reg(self, HW_ATL_MIF_CMD, 0x00008000U); - for (i = 1024U; - (0x100U & aq_hw_read_reg(self, 0x00000200U)) && --i;) { - } + if (IS_CHIP_FEATURE(REVISION_B1)) + AQ_HW_WAIT_FOR(a != aq_hw_read_reg(self, + HW_ATL_MIF_ADDR), + 1, 1000U); + else + AQ_HW_WAIT_FOR(!(0x100 & aq_hw_read_reg(self, + HW_ATL_MIF_CMD)), + 1, 1000U); - *(p++) = aq_hw_read_reg(self, 0x0000020CU); + *(p++) = aq_hw_read_reg(self, HW_ATL_MIF_VAL); + a += 4; } hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM); @@ -676,14 +684,18 @@ void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p) u32 val = hw_atl_reg_glb_mif_id_get(self); u32 mif_rev = val & 0xFFU; - if ((3U & mif_rev) == 1U) { - chip_features |= - HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 | + if ((0xFU & mif_rev) == 1U) { + chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 | HAL_ATLANTIC_UTILS_CHIP_MPI_AQ | HAL_ATLANTIC_UTILS_CHIP_MIPS; - } else if ((3U & mif_rev) == 2U) { - chip_features |= - HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 | + } else if ((0xFU & mif_rev) == 2U) { + chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 | + HAL_ATLANTIC_UTILS_CHIP_MPI_AQ | + HAL_ATLANTIC_UTILS_CHIP_MIPS | + HAL_ATLANTIC_UTILS_CHIP_TPO2 | + HAL_ATLANTIC_UTILS_CHIP_RPF2; + } else if ((0xFU & mif_rev) == 0xAU) { + chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 | HAL_ATLANTIC_UTILS_CHIP_MPI_AQ | HAL_ATLANTIC_UTILS_CHIP_MIPS | HAL_ATLANTIC_UTILS_CHIP_TPO2 | diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index 2c690947910a..cd8f18f39c61 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -161,6 +161,7 @@ struct __packed hw_aq_atl_utils_mbox { #define HAL_ATLANTIC_UTILS_CHIP_MPI_AQ 0x00000010U #define HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 0x01000000U #define HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 0x02000000U +#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 0x04000000U #define IS_CHIP_FEATURE(_F_) (HAL_ATLANTIC_UTILS_CHIP_##_F_ & \ self->chip_features) -- cgit From b647d3980948e881e6bb9bd898465e675d5e8486 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:34 +0300 Subject: net: aquantia: Add tx clean budget and valid budget handling logic We should report to napi full budget only when we have more job to do. Before this fix, on any tx queue cleanup we forced napi to do poll again. Thats a waste of cpu resources and caused storming with napi polls when there was at least one tx on each interrupt. With this fix we report full budget only when there is more job on TX to do. Or, as before, when rx budget was fully consumed. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 2 ++ drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 7 +++++-- drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 11 +++++------ 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 0b49f1aeebd3..fc7383106946 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -36,6 +36,8 @@ #define AQ_CFG_TX_FRAME_MAX (16U * 1024U) #define AQ_CFG_RX_FRAME_MAX (4U * 1024U) +#define AQ_CFG_TX_CLEAN_BUDGET 256U + /* LRO */ #define AQ_CFG_IS_LRO_DEF 1U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0be6a11370bb..b5f1f62e8e25 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -136,11 +136,12 @@ void aq_ring_queue_stop(struct aq_ring_s *ring) netif_stop_subqueue(ndev, ring->idx); } -void aq_ring_tx_clean(struct aq_ring_s *self) +bool aq_ring_tx_clean(struct aq_ring_s *self) { struct device *dev = aq_nic_get_dev(self->aq_nic); + unsigned int budget = AQ_CFG_TX_CLEAN_BUDGET; - for (; self->sw_head != self->hw_head; + for (; self->sw_head != self->hw_head && budget--; self->sw_head = aq_ring_next_dx(self, self->sw_head)) { struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head]; @@ -167,6 +168,8 @@ void aq_ring_tx_clean(struct aq_ring_s *self) buff->pa = 0U; buff->eop_index = 0xffffU; } + + return !!budget; } #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 965fae0fb6e0..ac1329f4051d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -153,7 +153,7 @@ void aq_ring_free(struct aq_ring_s *self); void aq_ring_update_queue_state(struct aq_ring_s *ring); void aq_ring_queue_wake(struct aq_ring_s *ring); void aq_ring_queue_stop(struct aq_ring_s *ring); -void aq_ring_tx_clean(struct aq_ring_s *self); +bool aq_ring_tx_clean(struct aq_ring_s *self); int aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, int *work_done, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f890b8a5a862..d335c334fa56 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -35,12 +35,12 @@ struct aq_vec_s { static int aq_vec_poll(struct napi_struct *napi, int budget) { struct aq_vec_s *self = container_of(napi, struct aq_vec_s, napi); + unsigned int sw_tail_old = 0U; struct aq_ring_s *ring = NULL; + bool was_tx_cleaned = true; + unsigned int i = 0U; int work_done = 0; int err = 0; - unsigned int i = 0U; - unsigned int sw_tail_old = 0U; - bool was_tx_cleaned = false; if (!self) { err = -EINVAL; @@ -57,9 +57,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) if (ring[AQ_VEC_TX_ID].sw_head != ring[AQ_VEC_TX_ID].hw_head) { - aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); + was_tx_cleaned = aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]); - was_tx_cleaned = true; } err = self->aq_hw_ops->hw_ring_rx_receive(self->aq_hw, @@ -90,7 +89,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) } } - if (was_tx_cleaned) + if (!was_tx_cleaned) work_done = budget; if (work_done < budget) { -- cgit From 3e9a545131723ff12fe4304245c222157f0e4622 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:35 +0300 Subject: net: aquantia: Allow live mac address changes There is nothing prevents us from changing MAC on the running interface. Allow this with ndev priv flag. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index ebbaf63eaf47..34120d5b7c03 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -247,6 +247,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->hw_features |= aq_hw_caps->hw_features; self->ndev->features = aq_hw_caps->hw_features; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; + self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN; self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN; -- cgit From 90869ddfefebb1a79bd7bebfa4f28baa9f8c82cd Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:36 +0300 Subject: net: aquantia: Implement pci shutdown callback We should close link and all NIC operations during shutdown. On some systems graceful reboot never closes NIC interface on its own, but only indicates pci device shutdown. Without explicit handler, NIC rx rings continued to transfer DMA data into prepared buffers while CPU rebooted already. That caused memory corruptions on soft reboot. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 20 ++++++++++++++++++++ drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 1 + drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 15 +++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 34120d5b7c03..c96a92118b8b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -939,3 +939,23 @@ err_exit: out: return err; } + +void aq_nic_shutdown(struct aq_nic_s *self) +{ + int err = 0; + + if (!self->ndev) + return; + + rtnl_lock(); + + netif_device_detach(self->ndev); + + err = aq_nic_stop(self); + if (err < 0) + goto err_exit; + aq_nic_deinit(self); + +err_exit: + rtnl_unlock(); +} \ No newline at end of file diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index d16b0f1a95aa..219b550d1665 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -118,5 +118,6 @@ struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self); u32 aq_nic_get_fw_version(struct aq_nic_s *self); int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg); int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self); +void aq_nic_shutdown(struct aq_nic_s *self); #endif /* AQ_NIC_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 87c4308b52a7..ecc6306f940f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -323,6 +323,20 @@ static void aq_pci_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static void aq_pci_shutdown(struct pci_dev *pdev) +{ + struct aq_nic_s *self = pci_get_drvdata(pdev); + + aq_nic_shutdown(self); + + pci_disable_device(pdev); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + } +} + static int aq_pci_suspend(struct pci_dev *pdev, pm_message_t pm_msg) { struct aq_nic_s *self = pci_get_drvdata(pdev); @@ -345,6 +359,7 @@ static struct pci_driver aq_pci_ops = { .remove = aq_pci_remove, .suspend = aq_pci_suspend, .resume = aq_pci_resume, + .shutdown = aq_pci_shutdown, }; module_pci_driver(aq_pci_ops); -- cgit From c89bf1cd3498db851bdc184b26e26baaa5f141c5 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Tue, 20 Mar 2018 14:40:37 +0300 Subject: net: aquantia: driver version bump Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/ver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h index 5265b937677b..a445de6837a6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/ver.h +++ b/drivers/net/ethernet/aquantia/atlantic/ver.h @@ -13,7 +13,7 @@ #define NIC_MAJOR_DRIVER_VERSION 2 #define NIC_MINOR_DRIVER_VERSION 0 #define NIC_BUILD_DRIVER_VERSION 2 -#define NIC_REVISION_DRIVER_VERSION 0 +#define NIC_REVISION_DRIVER_VERSION 1 #define AQ_CFG_DRV_VERSION_SUFFIX "-kern" -- cgit From 191f86ca8ef27f7a492fd1c03620498c6e94f0ac Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 20 Mar 2018 14:44:55 +0000 Subject: ipv6: sr: fix scheduling in RCU when creating seg6 lwtunnel state The seg6_build_state() function is called with RCU read lock held, so we cannot use GFP_KERNEL. This patch uses GFP_ATOMIC instead. [ 92.770271] ============================= [ 92.770628] WARNING: suspicious RCU usage [ 92.770921] 4.16.0-rc4+ #12 Not tainted [ 92.771277] ----------------------------- [ 92.771585] ./include/linux/rcupdate.h:302 Illegal context switch in RCU read-side critical section! [ 92.772279] [ 92.772279] other info that might help us debug this: [ 92.772279] [ 92.773067] [ 92.773067] rcu_scheduler_active = 2, debug_locks = 1 [ 92.773514] 2 locks held by ip/2413: [ 92.773765] #0: (rtnl_mutex){+.+.}, at: [<00000000e5461720>] rtnetlink_rcv_msg+0x441/0x4d0 [ 92.774377] #1: (rcu_read_lock){....}, at: [<00000000df4f161e>] lwtunnel_build_state+0x59/0x210 [ 92.775065] [ 92.775065] stack backtrace: [ 92.775371] CPU: 0 PID: 2413 Comm: ip Not tainted 4.16.0-rc4+ #12 [ 92.775791] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014 [ 92.776608] Call Trace: [ 92.776852] dump_stack+0x7d/0xbc [ 92.777130] __schedule+0x133/0xf00 [ 92.777393] ? unwind_get_return_address_ptr+0x50/0x50 [ 92.777783] ? __sched_text_start+0x8/0x8 [ 92.778073] ? rcu_is_watching+0x19/0x30 [ 92.778383] ? kernel_text_address+0x49/0x60 [ 92.778800] ? __kernel_text_address+0x9/0x30 [ 92.779241] ? unwind_get_return_address+0x29/0x40 [ 92.779727] ? pcpu_alloc+0x102/0x8f0 [ 92.780101] _cond_resched+0x23/0x50 [ 92.780459] __mutex_lock+0xbd/0xad0 [ 92.780818] ? pcpu_alloc+0x102/0x8f0 [ 92.781194] ? seg6_build_state+0x11d/0x240 [ 92.781611] ? save_stack+0x9b/0xb0 [ 92.781965] ? __ww_mutex_wakeup_for_backoff+0xf0/0xf0 [ 92.782480] ? seg6_build_state+0x11d/0x240 [ 92.782925] ? lwtunnel_build_state+0x1bd/0x210 [ 92.783393] ? ip6_route_info_create+0x687/0x1640 [ 92.783846] ? ip6_route_add+0x74/0x110 [ 92.784236] ? inet6_rtm_newroute+0x8a/0xd0 Fixes: 6c8702c60b886 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index bd6cc688bd19..8367b859a934 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -418,7 +418,7 @@ static int seg6_build_state(struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); - err = dst_cache_init(&slwt->cache, GFP_KERNEL); + err = dst_cache_init(&slwt->cache, GFP_ATOMIC); if (err) { kfree(newts); return err; -- cgit From 8936ef7604c11b5d701580d779e0f5684abc7b68 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 20 Mar 2018 14:44:56 +0000 Subject: ipv6: sr: fix NULL pointer dereference when setting encap source address When using seg6 in encap mode, we call ipv6_dev_get_saddr() to set the source address of the outer IPv6 header, in case none was specified. Using skb->dev can lead to BUG() when it is in an inconsistent state. This patch uses the net_device attached to the skb's dst instead. [940807.667429] BUG: unable to handle kernel NULL pointer dereference at 000000000000047c [940807.762427] IP: ipv6_dev_get_saddr+0x8b/0x1d0 [940807.815725] PGD 0 P4D 0 [940807.847173] Oops: 0000 [#1] SMP PTI [940807.890073] Modules linked in: [940807.927765] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G W 4.16.0-rc1-seg6bpf+ #2 [940808.028988] Hardware name: HP ProLiant DL120 G6/ProLiant DL120 G6, BIOS O26 09/06/2010 [940808.128128] RIP: 0010:ipv6_dev_get_saddr+0x8b/0x1d0 [940808.187667] RSP: 0018:ffff88043fd836b0 EFLAGS: 00010206 [940808.251366] RAX: 0000000000000005 RBX: ffff88042cb1c860 RCX: 00000000000000fe [940808.338025] RDX: 00000000000002c0 RSI: ffff88042cb1c860 RDI: 0000000000004500 [940808.424683] RBP: ffff88043fd83740 R08: 0000000000000000 R09: ffffffffffffffff [940808.511342] R10: 0000000000000040 R11: 0000000000000000 R12: ffff88042cb1c850 [940808.598012] R13: ffffffff8208e380 R14: ffff88042ac8da00 R15: 0000000000000002 [940808.684675] FS: 0000000000000000(0000) GS:ffff88043fd80000(0000) knlGS:0000000000000000 [940808.783036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [940808.852975] CR2: 000000000000047c CR3: 00000004255fe000 CR4: 00000000000006e0 [940808.939634] Call Trace: [940808.970041] [940808.995250] ? ip6t_do_table+0x265/0x640 [940809.043341] seg6_do_srh_encap+0x28f/0x300 [940809.093516] ? seg6_do_srh+0x1a0/0x210 [940809.139528] seg6_do_srh+0x1a0/0x210 [940809.183462] seg6_output+0x28/0x1e0 [940809.226358] lwtunnel_output+0x3f/0x70 [940809.272370] ip6_xmit+0x2b8/0x530 [940809.313185] ? ac6_proc_exit+0x20/0x20 [940809.359197] inet6_csk_xmit+0x7d/0xc0 [940809.404173] tcp_transmit_skb+0x548/0x9a0 [940809.453304] __tcp_retransmit_skb+0x1a8/0x7a0 [940809.506603] ? ip6_default_advmss+0x40/0x40 [940809.557824] ? tcp_current_mss+0x24/0x90 [940809.605925] tcp_retransmit_skb+0xd/0x80 [940809.654016] tcp_xmit_retransmit_queue.part.17+0xf9/0x210 [940809.719797] tcp_ack+0xa47/0x1110 [940809.760612] tcp_rcv_established+0x13c/0x570 [940809.812865] tcp_v6_do_rcv+0x151/0x3d0 [940809.858879] tcp_v6_rcv+0xa5c/0xb10 [940809.901770] ? seg6_output+0xdd/0x1e0 [940809.946745] ip6_input_finish+0xbb/0x460 [940809.994837] ip6_input+0x74/0x80 [940810.034612] ? ip6_rcv_finish+0xb0/0xb0 [940810.081663] ipv6_rcv+0x31c/0x4c0 ... Fixes: 6c8702c60b886 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Reported-by: Tom Herbert Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8367b859a934..7a78dcfda68a 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -93,7 +93,8 @@ static void set_tun_src(struct net *net, struct net_device *dev, /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); struct ipv6hdr *hdr, *inner_hdr; struct ipv6_sr_hdr *isrh; int hdrlen, tot_len, err; @@ -134,7 +135,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { -- cgit From 6d066734e9f09cdea4a3b9cb76136db3f29cfb02 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 20 Mar 2018 16:49:26 +0100 Subject: ppp: avoid loop in xmit recursion detection code We already detect situations where a PPP channel sends packets back to its upper PPP device. While this is enough to avoid deadlocking on xmit locks, this doesn't prevent packets from looping between the channel and the unit. The problem is that ppp_start_xmit() enqueues packets in ppp->file.xq before checking for xmit recursion. Therefore, __ppp_xmit_process() might dequeue a packet from ppp->file.xq and send it on the channel which, in turn, loops it back on the unit. Then ppp_start_xmit() queues the packet back to ppp->file.xq and __ppp_xmit_process() picks it up and sends it again through the channel. Therefore, the packet will loop between __ppp_xmit_process() and ppp_start_xmit() until some other part of the xmit path drops it. For L2TP, we rapidly fill the skb's headroom and pppol2tp_xmit() drops the packet after a few iterations. But PPTP reallocates the headroom if necessary, letting the loop run and exhaust the machine resources (as reported in https://bugzilla.kernel.org/show_bug.cgi?id=199109). Fix this by letting __ppp_xmit_process() enqueue the skb to ppp->file.xq, so that we can check for recursion before adding it to the queue. Now ppp_xmit_process() can drop the packet when recursion is detected. __ppp_channel_push() is a bit special. It calls __ppp_xmit_process() without having any actual packet to send. This is used by ppp_output_wakeup() to re-enable transmission on the parent unit (for implementations like ppp_async.c, where the .start_xmit() function might not consume the skb, leaving it in ppp->xmit_pending and disabling transmission). Therefore, __ppp_xmit_process() needs to handle the case where skb is NULL, dequeuing as many packets as possible from ppp->file.xq. Reported-by: xu heng Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index fa2a9bdd1866..da1937832c99 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -257,7 +257,7 @@ struct ppp_net { /* Prototypes. */ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct file *file, unsigned int cmd, unsigned long arg); -static void ppp_xmit_process(struct ppp *ppp); +static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb); static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); static void ppp_push(struct ppp *ppp); static void ppp_channel_push(struct channel *pch); @@ -513,13 +513,12 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, goto out; } - skb_queue_tail(&pf->xq, skb); - switch (pf->kind) { case INTERFACE: - ppp_xmit_process(PF_TO_PPP(pf)); + ppp_xmit_process(PF_TO_PPP(pf), skb); break; case CHANNEL: + skb_queue_tail(&pf->xq, skb); ppp_channel_push(PF_TO_CHANNEL(pf)); break; } @@ -1267,8 +1266,8 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) put_unaligned_be16(proto, pp); skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev))); - skb_queue_tail(&ppp->file.xq, skb); - ppp_xmit_process(ppp); + ppp_xmit_process(ppp, skb); + return NETDEV_TX_OK; outf: @@ -1420,13 +1419,14 @@ static void ppp_setup(struct net_device *dev) */ /* Called to do any work queued up on the transmit side that can now be done */ -static void __ppp_xmit_process(struct ppp *ppp) +static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) { - struct sk_buff *skb; - ppp_xmit_lock(ppp); if (!ppp->closing) { ppp_push(ppp); + + if (skb) + skb_queue_tail(&ppp->file.xq, skb); while (!ppp->xmit_pending && (skb = skb_dequeue(&ppp->file.xq))) ppp_send_frame(ppp, skb); @@ -1440,7 +1440,7 @@ static void __ppp_xmit_process(struct ppp *ppp) ppp_xmit_unlock(ppp); } -static void ppp_xmit_process(struct ppp *ppp) +static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) { local_bh_disable(); @@ -1448,7 +1448,7 @@ static void ppp_xmit_process(struct ppp *ppp) goto err; (*this_cpu_ptr(ppp->xmit_recursion))++; - __ppp_xmit_process(ppp); + __ppp_xmit_process(ppp, skb); (*this_cpu_ptr(ppp->xmit_recursion))--; local_bh_enable(); @@ -1458,6 +1458,8 @@ static void ppp_xmit_process(struct ppp *ppp) err: local_bh_enable(); + kfree_skb(skb); + if (net_ratelimit()) netdev_err(ppp->dev, "recursion detected\n"); } @@ -1942,7 +1944,7 @@ static void __ppp_channel_push(struct channel *pch) if (skb_queue_empty(&pch->file.xq)) { ppp = pch->ppp; if (ppp) - __ppp_xmit_process(ppp); + __ppp_xmit_process(ppp, NULL); } } -- cgit From 68e2ffdeb5dbf54bc3a0684aa4e73c6db8675eed Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 20 Mar 2018 10:06:59 -0700 Subject: net/ipv6: Handle onlink flag with multipath routes For multipath routes the ONLINK flag can be specified per nexthop in rtnh_flags or globally in rtm_flags. Update ip6_route_multipath_add to consider the ONLINK setting coming from rtnh_flags. Each loop over nexthops the config for the sibling route is initialized to the global config and then per nexthop settings overlayed. The flag is 'or'ed into fib6_config to handle the ONLINK flag coming from either rtm_flags or rtnh_flags. Fixes: fc1e64e1092f ("net/ipv6: Add support for onlink flag") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b3c1137ad0b8..b0d5c64e1978 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4112,6 +4112,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, r_cfg.fc_encap_type = nla_get_u16(nla); } + r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); rt = ip6_route_info_create(&r_cfg, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); -- cgit From 8348e0460ab1473f06c8b824699dd2eed3c1979d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:02 -0700 Subject: hv_netvsc: disable NAPI before channel close This makes sure that no CPU is still process packets when the channel is closed. Fixes: 76bb5db5c749 ("netvsc: fix use after free on module removal") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0265d703eb03..e70a44273f55 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -568,6 +568,10 @@ void netvsc_device_remove(struct hv_device *device) RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + /* And disassociate NAPI context from device */ + for (i = 0; i < net_device->num_chn; i++) + netif_napi_del(&net_device->chan_table[i].napi); + /* * At this point, no one should be accessing net_device * except in here @@ -579,10 +583,6 @@ void netvsc_device_remove(struct hv_device *device) netvsc_teardown_gpadl(device, net_device); - /* And dissassociate NAPI context from device */ - for (i = 0; i < net_device->num_chn; i++) - netif_napi_del(&net_device->chan_table[i].napi); - /* Release all resources */ free_netvsc_device_rcu(net_device); } -- cgit From 02400fcee2542ee334a2394e0d9f6efd969fe782 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:03 -0700 Subject: hv_netvsc: use RCU to fix concurrent rx and queue changes The receive processing may continue to happen while the internal network device state is in RCU grace period. The internal RNDIS structure is associated with the internal netvsc_device structure; both have the same RCU lifetime. Defer freeing all associated parts until after grace period. Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 17 +++++------------ drivers/net/hyperv/rndis_filter.c | 39 ++++++++++++++++----------------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e70a44273f55..12c044baf1af 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -90,6 +90,11 @@ static void free_netvsc_device(struct rcu_head *head) = container_of(head, struct netvsc_device, rcu); int i; + kfree(nvdev->extension); + vfree(nvdev->recv_buf); + vfree(nvdev->send_buf); + kfree(nvdev->send_section_map); + for (i = 0; i < VRSS_CHANNEL_MAX; i++) vfree(nvdev->chan_table[i].mrc.slots); @@ -211,12 +216,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device, net_device->recv_buf_gpadl_handle = 0; } - if (net_device->recv_buf) { - /* Free up the receive buffer */ - vfree(net_device->recv_buf); - net_device->recv_buf = NULL; - } - if (net_device->send_buf_gpadl_handle) { ret = vmbus_teardown_gpadl(device->channel, net_device->send_buf_gpadl_handle); @@ -231,12 +230,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device, } net_device->send_buf_gpadl_handle = 0; } - if (net_device->send_buf) { - /* Free up the send buffer */ - vfree(net_device->send_buf); - net_device->send_buf = NULL; - } - kfree(net_device->send_section_map); } int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 00ec80c23fe5..963314eb3226 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -264,13 +264,23 @@ static void rndis_set_link_state(struct rndis_device *rdev, } } -static void rndis_filter_receive_response(struct rndis_device *dev, - struct rndis_message *resp) +static void rndis_filter_receive_response(struct net_device *ndev, + struct netvsc_device *nvdev, + const struct rndis_message *resp) { + struct rndis_device *dev = nvdev->extension; struct rndis_request *request = NULL; bool found = false; unsigned long flags; - struct net_device *ndev = dev->ndev; + + /* This should never happen, it means control message + * response received after device removed. + */ + if (dev->state == RNDIS_DEV_UNINITIALIZED) { + netdev_err(ndev, + "got rndis message uninitialized\n"); + return; + } spin_lock_irqsave(&dev->request_lock, flags); list_for_each_entry(request, &dev->req_list, list_ent) { @@ -352,7 +362,6 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) static int rndis_filter_receive_data(struct net_device *ndev, struct netvsc_device *nvdev, - struct rndis_device *dev, struct rndis_message *msg, struct vmbus_channel *channel, void *data, u32 data_buflen) @@ -372,7 +381,7 @@ static int rndis_filter_receive_data(struct net_device *ndev, * should be the data packet size plus the trailer padding size */ if (unlikely(data_buflen < rndis_pkt->data_len)) { - netdev_err(dev->ndev, "rndis message buffer " + netdev_err(ndev, "rndis message buffer " "overflow detected (got %u, min %u)" "...dropping this message!\n", data_buflen, rndis_pkt->data_len); @@ -400,35 +409,20 @@ int rndis_filter_receive(struct net_device *ndev, void *data, u32 buflen) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct rndis_device *rndis_dev = net_dev->extension; struct rndis_message *rndis_msg = data; - /* Make sure the rndis device state is initialized */ - if (unlikely(!rndis_dev)) { - netif_dbg(net_device_ctx, rx_err, ndev, - "got rndis message but no rndis device!\n"); - return NVSP_STAT_FAIL; - } - - if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) { - netif_dbg(net_device_ctx, rx_err, ndev, - "got rndis message uninitialized\n"); - return NVSP_STAT_FAIL; - } - if (netif_msg_rx_status(net_device_ctx)) dump_rndis_message(ndev, rndis_msg); switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: - return rndis_filter_receive_data(ndev, net_dev, - rndis_dev, rndis_msg, + return rndis_filter_receive_data(ndev, net_dev, rndis_msg, channel, data, buflen); case RNDIS_MSG_INIT_C: case RNDIS_MSG_QUERY_C: case RNDIS_MSG_SET_C: /* completion msgs */ - rndis_filter_receive_response(rndis_dev, rndis_msg); + rndis_filter_receive_response(ndev, net_dev, rndis_msg); break; case RNDIS_MSG_INDICATE: @@ -1357,7 +1351,6 @@ void rndis_filter_device_remove(struct hv_device *dev, net_dev->extension = NULL; netvsc_device_remove(dev); - kfree(rndis_dev); } int rndis_filter_open(struct netvsc_device *nvdev) -- cgit From 0ef58b0a05c127762f975c3dfe8b922e4aa87a29 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:04 -0700 Subject: hv_netvsc: change GPAD teardown order on older versions On older versions of Windows, the host ignores messages after vmbus channel is closed. Workaround this by doing what Windows does and send the teardown before close on older versions of NVSP protocol. Reported-by: Mohammed Gamal Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 12c044baf1af..37b0a30d6b03 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -571,10 +571,15 @@ void netvsc_device_remove(struct hv_device *device) */ netdev_dbg(ndev, "net device safe to remove\n"); + /* older versions require that buffer be revoked before close */ + if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4) + netvsc_teardown_gpadl(device, net_device); + /* Now, we can close the channel safely */ vmbus_close(device->channel); - netvsc_teardown_gpadl(device, net_device); + if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4) + netvsc_teardown_gpadl(device, net_device); /* Release all resources */ free_netvsc_device_rcu(net_device); -- cgit From 7b2ee50c0cd513a176a26a71f2989facdd75bfea Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 20 Mar 2018 15:03:05 -0700 Subject: hv_netvsc: common detach logic Make common function for detaching internals of device during changes to MTU and RSS. Make sure no more packets are transmitted and all packets have been received before doing device teardown. Change the wait logic to be common and use usleep_range(). Changes transmit enabling logic so that transmit queues are disabled during the period when lower device is being changed. And enabled only after sub channels are setup. This avoids issue where it could be that a packet was being sent while subchannel was not initialized. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 - drivers/net/hyperv/netvsc.c | 20 +-- drivers/net/hyperv/netvsc_drv.c | 278 +++++++++++++++++++++----------------- drivers/net/hyperv/rndis_filter.c | 17 +-- 4 files changed, 173 insertions(+), 143 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cd538d5a7986..32861036c3fc 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -212,7 +212,6 @@ void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); void rndis_set_subchannel(struct work_struct *w); -bool rndis_filter_opened(const struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 37b0a30d6b03..7472172823f3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -555,8 +555,6 @@ void netvsc_device_remove(struct hv_device *device) = rtnl_dereference(net_device_ctx->nvdev); int i; - cancel_work_sync(&net_device->subchan_work); - netvsc_revoke_buf(device, net_device); RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); @@ -643,14 +641,18 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, queue_sends = atomic_dec_return(&net_device->chan_table[q_idx].queue_sends); - if (net_device->destroy && queue_sends == 0) - wake_up(&net_device->wait_drain); + if (unlikely(net_device->destroy)) { + if (queue_sends == 0) + wake_up(&net_device->wait_drain); + } else { + struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); - if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && - (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || - queue_sends < 1)) { - netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); - ndev_ctx->eth_stats.wake_queue++; + if (netif_tx_queue_stopped(txq) && + (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || + queue_sends < 1)) { + netif_tx_wake_queue(txq); + ndev_ctx->eth_stats.wake_queue++; + } } } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index faea0be18924..f28c85d212ce 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -46,7 +46,10 @@ #include "hyperv_net.h" -#define RING_SIZE_MIN 64 +#define RING_SIZE_MIN 64 +#define RETRY_US_LO 5000 +#define RETRY_US_HI 10000 +#define RETRY_MAX 2000 /* >10 sec */ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) @@ -123,10 +126,8 @@ static int netvsc_open(struct net_device *net) } rdev = nvdev->extension; - if (!rdev->link_state) { + if (!rdev->link_state) netif_carrier_on(net); - netif_tx_wake_all_queues(net); - } if (vf_netdev) { /* Setting synthetic device up transparently sets @@ -142,36 +143,25 @@ static int netvsc_open(struct net_device *net) return 0; } -static int netvsc_close(struct net_device *net) +static int netvsc_wait_until_empty(struct netvsc_device *nvdev) { - struct net_device_context *net_device_ctx = netdev_priv(net); - struct net_device *vf_netdev - = rtnl_dereference(net_device_ctx->vf_netdev); - struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); - int ret = 0; - u32 aread, i, msec = 10, retry = 0, retry_max = 20; - struct vmbus_channel *chn; - - netif_tx_disable(net); - - /* No need to close rndis filter if it is removed already */ - if (!nvdev) - goto out; - - ret = rndis_filter_close(nvdev); - if (ret != 0) { - netdev_err(net, "unable to close device (ret %d).\n", ret); - return ret; - } + unsigned int retry = 0; + int i; /* Ensure pending bytes in ring are read */ - while (true) { - aread = 0; + for (;;) { + u32 aread = 0; + for (i = 0; i < nvdev->num_chn; i++) { - chn = nvdev->chan_table[i].channel; + struct vmbus_channel *chn + = nvdev->chan_table[i].channel; + if (!chn) continue; + /* make sure receive not running now */ + napi_synchronize(&nvdev->chan_table[i].napi); + aread = hv_get_bytes_to_read(&chn->inbound); if (aread) break; @@ -181,22 +171,40 @@ static int netvsc_close(struct net_device *net) break; } - retry++; - if (retry > retry_max || aread == 0) - break; + if (aread == 0) + return 0; - msleep(msec); + if (++retry > RETRY_MAX) + return -ETIMEDOUT; - if (msec < 1000) - msec *= 2; + usleep_range(RETRY_US_LO, RETRY_US_HI); } +} - if (aread) { - netdev_err(net, "Ring buffer not empty after closing rndis\n"); - ret = -ETIMEDOUT; +static int netvsc_close(struct net_device *net) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + struct net_device *vf_netdev + = rtnl_dereference(net_device_ctx->vf_netdev); + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); + int ret; + + netif_tx_disable(net); + + /* No need to close rndis filter if it is removed already */ + if (!nvdev) + return 0; + + ret = rndis_filter_close(nvdev); + if (ret != 0) { + netdev_err(net, "unable to close device (ret %d).\n", ret); + return ret; } -out: + ret = netvsc_wait_until_empty(nvdev); + if (ret) + netdev_err(net, "Ring buffer not empty after closing rndis\n"); + if (vf_netdev) dev_close(vf_netdev); @@ -845,16 +853,81 @@ static void netvsc_get_channels(struct net_device *net, } } +static int netvsc_detach(struct net_device *ndev, + struct netvsc_device *nvdev) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct hv_device *hdev = ndev_ctx->device_ctx; + int ret; + + /* Don't try continuing to try and setup sub channels */ + if (cancel_work_sync(&nvdev->subchan_work)) + nvdev->num_chn = 1; + + /* If device was up (receiving) then shutdown */ + if (netif_running(ndev)) { + netif_tx_disable(ndev); + + ret = rndis_filter_close(nvdev); + if (ret) { + netdev_err(ndev, + "unable to close device (ret %d).\n", ret); + return ret; + } + + ret = netvsc_wait_until_empty(nvdev); + if (ret) { + netdev_err(ndev, + "Ring buffer not empty after closing rndis\n"); + return ret; + } + } + + netif_device_detach(ndev); + + rndis_filter_device_remove(hdev, nvdev); + + return 0; +} + +static int netvsc_attach(struct net_device *ndev, + struct netvsc_device_info *dev_info) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct hv_device *hdev = ndev_ctx->device_ctx; + struct netvsc_device *nvdev; + struct rndis_device *rdev; + int ret; + + nvdev = rndis_filter_device_add(hdev, dev_info); + if (IS_ERR(nvdev)) + return PTR_ERR(nvdev); + + /* Note: enable and attach happen when sub-channels setup */ + + netif_carrier_off(ndev); + + if (netif_running(ndev)) { + ret = rndis_filter_open(nvdev); + if (ret) + return ret; + + rdev = nvdev->extension; + if (!rdev->link_state) + netif_carrier_on(ndev); + } + + return 0; +} + static int netvsc_set_channels(struct net_device *net, struct ethtool_channels *channels) { struct net_device_context *net_device_ctx = netdev_priv(net); - struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); unsigned int orig, count = channels->combined_count; struct netvsc_device_info device_info; - bool was_opened; - int ret = 0; + int ret; /* We do not support separate count for rx, tx, or other */ if (count == 0 || @@ -871,9 +944,6 @@ static int netvsc_set_channels(struct net_device *net, return -EINVAL; orig = nvdev->num_chn; - was_opened = rndis_filter_opened(nvdev); - if (was_opened) - rndis_filter_close(nvdev); memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = count; @@ -882,28 +952,17 @@ static int netvsc_set_channels(struct net_device *net, device_info.recv_sections = nvdev->recv_section_cnt; device_info.recv_section_size = nvdev->recv_section_size; - rndis_filter_device_remove(dev, nvdev); + ret = netvsc_detach(net, nvdev); + if (ret) + return ret; - nvdev = rndis_filter_device_add(dev, &device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); + ret = netvsc_attach(net, &device_info); + if (ret) { device_info.num_chn = orig; - nvdev = rndis_filter_device_add(dev, &device_info); - - if (IS_ERR(nvdev)) { - netdev_err(net, "restoring channel setting failed: %ld\n", - PTR_ERR(nvdev)); - return ret; - } + if (netvsc_attach(net, &device_info)) + netdev_err(net, "restoring channel setting failed\n"); } - if (was_opened) - rndis_filter_open(nvdev); - - /* We may have missed link change notifications */ - net_device_ctx->last_reconfig = 0; - schedule_delayed_work(&net_device_ctx->dwork, 0); - return ret; } @@ -969,10 +1028,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct net_device_context *ndevctx = netdev_priv(ndev); struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); - struct hv_device *hdev = ndevctx->device_ctx; int orig_mtu = ndev->mtu; struct netvsc_device_info device_info; - bool was_opened; int ret = 0; if (!nvdev || nvdev->destroy) @@ -985,11 +1042,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) return ret; } - netif_device_detach(ndev); - was_opened = rndis_filter_opened(nvdev); - if (was_opened) - rndis_filter_close(nvdev); - memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = nvdev->num_chn; device_info.send_sections = nvdev->send_section_cnt; @@ -997,35 +1049,27 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) device_info.recv_sections = nvdev->recv_section_cnt; device_info.recv_section_size = nvdev->recv_section_size; - rndis_filter_device_remove(hdev, nvdev); + ret = netvsc_detach(ndev, nvdev); + if (ret) + goto rollback_vf; ndev->mtu = mtu; - nvdev = rndis_filter_device_add(hdev, &device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); - - /* Attempt rollback to original MTU */ - ndev->mtu = orig_mtu; - nvdev = rndis_filter_device_add(hdev, &device_info); - - if (vf_netdev) - dev_set_mtu(vf_netdev, orig_mtu); - - if (IS_ERR(nvdev)) { - netdev_err(ndev, "restoring mtu failed: %ld\n", - PTR_ERR(nvdev)); - return ret; - } - } + ret = netvsc_attach(ndev, &device_info); + if (ret) + goto rollback; - if (was_opened) - rndis_filter_open(nvdev); + return 0; - netif_device_attach(ndev); +rollback: + /* Attempt rollback to original MTU */ + ndev->mtu = orig_mtu; - /* We may have missed link change notifications */ - schedule_delayed_work(&ndevctx->dwork, 0); + if (netvsc_attach(ndev, &device_info)) + netdev_err(ndev, "restoring mtu failed\n"); +rollback_vf: + if (vf_netdev) + dev_set_mtu(vf_netdev, orig_mtu); return ret; } @@ -1531,11 +1575,9 @@ static int netvsc_set_ringparam(struct net_device *ndev, { struct net_device_context *ndevctx = netdev_priv(ndev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); - struct hv_device *hdev = ndevctx->device_ctx; struct netvsc_device_info device_info; struct ethtool_ringparam orig; u32 new_tx, new_rx; - bool was_opened; int ret = 0; if (!nvdev || nvdev->destroy) @@ -1560,34 +1602,18 @@ static int netvsc_set_ringparam(struct net_device *ndev, device_info.recv_sections = new_rx; device_info.recv_section_size = nvdev->recv_section_size; - netif_device_detach(ndev); - was_opened = rndis_filter_opened(nvdev); - if (was_opened) - rndis_filter_close(nvdev); - - rndis_filter_device_remove(hdev, nvdev); - - nvdev = rndis_filter_device_add(hdev, &device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); + ret = netvsc_detach(ndev, nvdev); + if (ret) + return ret; + ret = netvsc_attach(ndev, &device_info); + if (ret) { device_info.send_sections = orig.tx_pending; device_info.recv_sections = orig.rx_pending; - nvdev = rndis_filter_device_add(hdev, &device_info); - if (IS_ERR(nvdev)) { - netdev_err(ndev, "restoring ringparam failed: %ld\n", - PTR_ERR(nvdev)); - return ret; - } - } - if (was_opened) - rndis_filter_open(nvdev); - netif_device_attach(ndev); - - /* We may have missed link change notifications */ - ndevctx->last_reconfig = 0; - schedule_delayed_work(&ndevctx->dwork, 0); + if (netvsc_attach(ndev, &device_info)) + netdev_err(ndev, "restoring ringparam failed"); + } return ret; } @@ -2072,8 +2098,8 @@ no_net: static int netvsc_remove(struct hv_device *dev) { struct net_device_context *ndev_ctx; - struct net_device *vf_netdev; - struct net_device *net; + struct net_device *vf_netdev, *net; + struct netvsc_device *nvdev; net = hv_get_drvdata(dev); if (net == NULL) { @@ -2083,10 +2109,14 @@ static int netvsc_remove(struct hv_device *dev) ndev_ctx = netdev_priv(net); - netif_device_detach(net); - cancel_delayed_work_sync(&ndev_ctx->dwork); + rcu_read_lock(); + nvdev = rcu_dereference(ndev_ctx->nvdev); + + if (nvdev) + cancel_work_sync(&nvdev->subchan_work); + /* * Call to the vsc driver to let it know that the device is being * removed. Also blocks mtu and channel changes. @@ -2096,11 +2126,13 @@ static int netvsc_remove(struct hv_device *dev) if (vf_netdev) netvsc_unregister_vf(vf_netdev); + if (nvdev) + rndis_filter_device_remove(dev, nvdev); + unregister_netdevice(net); - rndis_filter_device_remove(dev, - rtnl_dereference(ndev_ctx->nvdev)); rtnl_unlock(); + rcu_read_unlock(); hv_set_drvdata(dev, NULL); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 963314eb3226..a6ec41c399d6 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1118,6 +1118,7 @@ void rndis_set_subchannel(struct work_struct *w) for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) ndev_ctx->tx_table[i] = i % nvdev->num_chn; + netif_device_attach(ndev); rtnl_unlock(); return; @@ -1128,6 +1129,8 @@ failed: nvdev->max_chn = 1; nvdev->num_chn = 1; + + netif_device_attach(ndev); unlock: rtnl_unlock(); } @@ -1330,6 +1333,10 @@ out: net_device->num_chn = 1; } + /* No sub channels, device is ready */ + if (net_device->num_chn == 1) + netif_device_attach(net); + return net_device; err_dev_remv: @@ -1342,9 +1349,6 @@ void rndis_filter_device_remove(struct hv_device *dev, { struct rndis_device *rndis_dev = net_dev->extension; - /* Don't try and setup sub channels if about to halt */ - cancel_work_sync(&net_dev->subchan_work); - /* Halt and release the rndis device */ rndis_filter_halt_device(rndis_dev); @@ -1368,10 +1372,3 @@ int rndis_filter_close(struct netvsc_device *nvdev) return rndis_filter_close_device(nvdev->extension); } - -bool rndis_filter_opened(const struct netvsc_device *nvdev) -{ - const struct rndis_device *dev = nvdev->extension; - - return dev->state == RNDIS_DEV_DATAINITIALIZED; -} -- cgit From 40013ff20b1beed31184935fc0aea6a859d4d4ef Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 20 Mar 2018 17:31:10 -0700 Subject: net: dsa: Fix functional dsa-loop dependency on FIXED_PHY We have a functional dependency on the FIXED_PHY MDIO bus because we register fixed PHY devices "the old way" which only works if the code that does this has had a chance to run before the fixed MDIO bus is probed. Make sure we account for that and have dsa_loop_bdinfo.o be either built-in or modular depending on whether CONFIG_FIXED_PHY reflects that too. Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index d040aeb45172..15c2a831edf1 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_NET_DSA_BCM_SF2) += bcm-sf2.o bcm-sf2-objs := bcm_sf2.o bcm_sf2_cfp.o -obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o dsa_loop_bdinfo.o +obj-$(CONFIG_NET_DSA_LOOP) += dsa_loop.o +ifdef CONFIG_NET_DSA_LOOP +obj-$(CONFIG_FIXED_PHY) += dsa_loop_bdinfo.o +endif obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o -- cgit From 5dcd8400884cc4a043a6d4617e042489e5d566a9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Mar 2018 11:09:01 +0300 Subject: macsec: missing dev_put() on error in macsec_newlink() We moved the dev_hold(real_dev); call earlier in the function but forgot to update the error paths. Fixes: 0759e552bce7 ("macsec: fix negative refcnt on parent link") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/macsec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 7de88b33d5b9..9cbb0c8a896a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3277,7 +3277,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev, err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) - goto unregister; + goto put_dev; /* need to be already registered so that ->init has run and * the MAC addr is set @@ -3316,7 +3316,8 @@ del_dev: macsec_del_dev(macsec); unlink: netdev_upper_dev_unlink(real_dev, dev); -unregister: +put_dev: + dev_put(real_dev); unregister_netdevice(dev); return err; } -- cgit