From e9e7e85d75f3731079ffd77c1a66f037aef04fe7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:20 +0200 Subject: xfrm interface: avoid corruption on changelink The new parameters must not be stored in the netdev_priv() before validation, it may corrupt the interface. Note also that if data is NULL, only a memset() is done. $ ip link add xfrm1 type xfrm dev lo if_id 1 $ ip link add xfrm2 type xfrm dev lo if_id 2 $ ip link set xfrm1 type xfrm dev lo if_id 2 RTNETLINK answers: File exists $ ip -d link list dev xfrm1 5: xfrm1@lo: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 minmtu 68 maxmtu 1500 xfrm if_id 0x2 addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 => "if_id 0x2" Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel Tested-by: Julien Floret Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 74868f9d81fb..2310dc9e35c2 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -671,12 +671,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct xfrm_if *xi = netdev_priv(dev); struct net *net = dev_net(dev); + struct xfrm_if_parms p; + struct xfrm_if *xi; - xfrmi_netlink_parms(data, &xi->p); - - xi = xfrmi_locate(net, &xi->p); + xfrmi_netlink_parms(data, &p); + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); } else { @@ -684,7 +684,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EEXIST; } - return xfrmi_update(xi, &xi->p); + return xfrmi_update(xi, &p); } static size_t xfrmi_get_size(const struct net_device *dev) -- cgit From e0aaa332e6a97dae57ad59cdb19e21f83c3d081c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:21 +0200 Subject: xfrm interface: ifname may be wrong in logs The ifname is copied when the interface is created, but is never updated later. In fact, this property is used only in one error message, where the netdevice pointer is available, thus let's use it. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/xfrm/xfrm_interface.c | 10 +--------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b22db30c3d88..ad761ef84797 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -983,7 +983,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { - char name[IFNAMSIZ]; /* name of XFRM device */ int link; /* ifindex of underlying L2 interface */ u32 if_id; /* interface identifyer */ }; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 2310dc9e35c2..68336ee00d72 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -145,8 +145,6 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - strcpy(xi->p.name, dev->name); - dev_hold(dev); xfrmi_link(xfrmn, xi); @@ -294,7 +292,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (tdev == dev) { stats->collisions++; net_warn_ratelimited("%s: Local routing loop detected!\n", - xi->p.name); + dev->name); goto tx_err_dst_release; } @@ -638,12 +636,6 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, int err; xfrmi_netlink_parms(data, &p); - - if (!tb[IFLA_IFNAME]) - return -EINVAL; - - nla_strlcpy(p.name, tb[IFLA_IFNAME], IFNAMSIZ); - xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; -- cgit From c5d1030f23002430c2a336b2b629b9d6f72b3564 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:22 +0200 Subject: xfrm interface: fix list corruption for x-netns dev_net(dev) is the netns of the device and xi->net is the link netns, where the device has been linked. changelink() must operate in the link netns to avoid a corruption of the xfrm lists. Note that xi->net and dev_net(xi->physdev) are always the same. Before the patch, the xfrmi lists may be corrupted and can later trigger a kernel panic. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: Julien Floret Signed-off-by: Nicolas Dichtel Tested-by: Julien Floret Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 68336ee00d72..53e5e47b2c55 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -503,7 +503,7 @@ static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p) static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p) { - struct net *net = dev_net(xi->dev); + struct net *net = xi->net; struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); int err; @@ -663,9 +663,9 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct net *net = dev_net(dev); + struct xfrm_if *xi = netdev_priv(dev); + struct net *net = xi->net; struct xfrm_if_parms p; - struct xfrm_if *xi; xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); @@ -707,7 +707,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - return dev_net(xi->phydev); + return xi->net; } static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { -- cgit From 22d6552f827ef76ade3edf6bbb3f05048a0a7d8b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:23 +0200 Subject: xfrm interface: fix management of phydev With the current implementation, phydev cannot be removed: $ ip link add dummy type dummy $ ip link add xfrm1 type xfrm dev dummy if_id 1 $ ip l d dummy kernel:[77938.465445] unregister_netdevice: waiting for dummy to become free. Usage count = 1 Manage it like in ip tunnels, ie just keep the ifindex. Not that the side effect, is that the phydev is now optional. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel Tested-by: Julien Floret Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/xfrm/xfrm_interface.c | 32 +++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ad761ef84797..aa08a7a5f6ac 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -990,7 +990,6 @@ struct xfrm_if_parms { struct xfrm_if { struct xfrm_if __rcu *next; /* next interface in list */ struct net_device *dev; /* virtual device associated with interface */ - struct net_device *phydev; /* physical device */ struct net *net; /* netns for packet i/o */ struct xfrm_if_parms p; /* interface parms */ diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 53e5e47b2c55..2ab4859df55a 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -175,7 +175,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); xfrmi_unlink(xfrmn, xi); - dev_put(xi->phydev); dev_put(dev); } @@ -362,7 +361,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; } - fl.flowi_oif = xi->phydev->ifindex; + fl.flowi_oif = xi->p.link; ret = xfrmi_xmit2(skb, dev, &fl); if (ret < 0) @@ -548,7 +547,7 @@ static int xfrmi_get_iflink(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - return xi->phydev->ifindex; + return xi->p.link; } @@ -574,12 +573,14 @@ static void xfrmi_dev_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = xfrmi_dev_free; netif_keep_dst(dev); + + eth_broadcast_addr(dev->broadcast); } static int xfrmi_dev_init(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device *phydev = xi->phydev; + struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link); int err; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); @@ -594,13 +595,19 @@ static int xfrmi_dev_init(struct net_device *dev) dev->features |= NETIF_F_LLTX; - dev->needed_headroom = phydev->needed_headroom; - dev->needed_tailroom = phydev->needed_tailroom; + if (phydev) { + dev->needed_headroom = phydev->needed_headroom; + dev->needed_tailroom = phydev->needed_tailroom; - if (is_zero_ether_addr(dev->dev_addr)) - eth_hw_addr_inherit(dev, phydev); - if (is_zero_ether_addr(dev->broadcast)) - memcpy(dev->broadcast, phydev->broadcast, dev->addr_len); + if (is_zero_ether_addr(dev->dev_addr)) + eth_hw_addr_inherit(dev, phydev); + if (is_zero_ether_addr(dev->broadcast)) + memcpy(dev->broadcast, phydev->broadcast, + dev->addr_len); + } else { + eth_hw_addr_random(dev); + eth_broadcast_addr(dev->broadcast); + } return 0; } @@ -644,13 +651,8 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, xi->p = p; xi->net = net; xi->dev = dev; - xi->phydev = dev_get_by_index(net, p.link); - if (!xi->phydev) - return -ENODEV; err = xfrmi_create(dev); - if (err < 0) - dev_put(xi->phydev); return err; } -- cgit From 769a807d0b41df4201dbeb01c22eaeb3e5905532 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 12 Aug 2019 10:32:13 +0200 Subject: xfrm: policy: avoid warning splat when merging nodes syzbot reported a splat: xfrm_policy_inexact_list_reinsert+0x625/0x6e0 net/xfrm/xfrm_policy.c:877 CPU: 1 PID: 6756 Comm: syz-executor.1 Not tainted 5.3.0-rc2+ #57 Call Trace: xfrm_policy_inexact_node_reinsert net/xfrm/xfrm_policy.c:922 [inline] xfrm_policy_inexact_node_merge net/xfrm/xfrm_policy.c:958 [inline] xfrm_policy_inexact_insert_node+0x537/0xb50 net/xfrm/xfrm_policy.c:1023 xfrm_policy_inexact_alloc_chain+0x62b/0xbd0 net/xfrm/xfrm_policy.c:1139 xfrm_policy_inexact_insert+0xe8/0x1540 net/xfrm/xfrm_policy.c:1182 xfrm_policy_insert+0xdf/0xce0 net/xfrm/xfrm_policy.c:1574 xfrm_add_policy+0x4cf/0x9b0 net/xfrm/xfrm_user.c:1670 xfrm_user_rcv_msg+0x46b/0x720 net/xfrm/xfrm_user.c:2676 netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2477 xfrm_netlink_rcv+0x74/0x90 net/xfrm/xfrm_user.c:2684 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x809/0x9a0 net/netlink/af_netlink.c:1328 netlink_sendmsg+0xa70/0xd30 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg net/socket.c:657 [inline] There is no reproducer, however, the warning can be reproduced by adding rules with ever smaller prefixes. The sanity check ("does the policy match the node") uses the prefix value of the node before its updated to the smaller value. To fix this, update the prefix earlier. The bug has no impact on tree correctness, this is only to prevent a false warning. Reported-by: syzbot+8cc27ace5f6972910b31@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 6 ++++-- tools/testing/selftests/net/xfrm_policy.sh | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8ca637a72697..0fa7c5ce3b2c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -912,6 +912,7 @@ restart: } else if (delta > 0) { p = &parent->rb_right; } else { + bool same_prefixlen = node->prefixlen == n->prefixlen; struct xfrm_policy *tmp; hlist_for_each_entry(tmp, &n->hhead, bydst) { @@ -919,9 +920,11 @@ restart: hlist_del_rcu(&tmp->bydst); } + node->prefixlen = prefixlen; + xfrm_policy_inexact_list_reinsert(net, node, family); - if (node->prefixlen == n->prefixlen) { + if (same_prefixlen) { kfree_rcu(n, rcu); return; } @@ -929,7 +932,6 @@ restart: rb_erase(*p, new); kfree_rcu(n, rcu); n = node; - n->prefixlen = prefixlen; goto restart; } } diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 5445943bf07f..7a1bf94c5bd3 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -106,6 +106,13 @@ do_overlap() # # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23. ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block + + # similar to above: add policies (with partially random address), with shrinking prefixes. + for p in 29 28 27;do + for k in $(seq 1 32); do + ip -net $ns xfrm policy add src 10.253.1.$((RANDOM%255))/$p dst 10.254.1.$((RANDOM%255))/$p dir fwd priority $((200+k)) action block 2>/dev/null + done + done } do_esp_policy_get_check() { -- cgit From 1ffdb51f28e8ec6be0a2b812c1765b5cf5c44a8f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 Aug 2019 12:04:08 -0500 Subject: Revert "Bluetooth: btusb: driver to enable the usb-wakeup feature" This reverts commit a0085f2510e8976614ad8f766b209448b385492f. This commit has caused regressions in notebooks that support suspend to idle such as the XPS 9360, XPS 9370 and XPS 9380. These notebooks will wakeup from suspend to idle from an unsolicited advertising packet from an unpaired BLE device. In a bug report it was sugggested that this is caused by a generic lack of LE privacy support. Revert this commit until that behavior can be avoided by the kernel. Fixes: a0085f2510e8 ("Bluetooth: btusb: driver to enable the usb-wakeup feature") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=200039 Link: https://marc.info/?l=linux-bluetooth&m=156441081612627&w=2 Link: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/750073/ CC: Bastien Nocera CC: Christian Kellner CC: Sukumar Ghorai Signed-off-by: Mario Limonciello Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5cf0734eb31b..5c67d41ca254 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1170,10 +1170,6 @@ static int btusb_open(struct hci_dev *hdev) } data->intf->needs_remote_wakeup = 1; - /* device specific wakeup source enabled and required for USB - * remote wakeup while host is suspended - */ - device_wakeup_enable(&data->udev->dev); if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags)) goto done; @@ -1238,7 +1234,6 @@ static int btusb_close(struct hci_dev *hdev) goto failed; data->intf->needs_remote_wakeup = 0; - device_wakeup_disable(&data->udev->dev); usb_autopm_put_interface(data->intf); failed: -- cgit From e33b4325e60e146c2317a8b548cbd633239ff83b Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Fri, 30 Aug 2019 19:00:48 -0700 Subject: net: stmmac: dwmac-sun8i: Variable "val" in function sun8i_dwmac_set_syscon() could be uninitialized In function sun8i_dwmac_set_syscon(), local variable "val" could be uninitialized if function regmap_field_read() returns -EINVAL. However, it will be used directly in the if statement, which is potentially unsafe. Signed-off-by: Yizhuo Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 4083019c547a..f97a4096f8fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -873,7 +873,12 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) int ret; u32 reg, val; - regmap_field_read(gmac->regmap_field, &val); + ret = regmap_field_read(gmac->regmap_field, &val); + if (ret) { + dev_err(priv->device, "Fail to read from regmap field.\n"); + return ret; + } + reg = gmac->variant->default_syscon_value; if (reg != val) dev_warn(priv->device, -- cgit From 48bd0d68cd4b849c0bb99dc1fb01b013ad4aa942 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Sat, 31 Aug 2019 01:40:33 -0300 Subject: netfilter: bridge: Drops IPv6 packets if IPv6 module is not loaded A kernel panic can happen if a host has disabled IPv6 on boot and have to process guest packets (coming from a bridge) using it's ip6tables. IPv6 packets need to be dropped if the IPv6 module is not loaded, and the host ip6tables will be used. Signed-off-by: Leonardo Bras Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d3f9592f4ff8..af7800103e51 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -496,6 +496,10 @@ static unsigned int br_nf_pre_routing(void *priv, if (!brnet->call_ip6tables && !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) return NF_ACCEPT; + if (!ipv6_mod_enabled()) { + pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); + return NF_DROP; + } nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(priv, skb, state); -- cgit From 039b1f4f24ecc8493b6bb9d70b4b78750d1b35c2 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Sun, 1 Sep 2019 11:48:08 +0200 Subject: netfilter: nft_socket: fix erroneous socket assignment The socket assignment is wrong, see skb_orphan(): When skb->destructor callback is not set, but skb->sk is set, this hits BUG(). Link: https://bugzilla.redhat.com/show_bug.cgi?id=1651813 Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index d7f3776dfd71..637ce3e8c575 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -47,9 +47,6 @@ static void nft_socket_eval(const struct nft_expr *expr, return; } - /* So that subsequent socket matching not to require other lookups. */ - skb->sk = sk; - switch(priv->key) { case NFT_SOCKET_TRANSPARENT: nft_reg_store8(dest, inet_sk_transparent(sk)); @@ -66,6 +63,9 @@ static void nft_socket_eval(const struct nft_expr *expr, WARN_ON(1); regs->verdict.code = NFT_BREAK; } + + if (sk != skb->sk) + sock_gen_put(sk); } static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { -- cgit From 34b0e9b767bfa09ae233ca0d6ceb299bf2e24600 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 13 Aug 2019 15:36:40 +0200 Subject: mt76: mt76x0e: don't use hw encryption for MT7630E Since 41634aa8d6db ("mt76: only schedule txqs from the tx tasklet") I can observe firmware hangs on MT7630E on station mode: tx stop functioning after minor activity (rx keep working) and on module unload device fail to stop with messages: [ 5446.141413] mt76x0e 0000:06:00.0: TX DMA did not stop [ 5449.176764] mt76x0e 0000:06:00.0: TX DMA did not stop Loading module again results in failure to associate with AP. Only machine power off / power on cycle can make device work again. It's unclear why commit 41634aa8d6db causes the problem, but it is related to HW encryption. Since issue is a firmware hang, that is super hard to debug, just disable HW encryption as fix for the issue. Fixes: 41634aa8d6db ("mt76: only schedule txqs from the tx tasklet") Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 4585e1b756c2..6117e6ca08cb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -62,6 +62,19 @@ static void mt76x0e_stop(struct ieee80211_hw *hw) mt76x0e_stop_hw(dev); } +static int +mt76x0e_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, + struct ieee80211_vif *vif, struct ieee80211_sta *sta, + struct ieee80211_key_conf *key) +{ + struct mt76x02_dev *dev = hw->priv; + + if (is_mt7630(dev)) + return -EOPNOTSUPP; + + return mt76x02_set_key(hw, cmd, vif, sta, key); +} + static void mt76x0e_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) @@ -78,7 +91,7 @@ static const struct ieee80211_ops mt76x0e_ops = { .configure_filter = mt76x02_configure_filter, .bss_info_changed = mt76x02_bss_info_changed, .sta_state = mt76_sta_state, - .set_key = mt76x02_set_key, + .set_key = mt76x0e_set_key, .conf_tx = mt76x02_conf_tx, .sw_scan_start = mt76x02_sw_scan, .sw_scan_complete = mt76x02_sw_scan_complete, -- cgit From 70702265a04aa0ce5a7bde77d13456209992b32f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 13 Aug 2019 15:36:56 +0200 Subject: mt76: mt76x0e: disable 5GHz band for MT7630E MT7630E hardware does support 5GHz, but we do not properly configure phy for 5GHz channels. Scanning at this band not only do not show any APs but also can hang the firmware. Since vendor reference driver do not support 5GHz we don't know how properly configure 5GHz channels. So disable this band for MT7630E . Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index 40c0d536e20d..9d4426f6905f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -59,6 +59,11 @@ static void mt76x0_set_chip_cap(struct mt76x02_dev *dev) dev_dbg(dev->mt76.dev, "mask out 2GHz support\n"); } + if (is_mt7630(dev)) { + dev->mt76.cap.has_5ghz = false; + dev_dbg(dev->mt76.dev, "mask out 5GHz support\n"); + } + if (!mt76x02_field_valid(nic_conf1 & 0xff)) nic_conf1 &= 0xff00; -- cgit From 7caac62ed598a196d6ddf8d9c121e12e082cac3a Mon Sep 17 00:00:00 2001 From: Wen Huang Date: Wed, 28 Aug 2019 10:07:51 +0800 Subject: mwifiex: Fix three heap overflow at parsing element in cfg80211_ap_settings mwifiex_update_vs_ie(),mwifiex_set_uap_rates() and mwifiex_set_wmm_params() call memcpy() without checking the destination size.Since the source is given from user-space, this may trigger a heap buffer overflow. Fix them by putting the length check before performing memcpy(). This fix addresses CVE-2019-14814,CVE-2019-14815,CVE-2019-14816. Signed-off-by: Wen Huang Acked-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/ie.c | 3 +++ drivers/net/wireless/marvell/mwifiex/uap_cmd.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/ie.c b/drivers/net/wireless/marvell/mwifiex/ie.c index 653d347a9a19..580387f9f12a 100644 --- a/drivers/net/wireless/marvell/mwifiex/ie.c +++ b/drivers/net/wireless/marvell/mwifiex/ie.c @@ -241,6 +241,9 @@ static int mwifiex_update_vs_ie(const u8 *ies, int ies_len, } vs_ie = (struct ieee_types_header *)vendor_ie; + if (le16_to_cpu(ie->ie_length) + vs_ie->len + 2 > + IEEE_MAX_IE_SIZE) + return -EINVAL; memcpy(ie->ie_buffer + le16_to_cpu(ie->ie_length), vs_ie, vs_ie->len + 2); le16_unaligned_add_cpu(&ie->ie_length, vs_ie->len + 2); diff --git a/drivers/net/wireless/marvell/mwifiex/uap_cmd.c b/drivers/net/wireless/marvell/mwifiex/uap_cmd.c index 18f7d9bf30b2..0939a8c8f3ab 100644 --- a/drivers/net/wireless/marvell/mwifiex/uap_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_cmd.c @@ -265,6 +265,8 @@ mwifiex_set_uap_rates(struct mwifiex_uap_bss_param *bss_cfg, rate_ie = (void *)cfg80211_find_ie(WLAN_EID_SUPP_RATES, var_pos, len); if (rate_ie) { + if (rate_ie->len > MWIFIEX_SUPPORTED_RATES) + return; memcpy(bss_cfg->rates, rate_ie + 1, rate_ie->len); rate_len = rate_ie->len; } @@ -272,8 +274,11 @@ mwifiex_set_uap_rates(struct mwifiex_uap_bss_param *bss_cfg, rate_ie = (void *)cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, params->beacon.tail, params->beacon.tail_len); - if (rate_ie) + if (rate_ie) { + if (rate_ie->len > MWIFIEX_SUPPORTED_RATES - rate_len) + return; memcpy(bss_cfg->rates + rate_len, rate_ie + 1, rate_ie->len); + } return; } @@ -391,6 +396,8 @@ mwifiex_set_wmm_params(struct mwifiex_private *priv, params->beacon.tail_len); if (vendor_ie) { wmm_ie = vendor_ie; + if (*(wmm_ie + 1) > sizeof(struct mwifiex_types_wmm_info)) + return; memcpy(&bss_cfg->wmm_info, wmm_ie + sizeof(struct ieee_types_header), *(wmm_ie + 1)); priv->wmm_enabled = 1; -- cgit From 968dcfb4905245dc64d65312c0d17692fa087b99 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 29 Aug 2019 11:13:46 +0300 Subject: iwlwifi: assign directly to iwl_trans->cfg in QuZ detection We were erroneously assigning the new configuration to a local variable cfg, but that was not being assigned to anything, so the change was getting lost. Assign directly to iwl_trans->cfg instead. Fixes: 5a8c31aa6357 ("iwlwifi: pcie: fix recognition of QuZ devices") Cc: stable@vger.kernel.org # 5.2 Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index d9ed53b7c768..3b12e7ad35e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1070,18 +1070,18 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* same thing for QuZ... */ if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QUZ) { - if (cfg == &iwl_ax101_cfg_qu_hr) - cfg = &iwl_ax101_cfg_quz_hr; - else if (cfg == &iwl_ax201_cfg_qu_hr) - cfg = &iwl_ax201_cfg_quz_hr; - else if (cfg == &iwl9461_2ac_cfg_qu_b0_jf_b0) - cfg = &iwl9461_2ac_cfg_quz_a0_jf_b0_soc; - else if (cfg == &iwl9462_2ac_cfg_qu_b0_jf_b0) - cfg = &iwl9462_2ac_cfg_quz_a0_jf_b0_soc; - else if (cfg == &iwl9560_2ac_cfg_qu_b0_jf_b0) - cfg = &iwl9560_2ac_cfg_quz_a0_jf_b0_soc; - else if (cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) - cfg = &iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc; + if (iwl_trans->cfg == &iwl_ax101_cfg_qu_hr) + iwl_trans->cfg = &iwl_ax101_cfg_quz_hr; + else if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr) + iwl_trans->cfg = &iwl_ax201_cfg_quz_hr; + else if (iwl_trans->cfg == &iwl9461_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9461_2ac_cfg_quz_a0_jf_b0_soc; + else if (iwl_trans->cfg == &iwl9462_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9462_2ac_cfg_quz_a0_jf_b0_soc; + else if (iwl_trans->cfg == &iwl9560_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9560_2ac_cfg_quz_a0_jf_b0_soc; + else if (iwl_trans->cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc; } #endif -- cgit From 14d5e14c8a6c257eb322ddeb294ac4c243a7d2e1 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 23 Aug 2019 14:48:03 +0200 Subject: rt2x00: clear up IV's on key removal After looking at code I realized that my previous fix 95844124385e ("rt2x00: clear IV's on start to fix AP mode regression") was incomplete. We can still have wrong IV's after re-keyring. To fix that, clear up IV's also on key removal. Fixes: 710e6cc1595e ("rt2800: do not nullify initialization vector data") Signed-off-by: Stanislaw Gruszka tested-by: Emil Karlson Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index ecbe78b8027b..28e2de04834e 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -1654,13 +1654,18 @@ static void rt2800_config_wcid_attr_cipher(struct rt2x00_dev *rt2x00dev, offset = MAC_IVEIV_ENTRY(key->hw_key_idx); - rt2800_register_multiread(rt2x00dev, offset, - &iveiv_entry, sizeof(iveiv_entry)); - if ((crypto->cipher == CIPHER_TKIP) || - (crypto->cipher == CIPHER_TKIP_NO_MIC) || - (crypto->cipher == CIPHER_AES)) - iveiv_entry.iv[3] |= 0x20; - iveiv_entry.iv[3] |= key->keyidx << 6; + if (crypto->cmd == SET_KEY) { + rt2800_register_multiread(rt2x00dev, offset, + &iveiv_entry, sizeof(iveiv_entry)); + if ((crypto->cipher == CIPHER_TKIP) || + (crypto->cipher == CIPHER_TKIP_NO_MIC) || + (crypto->cipher == CIPHER_AES)) + iveiv_entry.iv[3] |= 0x20; + iveiv_entry.iv[3] |= key->keyidx << 6; + } else { + memset(&iveiv_entry, 0, sizeof(iveiv_entry)); + } + rt2800_register_multiwrite(rt2x00dev, offset, &iveiv_entry, sizeof(iveiv_entry)); } -- cgit From 13fa451568ab9e8b3074ef741477c7938c713c42 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 29 Aug 2019 13:29:59 +0200 Subject: Revert "rt2800: enable TX_PIN_CFG_LNA_PE_ bits per band" This reverts commit 9ad3b55654455258a9463384edb40077439d879f. As reported by Sergey: "I got some problem after upgrade kernel to 5.2 version (debian testing linux-image-5.2.0-2-amd64). 5Ghz client stopped to see AP. Some tests with 1metre distance between client-AP: 2.4Ghz -22dBm, for 5Ghz - 53dBm !, for longer distance (8m + walls) 2.4 - 61dBm, 5Ghz not visible." It was identified that rx signal level degradation was caused by 9ad3b5565445 ("rt2800: enable TX_PIN_CFG_LNA_PE_ bits per band"). So revert this commit. Cc: # v5.1+ Reported-and-tested-by: Sergey Maranchuk Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 28e2de04834e..f1cdcd61c54a 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -4242,24 +4242,18 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev, switch (rt2x00dev->default_ant.rx_chain_num) { case 3: /* Turn on tertiary LNAs */ - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A2_EN, - rf->channel > 14); - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G2_EN, - rf->channel <= 14); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A2_EN, 1); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G2_EN, 1); /* fall-through */ case 2: /* Turn on secondary LNAs */ - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A1_EN, - rf->channel > 14); - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G1_EN, - rf->channel <= 14); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A1_EN, 1); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G1_EN, 1); /* fall-through */ case 1: /* Turn on primary LNAs */ - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A0_EN, - rf->channel > 14); - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G0_EN, - rf->channel <= 14); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A0_EN, 1); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G0_EN, 1); break; } -- cgit From 8b51dc7291473093c821195c4b6af85fadedbc2f Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Mon, 19 Aug 2019 18:02:29 -0400 Subject: rsi: fix a double free bug in rsi_91x_deinit() `dev` (struct rsi_91x_usbdev *) field of adapter (struct rsi_91x_usbdev *) is allocated and initialized in `rsi_init_usb_interface`. If any error is detected in information read from the device side, `rsi_init_usb_interface` will be freed. However, in the higher level error handling code in `rsi_probe`, if error is detected, `rsi_91x_deinit` is called again, in which `dev` will be freed again, resulting double free. This patch fixes the double free by removing the free operation on `dev` in `rsi_init_usb_interface`, because `rsi_91x_deinit` is also used in `rsi_disconnect`, in that code path, the `dev` field is not (and thus needs to be) freed. This bug was found in v4.19, but is also present in the latest version of kernel. Fixes CVE-2019-15504. Reported-by: Hui Peng Reported-by: Mathias Payer Signed-off-by: Hui Peng Reviewed-by: Guenter Roeck Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_usb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index f5048d4b8cb6..760eaffeebd6 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -645,7 +645,6 @@ fail_rx: kfree(rsi_dev->tx_buffer); fail_eps: - kfree(rsi_dev); return status; } -- cgit From 88209141392a4a2521a2f67c13d7db5e84efbb58 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Fri, 30 Aug 2019 15:13:53 -0300 Subject: netfilter: nft_fib_netdev: Terminate rule eval if protocol=IPv6 and ipv6 module is disabled If IPv6 is disabled on boot (ipv6.disable=1), but nft_fib_inet ends up dealing with a IPv6 packet, it causes a kernel panic in fib6_node_lookup_1(), crashing in bad_page_fault. The panic is caused by trying to deference a very low address (0x38 in ppc64le), due to ipv6.fib6_main_tbl = NULL. BUG: Kernel NULL pointer dereference at 0x00000038 The kernel panic was reproduced in a host that disabled IPv6 on boot and have to process guest packets (coming from a bridge) using it's ip6tables. Terminate rule evaluation when packet protocol is IPv6 but the ipv6 module is not loaded. Signed-off-by: Leonardo Bras Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_fib_netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c index 2cf3f32fe6d2..a2e726ae7f07 100644 --- a/net/netfilter/nft_fib_netdev.c +++ b/net/netfilter/nft_fib_netdev.c @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -34,6 +35,8 @@ static void nft_fib_netdev_eval(const struct nft_expr *expr, } break; case ETH_P_IPV6: + if (!ipv6_mod_enabled()) + break; switch (priv->result) { case NFT_FIB_RESULT_OIF: case NFT_FIB_RESULT_OIFNAME: -- cgit From b067fa009c884401d23846251031c1f14d8a9c77 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Sep 2019 19:37:42 +0200 Subject: netfilter: ctnetlink: honor IPS_OFFLOAD flag If this flag is set, timeout and state are irrelevant to userspace. Fixes: 90964016e5d3 ("netfilter: nf_conntrack: add IPS_OFFLOAD status bit") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6aa01eb6fe99..e2d13cd18875 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -553,10 +553,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, goto nla_put_failure; if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_timeout(skb, ct) < 0 || ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || - ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || @@ -568,6 +566,11 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_ct_synproxy(skb, ct) < 0) goto nla_put_failure; + if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) && + (ctnetlink_dump_timeout(skb, ct) < 0 || + ctnetlink_dump_protoinfo(skb, ct) < 0)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return skb->len; -- cgit From 110e48725db6262f260f10727d0fb2d3d25895e4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Sep 2019 19:37:43 +0200 Subject: netfilter: nf_flow_table: set default timeout after successful insertion Set up the default timeout for this new entry otherwise the garbage collector might quickly remove it right after the flowtable insertion. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 80a8f9ae4c93..a0b4bf654de2 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -217,7 +217,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) return err; } - flow->timeout = (u32)jiffies; + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); -- cgit From 6d0762b19c5963ff9e178e8af3626532ee04d93d Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 3 Sep 2019 17:10:42 +0800 Subject: Bluetooth: btrtl: Additional Realtek 8822CE Bluetooth devices The ASUS X412FA laptop contains a Realtek RTL8822CE device with an associated BT chip using a USB ID of 04ca:4005. This ID is added to the driver. The /sys/kernel/debug/usb/devices portion for this device is: T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=04 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=4005 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=204707 Signed-off-by: Jian-Hong Pan Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5c67d41ca254..ba4149054304 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -384,6 +384,9 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3526), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8822CE Bluetooth devices */ + { USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK }, + /* Silicon Wave based devices */ { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE }, -- cgit From 31fb1bbdabb34d53e3d83f0353085d9b79977786 Mon Sep 17 00:00:00 2001 From: Harish Bandi Date: Wed, 4 Sep 2019 10:04:16 +0530 Subject: Bluetooth: hci_qca: disable irqs when spinlock is acquired Looks like Deadlock is observed in hci_qca while performing stress and stability tests. Since same lock is getting acquired from qca_wq_awake_rx and hci_ibs_tx_idle_timeout seeing spinlock recursion, irqs should be disable while acquiring the spinlock always. Signed-off-by: Harish Bandi Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 9a970fd1975a..31dec435767b 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -309,13 +309,14 @@ static void qca_wq_awake_device(struct work_struct *work) ws_awake_device); struct hci_uart *hu = qca->hu; unsigned long retrans_delay; + unsigned long flags; BT_DBG("hu %p wq awake device", hu); /* Vote for serial clock */ serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_ON, hu); - spin_lock(&qca->hci_ibs_lock); + spin_lock_irqsave(&qca->hci_ibs_lock, flags); /* Send wake indication to device */ if (send_hci_ibs_cmd(HCI_IBS_WAKE_IND, hu) < 0) @@ -327,7 +328,7 @@ static void qca_wq_awake_device(struct work_struct *work) retrans_delay = msecs_to_jiffies(qca->wake_retrans); mod_timer(&qca->wake_retrans_timer, jiffies + retrans_delay); - spin_unlock(&qca->hci_ibs_lock); + spin_unlock_irqrestore(&qca->hci_ibs_lock, flags); /* Actually send the packets */ hci_uart_tx_wakeup(hu); @@ -338,12 +339,13 @@ static void qca_wq_awake_rx(struct work_struct *work) struct qca_data *qca = container_of(work, struct qca_data, ws_awake_rx); struct hci_uart *hu = qca->hu; + unsigned long flags; BT_DBG("hu %p wq awake rx", hu); serial_clock_vote(HCI_IBS_RX_VOTE_CLOCK_ON, hu); - spin_lock(&qca->hci_ibs_lock); + spin_lock_irqsave(&qca->hci_ibs_lock, flags); qca->rx_ibs_state = HCI_IBS_RX_AWAKE; /* Always acknowledge device wake up, @@ -354,7 +356,7 @@ static void qca_wq_awake_rx(struct work_struct *work) qca->ibs_sent_wacks++; - spin_unlock(&qca->hci_ibs_lock); + spin_unlock_irqrestore(&qca->hci_ibs_lock, flags); /* Actually send the packets */ hci_uart_tx_wakeup(hu); -- cgit From d94dfd798c4839b642f534580109dc6dfc3901a9 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 31 Aug 2019 14:23:40 -0500 Subject: Bluetooth: bpa10x: change return value When returning from bpa10x_send_frame, it is necessary to propagate any potential errno returned from usb_submit_urb. Signed-off-by: Navid Emamdoost Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bpa10x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index a0e84538cec8..1fa58c059cbf 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -337,7 +337,7 @@ static int bpa10x_send_frame(struct hci_dev *hdev, struct sk_buff *skb) usb_free_urb(urb); - return 0; + return err; } static int bpa10x_set_diag(struct hci_dev *hdev, bool enable) -- cgit From 591328948ba60423e28d000da99d34f7586de54a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Sep 2019 11:34:08 +0100 Subject: rxrpc: Fix misplaced traceline There's a misplaced traceline in rxrpc_input_packet() which is looking at a packet that just got released rather than the replacement packet. Fix this by moving the traceline after the assignment that moves the new packet pointer to the actual packet pointer. Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()") Reported-by: Hillf Danton Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d122c53c8697..157be1ff8697 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1262,8 +1262,8 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) if (nskb != skb) { rxrpc_eaten_skb(skb, rxrpc_skb_received); - rxrpc_new_skb(skb, rxrpc_skb_unshared); skb = nskb; + rxrpc_new_skb(skb, rxrpc_skb_unshared); sp = rxrpc_skb(skb); } } -- cgit From 10eb56c582c557c629271f1ee31e15e7a9b2558b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 2 Sep 2019 23:24:21 +0800 Subject: sctp: use transport pf_retrans in sctp_do_8_2_transport_strike Transport should use its own pf_retrans to do the error_count check, instead of asoc's. Otherwise, it's meaningless to make pf_retrans per transport. Fixes: 5aa93bcf66f4 ("sctp: Implement quick failover draft from tsvwg") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1cf5bb5b73c4..e52b2128e43b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -547,7 +547,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, if (net->sctp.pf_enable && (transport->state == SCTP_ACTIVE) && (transport->error_count < transport->pathmaxrxt) && - (transport->error_count > asoc->pf_retrans)) { + (transport->error_count > transport->pf_retrans)) { sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_PF, -- cgit From d55a2e374a94fa34a3048c6a2be535266e506d97 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Mon, 2 Sep 2019 09:23:36 -0700 Subject: net-ipv6: fix excessive RTF_ADDRCONF flag on ::1/128 local route (and others) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a subtle change in behaviour introduced by: commit c7a1ce397adacaf5d4bb2eab0a738b5f80dc3e43 'ipv6: Change addrconf_f6i_alloc to use ip6_route_info_create' Before that patch /proc/net/ipv6_route includes: 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo Afterwards /proc/net/ipv6_route includes: 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80240001 lo ie. the above commit causes the ::1/128 local (automatic) route to be flagged with RTF_ADDRCONF (0x040000). AFAICT, this is incorrect since these routes are *not* coming from RA's. As such, this patch restores the old behaviour. Fixes: c7a1ce397ada ("ipv6: Change addrconf_f6i_alloc to use ip6_route_info_create") Cc: David Ahern Cc: Lorenzo Colitti Signed-off-by: Maciej Żenczykowski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fd059e08785a..2927e72e511a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4388,13 +4388,14 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, struct fib6_config cfg = { .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL, .fc_ifindex = idev->dev->ifindex, - .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP, + .fc_flags = RTF_UP | RTF_NONEXTHOP, .fc_dst = *addr, .fc_dst_len = 128, .fc_protocol = RTPROT_KERNEL, .fc_nlinfo.nl_net = net, .fc_ignore_dev_down = true, }; + struct fib6_info *f6i; if (anycast) { cfg.fc_type = RTN_ANYCAST; @@ -4404,7 +4405,10 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, cfg.fc_flags |= RTF_LOCAL; } - return ip6_route_info_create(&cfg, gfp_flags, NULL); + f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); + if (f6i) + f6i->dst_nocount = true; + return f6i; } /* remove deleted ip from prefsrc entries */ -- cgit From 68d19d7d995759b96169da5aac313363f92a9075 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 4 Sep 2019 20:13:08 +0200 Subject: Revert "Bluetooth: validate BLE connection interval updates" This reverts commit c49a8682fc5d298d44e8d911f4fa14690ea9485e. There are devices which require low connection intervals for usable operation including keyboards and mice. Forcing a static connection interval for these types of devices has an impact in latency and causes a regression. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 5 ----- net/bluetooth/l2cap_core.c | 9 +-------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cdb00c2ef242..c1d3a303d97f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5660,11 +5660,6 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); - if (min < hcon->le_conn_min_interval || - max > hcon->le_conn_max_interval) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_INVALID_LL_PARAMS); - if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dfc1edb168b7..da7fdbdf9c41 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5305,14 +5305,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - if (min < hcon->le_conn_min_interval || - max > hcon->le_conn_max_interval) { - BT_DBG("requested connection interval exceeds current bounds."); - err = -EINVAL; - } else { - err = hci_check_conn_params(min, max, latency, to_multiplier); - } - + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else -- cgit From 42dec1dbe38239cf91cc1f4df7830c66276ced37 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 3 Sep 2019 17:53:12 +0800 Subject: tipc: add NULL pointer check before calling kfree_rcu Unlike kfree(p), kfree_rcu(p, rcu) won't do NULL pointer check. When tipc_nametbl_remove_publ returns NULL, the panic below happens: BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 RIP: 0010:__call_rcu+0x1d/0x290 Call Trace: tipc_publ_notify+0xa9/0x170 [tipc] tipc_node_write_unlock+0x8d/0x100 [tipc] tipc_node_link_down+0xae/0x1d0 [tipc] tipc_node_check_dest+0x3ea/0x8f0 [tipc] ? tipc_disc_rcv+0x2c7/0x430 [tipc] tipc_disc_rcv+0x2c7/0x430 [tipc] ? tipc_rcv+0x6bb/0xf20 [tipc] tipc_rcv+0x6bb/0xf20 [tipc] ? ip_route_input_slow+0x9cf/0xb10 tipc_udp_recv+0x195/0x1e0 [tipc] ? tipc_udp_is_known_peer+0x80/0x80 [tipc] udp_queue_rcv_skb+0x180/0x460 udp_unicast_rcv_skb.isra.56+0x75/0x90 __udp4_lib_rcv+0x4ce/0xb90 ip_local_deliver_finish+0x11c/0x210 ip_local_deliver+0x6b/0xe0 ? ip_rcv_finish+0xa9/0x410 ip_rcv+0x273/0x362 Fixes: 97ede29e80ee ("tipc: convert name table read-write lock to RCU") Reported-by: Li Shuang Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/name_distr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 44abc8e9c990..241ed2274473 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -223,7 +223,8 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) publ->key); } - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } /** -- cgit From ebe26aca98fcf9fbe5017b5cbe216413cee69df5 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 3 Sep 2019 11:46:52 -0700 Subject: net: fixed_phy: Add forward declaration for struct gpio_desc; Add forward declaration for struct gpio_desc in order to address the following: ./include/linux/phy_fixed.h:48:17: error: 'struct gpio_desc' declared inside parameter list [-Werror] ./include/linux/phy_fixed.h:48:17: error: its scope is only this definition or declaration, which is probably not what you want [-Werror] Fixes: 71bd106d2567 ("net: fixed-phy: Add fixed_phy_register_with_gpiod() API") Signed-off-by: Moritz Fischer Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 1e5d86ebdaeb..52bc8e487ef7 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -11,6 +11,7 @@ struct fixed_phy_status { }; struct device_node; +struct gpio_desc; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -- cgit From 44580a0118d3ede95fec4dce32df5f75f73cd663 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 3 Sep 2019 13:24:50 -0700 Subject: net: sock_map, fix missing ulp check in sock hash case sock_map and ULP only work together when ULP is loaded after the sock map is loaded. In the sock_map case we added a check for this to fail the load if ULP is already set. However, we missed the check on the sock_hash side. Add a ULP check to the sock_hash update path. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: syzbot+7a6ee4d0078eac6bf782@syzkaller.appspotmail.com Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/sock_map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 1330a7442e5b..50916f9bc4f2 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -656,6 +656,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct inet_connection_sock *icsk = inet_csk(sk); u32 key_size = map->key_size, hash; struct bpf_htab_elem *elem, *elem_new; struct bpf_htab_bucket *bucket; @@ -666,6 +667,8 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, WARN_ON_ONCE(!rcu_read_lock_held()); if (unlikely(flags > BPF_EXIST)) return -EINVAL; + if (unlikely(icsk->icsk_ulp_data)) + return -EINVAL; link = sk_psock_init_link(); if (!link) -- cgit From 4255ff0544ee307e9a8acf66000e6fd49e9203f8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Sep 2019 15:22:12 -0700 Subject: ipv6: Fix RTA_MULTIPATH with nexthop objects A change to the core nla helpers was missed during the push of the nexthop changes. rt6_fill_node_nexthop should be calling nla_nest_start_noflag not nla_nest_start. Currently, iproute2 does not print multipath data because of parsing issues with the attribute. Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2927e72e511a..977cfd09e2a3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5329,7 +5329,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, if (nexthop_is_multipath(nh)) { struct nlattr *mp; - mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; -- cgit From 91bfb564853f4aaa0487a2b5e9c6ecd400768abd Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Sep 2019 15:22:13 -0700 Subject: selftest: A few cleanups for fib_nexthops.sh Cleanups of the tests in fib_nexthops.sh 1. Several tests noted unexpected route output, but the discrepancy was not showing in the summary output and overlooked in the verbose output. Add a WARNING message to the summary output to make it clear a test is not showing expected output. 2. Several check_* calls are missing extra data like scope and metric causing mismatches when the nexthops or routes are correct - some of them are a side effect of the evolving iproute2 command. Update the data to the expected output. 3. Several check_routes are checking for the wrong nexthop data, most likely a copy-paste-update error. 4. A couple of tests were re-using a nexthop id that already existed. Fix those to use a new id. Fixes: 6345266a9989 ("selftests: Add test cases for nexthop objects") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index c5c93d5fb3ad..f9ebeac1e6f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -212,6 +212,8 @@ check_output() printf " ${out}\n" printf " Expected:\n" printf " ${expected}\n\n" + else + echo " WARNING: Unexpected route entry" fi fi @@ -274,7 +276,7 @@ ipv6_fcnal() run_cmd "$IP nexthop get id 52" log_test $? 0 "Get nexthop by id" - check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1" + check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1 scope link" run_cmd "$IP nexthop del id 52" log_test $? 0 "Delete nexthop by id" @@ -479,12 +481,12 @@ ipv6_fcnal_runtime() run_cmd "$IP -6 nexthop add id 85 dev veth1" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85" log_test $? 0 "IPv6 route with device only nexthop" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium" run_cmd "$IP nexthop add id 123 group 81/85" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123" log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 nexthop via 2001:db8:91::2 dev veth1 nexthop dev veth1" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium" # # IPv6 route with v4 nexthop - not allowed @@ -538,7 +540,7 @@ ipv4_fcnal() run_cmd "$IP nexthop get id 12" log_test $? 0 "Get nexthop by id" - check_nexthop "id 12" "id 12 via 172.16.1.2 src 172.16.1.1 dev veth1 scope link" + check_nexthop "id 12" "id 12 via 172.16.1.2 dev veth1 scope link" run_cmd "$IP nexthop del id 12" log_test $? 0 "Delete nexthop by id" @@ -685,7 +687,7 @@ ipv4_withv6_fcnal() set +e run_cmd "$IP ro add 172.16.101.1/32 nhid 11" log_test $? 0 "IPv6 nexthop with IPv4 route" - check_route "172.16.101.1" "172.16.101.1 nhid 11 via ${lladdr} dev veth1" + check_route "172.16.101.1" "172.16.101.1 nhid 11 via inet6 ${lladdr} dev veth1" set -e run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" @@ -694,11 +696,11 @@ ipv4_withv6_fcnal() run_cmd "$IP ro replace 172.16.101.1/32 nhid 101" log_test $? 0 "IPv6 nexthop with IPv4 route" - check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" + check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1" log_test $? 0 "IPv4 route with IPv6 gateway" - check_route "172.16.101.1" "172.16.101.1 via ${lladdr} dev veth1" + check_route "172.16.101.1" "172.16.101.1 via inet6 ${lladdr} dev veth1" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1" log_test $? 2 "IPv4 route with invalid IPv6 gateway" @@ -785,10 +787,10 @@ ipv4_fcnal_runtime() log_test $? 0 "IPv4 route with device only nexthop" check_route "172.16.101.1" "172.16.101.1 nhid 85 dev veth1" - run_cmd "$IP nexthop add id 122 group 21/85" - run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" + run_cmd "$IP nexthop add id 123 group 21/85" + run_cmd "$IP ro replace 172.16.101.1/32 nhid 123" log_test $? 0 "IPv4 multipath route with nexthop mix - dev only + gw" - check_route "172.16.101.1" "172.16.101.1 nhid 85 nexthop via 172.16.1.2 dev veth1 nexthop dev veth1" + check_route "172.16.101.1" "172.16.101.1 nhid 123 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop dev veth1 weight 1" # # IPv4 with IPv6 @@ -820,7 +822,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro replace 172.16.101.1/32 nhid 101" log_test $? 0 "IPv4 route with mixed v4-v6 multipath route" - check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" + check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" -- cgit From 7bdf4de1267780aa194b3a28c85a6c4d617b0bdb Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 4 Sep 2019 10:11:58 -0400 Subject: net: Properly update v4 routes with v6 nexthop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating a v4 route that uses a v6 nexthop from a nexthop group. Allow the kernel to properly send the nexthop as v6 via the RTA_VIA attribute. Broken behavior: $ ip nexthop add via fe80::9 dev eth0 $ ip nexthop show id 1 via fe80::9 dev eth0 scope link $ ip route add 4.5.6.7/32 nhid 1 $ ip route show default via 10.0.2.2 dev eth0 4.5.6.7 nhid 1 via 254.128.0.0 dev eth0 10.0.2.0/24 dev eth0 proto kernel scope link src 10.0.2.15 $ Fixed behavior: $ ip nexthop add via fe80::9 dev eth0 $ ip nexthop show id 1 via fe80::9 dev eth0 scope link $ ip route add 4.5.6.7/32 nhid 1 $ ip route show default via 10.0.2.2 dev eth0 4.5.6.7 nhid 1 via inet6 fe80::9 dev eth0 10.0.2.0/24 dev eth0 proto kernel scope link src 10.0.2.15 $ v2, v3: Addresses code review comments from David Ahern Fixes: dcb1ecb50edf (“ipv4: Prepare for fib6_nh from a nexthop object”) Signed-off-by: Donald Sharp Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 4 ++-- include/net/nexthop.h | 5 +++-- net/ipv4/fib_semantics.c | 15 ++++++++------- net/ipv6/route.c | 11 ++++++----- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4c81846ccce8..ab1ca9e238d2 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -513,7 +513,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct netlink_callback *cb); int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, - unsigned char *flags, bool skip_oif); + u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, - int nh_weight); + int nh_weight, u8 rt_family); #endif /* _NET_FIB_H */ diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 95f766c31c90..331ebbc94fe7 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -161,7 +161,8 @@ struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel) } static inline -int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) +int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, + u8 rt_family) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); int i; @@ -172,7 +173,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; - if (fib_add_nexthop(skb, nhc, weight) < 0) + if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) return -EMSGSIZE; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2db089e10ba0..0913a090b2bf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1582,7 +1582,7 @@ failure: } int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, - unsigned char *flags, bool skip_oif) + u8 rt_family, unsigned char *flags, bool skip_oif) { if (nhc->nhc_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; @@ -1613,7 +1613,7 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, /* if gateway family does not match nexthop family * gateway is encoded as RTA_VIA */ - if (nhc->nhc_gw_family != nhc->nhc_family) { + if (rt_family != nhc->nhc_gw_family) { int alen = sizeof(struct in6_addr); struct nlattr *nla; struct rtvia *via; @@ -1654,7 +1654,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info); #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, - int nh_weight) + int nh_weight, u8 rt_family) { const struct net_device *dev = nhc->nhc_dev; struct rtnexthop *rtnh; @@ -1667,7 +1667,7 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, rtnh->rtnh_hops = nh_weight - 1; rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; - if (fib_nexthop_info(skb, nhc, &flags, true) < 0) + if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0) goto nla_put_failure; rtnh->rtnh_flags = flags; @@ -1693,13 +1693,14 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) goto nla_put_failure; if (unlikely(fi->nh)) { - if (nexthop_mpath_fill_node(skb, fi->nh) < 0) + if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0) goto nla_put_failure; goto mp_end; } for_nexthops(fi) { - if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0) + if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, + AF_INET) < 0) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (nh->nh_tclassid && @@ -1775,7 +1776,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); unsigned char flags = 0; - if (fib_nexthop_info(skb, nhc, &flags, false) < 0) + if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0) goto nla_put_failure; rtm->rtm_flags = flags; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 977cfd09e2a3..85bcd97f7a72 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5333,7 +5333,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, if (!mp) goto nla_put_failure; - if (nexthop_mpath_fill_node(skb, nh)) + if (nexthop_mpath_fill_node(skb, nh, AF_INET6)) goto nla_put_failure; nla_nest_end(skb, mp); @@ -5341,7 +5341,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, struct fib6_nh *fib6_nh; fib6_nh = nexthop_fib6_nh(nh); - if (fib_nexthop_info(skb, &fib6_nh->nh_common, + if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6, flags, false) < 0) goto nla_put_failure; } @@ -5470,13 +5470,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, - rt->fib6_nh->fib_nh_weight) < 0) + rt->fib6_nh->fib_nh_weight, AF_INET6) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, - sibling->fib6_nh->fib_nh_weight) < 0) + sibling->fib6_nh->fib_nh_weight, + AF_INET6) < 0) goto nla_put_failure; } @@ -5493,7 +5494,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, rtm->rtm_flags |= nh_flags; } else { - if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, + if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6, &nh_flags, false) < 0) goto nla_put_failure; -- cgit From 2339cd6cd0b5401fa3fe886bf1c0cb8822041957 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 3 Sep 2019 15:16:17 -0700 Subject: bpf: fix precision tracking of stack slots The problem can be seen in the following two tests: 0: (bf) r3 = r10 1: (55) if r3 != 0x7b goto pc+0 2: (7a) *(u64 *)(r3 -8) = 0 3: (79) r4 = *(u64 *)(r10 -8) .. 0: (85) call bpf_get_prandom_u32#7 1: (bf) r3 = r10 2: (55) if r3 != 0x7b goto pc+0 3: (7b) *(u64 *)(r3 -8) = r0 4: (79) r4 = *(u64 *)(r10 -8) When backtracking need to mark R4 it will mark slot fp-8. But ST or STX into fp-8 could belong to the same block of instructions. When backtracing is done the parent state may have fp-8 slot as "unallocated stack". Which will cause verifier to warn and incorrectly reject such programs. Writes into stack via non-R10 register are rare. llvm always generates canonical stack spill/fill. For such pathological case fall back to conservative precision tracking instead of rejecting. Reported-by: syzbot+c8d66267fd2b5955287e@syzkaller.appspotmail.com Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b5c14c9d7b98..c36a719fee6d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1772,16 +1772,21 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, bitmap_from_u64(mask, stack_mask); for_each_set_bit(i, mask, 64) { if (i >= func->allocated_stack / BPF_REG_SIZE) { - /* This can happen if backtracking - * is propagating stack precision where - * caller has larger stack frame - * than callee, but backtrack_insn() should - * have returned -ENOTSUPP. + /* the sequence of instructions: + * 2: (bf) r3 = r10 + * 3: (7b) *(u64 *)(r3 -8) = r0 + * 4: (79) r4 = *(u64 *)(r10 -8) + * doesn't contain jmps. It's backtracked + * as a single block. + * During backtracking insn 3 is not recognized as + * stack access, so at the end of backtracking + * stack slot fp-8 is still marked in stack_mask. + * However the parent state may not have accessed + * fp-8 and it's "unallocated" stack space. + * In such case fallback to conservative. */ - verbose(env, "BUG spi %d stack_size %d\n", - i, func->allocated_stack); - WARN_ONCE(1, "verifier backtracking bug"); - return -EFAULT; + mark_all_scalars_precise(env, st); + return 0; } if (func->stack[i].slot_type[0] != STACK_SPILL) { -- cgit From 6e1cdedcf0362fed3aedfe051d46bd7ee2a85fe1 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 5 Sep 2019 09:57:12 +0800 Subject: net: sonic: return NETDEV_TX_OK if failed to map buffer NETDEV_TX_BUSY really should only be used by drivers that call netif_tx_stop_queue() at the wrong moment. If dma_map_single() is failed to map tx DMA buffer, it might trigger an infinite loop. This patch use NETDEV_TX_OK instead of NETDEV_TX_BUSY, and change printk to pr_err_ratelimited. Fixes: d9fb9f384292 ("*sonic/natsemi/ns83829: Move the National Semi-conductor drivers") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index d0a01e8f000a..18fd62fbfb64 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -232,9 +232,9 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); if (!laddr) { - printk(KERN_ERR "%s: failed to map tx DMA buffer.\n", dev->name); + pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); dev_kfree_skb(skb); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ -- cgit From f4b633b911fd3b4cbe1dc065e8fb064078d0889d Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 5 Sep 2019 05:15:42 -0400 Subject: forcedeth: use per cpu to collect xmit/recv statistics When testing with a background iperf pushing 1Gbit/sec traffic and running both ifconfig and netstat to collect statistics, some deadlocks occurred. Ifconfig and netstat will call nv_get_stats64 to get software xmit/recv statistics. In the commit f5d827aece36 ("forcedeth: implement ndo_get_stats64() API"), the normal tx/rx variables is to collect tx/rx statistics. The fix is to replace normal tx/rx variables with per cpu 64-bit variable to collect xmit/recv statistics. The per cpu variable will avoid deadlocks and provide fast efficient statistics updates. In nv_probe, the per cpu variable is initialized. In nv_remove, this per cpu variable is freed. In xmit/recv process, this per cpu variable will be updated. In nv_get_stats64, this per cpu variable on each cpu is added up. Then the driver can get xmit/recv packets statistics. A test runs for several days with this commit, the deadlocks disappear and the performance is better. Tested: - iperf SMP x86_64 -> Client connecting to 1.1.1.108, TCP port 5001 TCP window size: 85.0 KByte (default) ------------------------------------------------------------ [ 3] local 1.1.1.105 port 38888 connected with 1.1.1.108 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 1.10 GBytes 943 Mbits/sec ifconfig results: enp0s9 Link encap:Ethernet HWaddr 00:21:28:6f:de:0f inet addr:1.1.1.105 Bcast:0.0.0.0 Mask:255.255.255.0 UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:5774764531 errors:0 dropped:0 overruns:0 frame:0 TX packets:633534193 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:7646159340904 (7.6 TB) TX bytes:11425340407722 (11.4 TB) netstat results: Kernel Interface table Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flg ... enp0s9 1500 0 5774764531 0 0 0 633534193 0 0 0 BMRU ... Fixes: f5d827aece36 ("forcedeth: implement ndo_get_stats64() API") CC: Joe Jin CC: JUNXIAO_BI Reported-and-tested-by: Nan san Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 143 ++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index b327b29f5d57..a6b4bfae4684 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -713,6 +713,21 @@ struct nv_skb_map { struct nv_skb_map *next_tx_ctx; }; +struct nv_txrx_stats { + u64 stat_rx_packets; + u64 stat_rx_bytes; /* not always available in HW */ + u64 stat_rx_missed_errors; + u64 stat_rx_dropped; + u64 stat_tx_packets; /* not always available in HW */ + u64 stat_tx_bytes; + u64 stat_tx_dropped; +}; + +#define nv_txrx_stats_inc(member) \ + __this_cpu_inc(np->txrx_stats->member) +#define nv_txrx_stats_add(member, count) \ + __this_cpu_add(np->txrx_stats->member, (count)) + /* * SMP locking: * All hardware access under netdev_priv(dev)->lock, except the performance @@ -797,10 +812,7 @@ struct fe_priv { /* RX software stats */ struct u64_stats_sync swstats_rx_syncp; - u64 stat_rx_packets; - u64 stat_rx_bytes; /* not always available in HW */ - u64 stat_rx_missed_errors; - u64 stat_rx_dropped; + struct nv_txrx_stats __percpu *txrx_stats; /* media detection workaround. * Locking: Within irq hander or disable_irq+spin_lock(&np->lock); @@ -826,9 +838,6 @@ struct fe_priv { /* TX software stats */ struct u64_stats_sync swstats_tx_syncp; - u64 stat_tx_packets; /* not always available in HW */ - u64 stat_tx_bytes; - u64 stat_tx_dropped; /* msi/msi-x fields */ u32 msi_flags; @@ -1721,6 +1730,39 @@ static void nv_update_stats(struct net_device *dev) } } +static void nv_get_stats(int cpu, struct fe_priv *np, + struct rtnl_link_stats64 *storage) +{ + struct nv_txrx_stats *src = per_cpu_ptr(np->txrx_stats, cpu); + unsigned int syncp_start; + u64 rx_packets, rx_bytes, rx_dropped, rx_missed_errors; + u64 tx_packets, tx_bytes, tx_dropped; + + do { + syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp); + rx_packets = src->stat_rx_packets; + rx_bytes = src->stat_rx_bytes; + rx_dropped = src->stat_rx_dropped; + rx_missed_errors = src->stat_rx_missed_errors; + } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start)); + + storage->rx_packets += rx_packets; + storage->rx_bytes += rx_bytes; + storage->rx_dropped += rx_dropped; + storage->rx_missed_errors += rx_missed_errors; + + do { + syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp); + tx_packets = src->stat_tx_packets; + tx_bytes = src->stat_tx_bytes; + tx_dropped = src->stat_tx_dropped; + } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start)); + + storage->tx_packets += tx_packets; + storage->tx_bytes += tx_bytes; + storage->tx_dropped += tx_dropped; +} + /* * nv_get_stats64: dev->ndo_get_stats64 function * Get latest stats value from the nic. @@ -1733,7 +1775,7 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage) __releases(&netdev_priv(dev)->hwstats_lock) { struct fe_priv *np = netdev_priv(dev); - unsigned int syncp_start; + int cpu; /* * Note: because HW stats are not always available and for @@ -1746,20 +1788,8 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage) */ /* software stats */ - do { - syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp); - storage->rx_packets = np->stat_rx_packets; - storage->rx_bytes = np->stat_rx_bytes; - storage->rx_dropped = np->stat_rx_dropped; - storage->rx_missed_errors = np->stat_rx_missed_errors; - } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start)); - - do { - syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp); - storage->tx_packets = np->stat_tx_packets; - storage->tx_bytes = np->stat_tx_bytes; - storage->tx_dropped = np->stat_tx_dropped; - } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start)); + for_each_online_cpu(cpu) + nv_get_stats(cpu, np, storage); /* If the nic supports hw counters then retrieve latest values */ if (np->driver_data & DEV_HAS_STATISTICS_V123) { @@ -1827,7 +1857,7 @@ static int nv_alloc_rx(struct net_device *dev) } else { packet_dropped: u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_dropped++; + nv_txrx_stats_inc(stat_rx_dropped); u64_stats_update_end(&np->swstats_rx_syncp); return 1; } @@ -1869,7 +1899,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev) } else { packet_dropped: u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_dropped++; + nv_txrx_stats_inc(stat_rx_dropped); u64_stats_update_end(&np->swstats_rx_syncp); return 1; } @@ -2013,7 +2043,7 @@ static void nv_drain_tx(struct net_device *dev) } if (nv_release_txskb(np, &np->tx_skb[i])) { u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); } np->tx_skb[i].dma = 0; @@ -2227,7 +2257,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2273,7 +2303,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); np->put_tx_ctx = start_tx_ctx; u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2384,7 +2414,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2431,7 +2461,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, dev_kfree_skb_any(skb); np->put_tx_ctx = start_tx_ctx; u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2560,9 +2590,12 @@ static int nv_tx_done(struct net_device *dev, int limit) && !(flags & NV_TX_RETRYCOUNT_MASK)) nv_legacybackoff_reseed(dev); } else { + unsigned int len; + u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_packets++; - np->stat_tx_bytes += np->get_tx_ctx->skb->len; + nv_txrx_stats_inc(stat_tx_packets); + len = np->get_tx_ctx->skb->len; + nv_txrx_stats_add(stat_tx_bytes, len); u64_stats_update_end(&np->swstats_tx_syncp); } bytes_compl += np->get_tx_ctx->skb->len; @@ -2577,9 +2610,12 @@ static int nv_tx_done(struct net_device *dev, int limit) && !(flags & NV_TX2_RETRYCOUNT_MASK)) nv_legacybackoff_reseed(dev); } else { + unsigned int len; + u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_packets++; - np->stat_tx_bytes += np->get_tx_ctx->skb->len; + nv_txrx_stats_inc(stat_tx_packets); + len = np->get_tx_ctx->skb->len; + nv_txrx_stats_add(stat_tx_bytes, len); u64_stats_update_end(&np->swstats_tx_syncp); } bytes_compl += np->get_tx_ctx->skb->len; @@ -2627,9 +2663,12 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit) nv_legacybackoff_reseed(dev); } } else { + unsigned int len; + u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_packets++; - np->stat_tx_bytes += np->get_tx_ctx->skb->len; + nv_txrx_stats_inc(stat_tx_packets); + len = np->get_tx_ctx->skb->len; + nv_txrx_stats_add(stat_tx_bytes, len); u64_stats_update_end(&np->swstats_tx_syncp); } @@ -2806,6 +2845,15 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen) } } +static void rx_missing_handler(u32 flags, struct fe_priv *np) +{ + if (flags & NV_RX_MISSEDFRAME) { + u64_stats_update_begin(&np->swstats_rx_syncp); + nv_txrx_stats_inc(stat_rx_missed_errors); + u64_stats_update_end(&np->swstats_rx_syncp); + } +} + static int nv_rx_process(struct net_device *dev, int limit) { struct fe_priv *np = netdev_priv(dev); @@ -2848,11 +2896,7 @@ static int nv_rx_process(struct net_device *dev, int limit) } /* the rest are hard errors */ else { - if (flags & NV_RX_MISSEDFRAME) { - u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_missed_errors++; - u64_stats_update_end(&np->swstats_rx_syncp); - } + rx_missing_handler(flags, np); dev_kfree_skb(skb); goto next_pkt; } @@ -2896,8 +2940,8 @@ static int nv_rx_process(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&np->napi, skb); u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_packets++; - np->stat_rx_bytes += len; + nv_txrx_stats_inc(stat_rx_packets); + nv_txrx_stats_add(stat_rx_bytes, len); u64_stats_update_end(&np->swstats_rx_syncp); next_pkt: if (unlikely(np->get_rx.orig++ == np->last_rx.orig)) @@ -2982,8 +3026,8 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) } napi_gro_receive(&np->napi, skb); u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_packets++; - np->stat_rx_bytes += len; + nv_txrx_stats_inc(stat_rx_packets); + nv_txrx_stats_add(stat_rx_bytes, len); u64_stats_update_end(&np->swstats_rx_syncp); } else { dev_kfree_skb(skb); @@ -5651,6 +5695,12 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) SET_NETDEV_DEV(dev, &pci_dev->dev); u64_stats_init(&np->swstats_rx_syncp); u64_stats_init(&np->swstats_tx_syncp); + np->txrx_stats = alloc_percpu(struct nv_txrx_stats); + if (!np->txrx_stats) { + pr_err("np->txrx_stats, alloc memory error.\n"); + err = -ENOMEM; + goto out_alloc_percpu; + } timer_setup(&np->oom_kick, nv_do_rx_refill, 0); timer_setup(&np->nic_poll, nv_do_nic_poll, 0); @@ -6060,6 +6110,8 @@ out_relreg: out_disable: pci_disable_device(pci_dev); out_free: + free_percpu(np->txrx_stats); +out_alloc_percpu: free_netdev(dev); out: return err; @@ -6105,6 +6157,9 @@ static void nv_restore_mac_addr(struct pci_dev *pci_dev) static void nv_remove(struct pci_dev *pci_dev) { struct net_device *dev = pci_get_drvdata(pci_dev); + struct fe_priv *np = netdev_priv(dev); + + free_percpu(np->txrx_stats); unregister_netdev(dev); -- cgit From b88dd52c62bb5c5d58f0963287f41fd084352c57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Sep 2019 05:20:22 -0700 Subject: net: sched: fix reordering issues Whenever MQ is not used on a multiqueue device, we experience serious reordering problems. Bisection found the cited commit. The issue can be described this way : - A single qdisc hierarchy is shared by all transmit queues. (eg : tc qdisc replace dev eth0 root fq_codel) - When/if try_bulk_dequeue_skb_slow() dequeues a packet targetting a different transmit queue than the one used to build a packet train, we stop building the current list and save the 'bad' skb (P1) in a special queue. (bad_txq) - When dequeue_skb() calls qdisc_dequeue_skb_bad_txq() and finds this skb (P1), it checks if the associated transmit queues is still in frozen state. If the queue is still blocked (by BQL or NIC tx ring full), we leave the skb in bad_txq and return NULL. - dequeue_skb() calls q->dequeue() to get another packet (P2) The other packet can target the problematic queue (that we found in frozen state for the bad_txq packet), but another cpu just ran TX completion and made room in the txq that is now ready to accept new packets. - Packet P2 is sent while P1 is still held in bad_txq, P1 might be sent at next round. In practice P2 is the lead of a big packet train (P2,P3,P4 ...) filling the BQL budget and delaying P1 by many packets :/ To solve this problem, we have to block the dequeue process as long as the first packet in bad_txq can not be sent. Reordering issues disappear and no side effects have been seen. Fixes: a53851e2c321 ("net: sched: explicit locking in gso_cpu fallback") Signed-off-by: Eric Dumazet Cc: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 137db1cbde85..ac28f6a5d70e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -46,6 +46,8 @@ EXPORT_SYMBOL(default_qdisc_ops); * - updates to tree and tree walking are only done under the rtnl mutex. */ +#define SKB_XOFF_MAGIC ((struct sk_buff *)1UL) + static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) { const struct netdev_queue *txq = q->dev_queue; @@ -71,7 +73,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) q->q.qlen--; } } else { - skb = NULL; + skb = SKB_XOFF_MAGIC; } } @@ -253,8 +255,11 @@ validate: return skb; skb = qdisc_dequeue_skb_bad_txq(q); - if (unlikely(skb)) + if (unlikely(skb)) { + if (skb == SKB_XOFF_MAGIC) + return NULL; goto bulk; + } skb = q->dequeue(q); if (skb) { bulk: -- cgit From b0a3caeafded18111b638cf1adeba10bfd58a12d Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 5 Sep 2019 18:26:08 +0530 Subject: MAINTAINERS: add myself as maintainer for xilinx axiethernet driver I am maintaining xilinx axiethernet driver in xilinx tree and would like to maintain it in the mainline kernel as well. Hence adding myself as a maintainer. Also Anirudha and John has moved to new roles, so based on request removing them from the maintainer list. Signed-off-by: Radhey Shyam Pandey Acked-by: John Linn Acked-by: Michal Simek Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e7a47b5210fd..a50e97a63bc8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17699,8 +17699,7 @@ F: include/uapi/linux/dqblk_xfs.h F: include/uapi/linux/fsmap.h XILINX AXI ETHERNET DRIVER -M: Anirudha Sarangi -M: John Linn +M: Radhey Shyam Pandey S: Maintained F: drivers/net/ethernet/xilinx/xilinx_axienet* -- cgit From b82573fdbef809803d06295f318f7ad1a2c5ceb9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 26 Aug 2019 21:02:09 +0200 Subject: net/hamradio/6pack: Fix the size of a sk_buff used in 'sp_bump()' We 'allocate' 'count' bytes here. In fact, 'dev_alloc_skb' already add some extra space for padding, so a bit more is allocated. However, we use 1 byte for the KISS command, then copy 'count' bytes, so count+1 bytes. Explicitly allocate and use 1 more byte to be safe. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 331c16d30d5d..23281aeeb222 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -344,10 +344,10 @@ static void sp_bump(struct sixpack *sp, char cmd) sp->dev->stats.rx_bytes += count; - if ((skb = dev_alloc_skb(count)) == NULL) + if ((skb = dev_alloc_skb(count + 1)) == NULL) goto out_mem; - ptr = skb_put(skb, count); + ptr = skb_put(skb, count + 1); *ptr++ = cmd; /* KISS command */ memcpy(ptr, sp->cooked_buf + 1, count); -- cgit From 63b2ed4e10b2e6c913e1d8cdd728e7fba4115a3d Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 5 Sep 2019 19:46:18 +0300 Subject: net: phylink: Fix flow control resolution Regarding to IEEE 802.3-2015 standard section 2 28B.3 Priority resolution - Table 28-3 - Pause resolution In case of Local device Pause=1 AsymDir=0, Link partner Pause=1 AsymDir=1, Local device resolution should be enable PAUSE transmit, disable PAUSE receive. And in case of Local device Pause=1 AsymDir=1, Link partner Pause=1 AsymDir=0, Local device resolution should be enable PAUSE receive, disable PAUSE transmit. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Stefan Chulski Reported-by: Shaul Ben-Mayor Acked-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a45c5de96ab1..a5a57ca94c1a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -376,8 +376,8 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat * Local device Link partner * Pause AsymDir Pause AsymDir Result * 1 X 1 X TX+RX - * 0 1 1 1 RX - * 1 1 0 1 TX + * 0 1 1 1 TX + * 1 1 0 1 RX */ static void phylink_resolve_flow(struct phylink *pl, struct phylink_link_state *state) @@ -398,7 +398,7 @@ static void phylink_resolve_flow(struct phylink *pl, new_pause = MLO_PAUSE_TX | MLO_PAUSE_RX; else if (pause & MLO_PAUSE_ASYM) new_pause = state->pause & MLO_PAUSE_SYM ? - MLO_PAUSE_RX : MLO_PAUSE_TX; + MLO_PAUSE_TX : MLO_PAUSE_RX; } else { new_pause = pl->link_config.pause & MLO_PAUSE_TXRX_MASK; } -- cgit From 1c2977c094998de032fee6e898c88b4a05483d08 Mon Sep 17 00:00:00 2001 From: Juliet Kim Date: Thu, 5 Sep 2019 17:30:01 -0400 Subject: net/ibmvnic: free reset work of removed device from queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 36f1031c51a2 ("ibmvnic: Do not process reset during or after device removal") made the change to exit reset if the driver has been removed, but does not free reset work items of the adapter from queue. Ensure all reset work items are freed when breaking out of the loop early. Fixes: 36f1031c51a2 ("ibmnvic: Do not process reset during or after device removal”) Signed-off-by: Juliet Kim Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index fa4bb940665c..6644cabc8e75 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1985,7 +1985,10 @@ static void __ibmvnic_reset(struct work_struct *work) while (rwi) { if (adapter->state == VNIC_REMOVING || adapter->state == VNIC_REMOVED) - goto out; + kfree(rwi); + rc = EBUSY; + break; + } if (adapter->force_reset_recovery) { adapter->force_reset_recovery = false; @@ -2011,7 +2014,7 @@ static void __ibmvnic_reset(struct work_struct *work) netdev_dbg(adapter->netdev, "Reset failed\n"); free_all_rwi(adapter); } -out: + adapter->resetting = false; if (we_lock_rtnl) rtnl_unlock(); -- cgit From fe163e534e5eecdfd7b5920b0dfd24c458ee85d6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Sep 2019 19:36:37 -0700 Subject: isdn/capi: check message length in capi_write() syzbot reported: BUG: KMSAN: uninit-value in capi_write+0x791/0xa90 drivers/isdn/capi/capi.c:700 CPU: 0 PID: 10025 Comm: syz-executor379 Not tainted 4.20.0-rc7+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 capi_write+0x791/0xa90 drivers/isdn/capi/capi.c:700 do_loop_readv_writev fs/read_write.c:703 [inline] do_iter_write+0x83e/0xd80 fs/read_write.c:961 vfs_writev fs/read_write.c:1004 [inline] do_writev+0x397/0x840 fs/read_write.c:1039 __do_sys_writev fs/read_write.c:1112 [inline] __se_sys_writev+0x9b/0xb0 fs/read_write.c:1109 __x64_sys_writev+0x4a/0x70 fs/read_write.c:1109 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 [...] The problem is that capi_write() is reading past the end of the message. Fix it by checking the message's length in the needed places. Reported-and-tested-by: syzbot+0849c524d9c634f5ae66@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- drivers/isdn/capi/capi.c | 10 +++++++++- include/uapi/linux/isdn/capicmd.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 3c3ad42f22bf..c92b405b7646 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -688,6 +688,9 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos if (!cdev->ap.applid) return -ENODEV; + if (count < CAPIMSG_BASELEN) + return -EINVAL; + skb = alloc_skb(count, GFP_USER); if (!skb) return -ENOMEM; @@ -698,7 +701,8 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos } mlen = CAPIMSG_LEN(skb->data); if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) { - if ((size_t)(mlen + CAPIMSG_DATALEN(skb->data)) != count) { + if (count < CAPI_DATA_B3_REQ_LEN || + (size_t)(mlen + CAPIMSG_DATALEN(skb->data)) != count) { kfree_skb(skb); return -EINVAL; } @@ -711,6 +715,10 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos CAPIMSG_SETAPPID(skb->data, cdev->ap.applid); if (CAPIMSG_CMD(skb->data) == CAPI_DISCONNECT_B3_RESP) { + if (count < CAPI_DISCONNECT_B3_RESP_LEN) { + kfree_skb(skb); + return -EINVAL; + } mutex_lock(&cdev->lock); capincci_free(cdev, CAPIMSG_NCCI(skb->data)); mutex_unlock(&cdev->lock); diff --git a/include/uapi/linux/isdn/capicmd.h b/include/uapi/linux/isdn/capicmd.h index 4941628a4fb9..5ec88e7548a9 100644 --- a/include/uapi/linux/isdn/capicmd.h +++ b/include/uapi/linux/isdn/capicmd.h @@ -16,6 +16,7 @@ #define CAPI_MSG_BASELEN 8 #define CAPI_DATA_B3_REQ_LEN (CAPI_MSG_BASELEN+4+4+2+2+2) #define CAPI_DATA_B3_RESP_LEN (CAPI_MSG_BASELEN+4+2) +#define CAPI_DISCONNECT_B3_RESP_LEN (CAPI_MSG_BASELEN+4) /*----- CAPI commands -----*/ #define CAPI_ALERT 0x01 -- cgit From 8652f17c658d03f5c87b8dee6e8e52480c6cd37d Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Thu, 5 Sep 2019 20:56:37 -0700 Subject: ipv6: addrconf_f6i_alloc - fix non-null pointer check to !IS_ERR() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a stupid bug I recently introduced... ip6_route_info_create() returns an ERR_PTR(err) and not a NULL on error. Fixes: d55a2e374a94 ("net-ipv6: fix excessive RTF_ADDRCONF flag on ::1/128 local route (and others)'") Cc: David Ahern Cc: Lorenzo Colitti Cc: Eric Dumazet Signed-off-by: Maciej Żenczykowski Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 85bcd97f7a72..546088e50815 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4406,7 +4406,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (f6i) + if (!IS_ERR(f6i)) f6i->dst_nocount = true; return f6i; } -- cgit From 3dcbdb134f329842a38f0e6797191b885ab00a00 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Fri, 6 Sep 2019 12:23:50 +0300 Subject: net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list Historically, support for frag_list packets entering skb_segment() was limited to frag_list members terminating on exact same gso_size boundaries. This is verified with a BUG_ON since commit 89319d3801d1 ("net: Add frag_list support to skb_segment"), quote: As such we require all frag_list members terminate on exact MSS boundaries. This is checked using BUG_ON. As there should only be one producer in the kernel of such packets, namely GRO, this requirement should not be difficult to maintain. However, since commit 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper"), the "exact MSS boundaries" assumption no longer holds: An eBPF program using bpf_skb_change_proto() DOES modify 'gso_size', but leaves the frag_list members as originally merged by GRO with the original 'gso_size'. Example of such programs are bpf-based NAT46 or NAT64. This lead to a kernel BUG_ON for flows involving: - GRO generating a frag_list skb - bpf program performing bpf_skb_change_proto() or bpf_skb_adjust_room() - skb_segment() of the skb See example BUG_ON reports in [0]. In commit 13acc94eff12 ("net: permit skb_segment on head_frag frag_list skb"), skb_segment() was modified to support the "gso_size mangling" case of a frag_list GRO'ed skb, but *only* for frag_list members having head_frag==true (having a page-fragment head). Alas, GRO packets having frag_list members with a linear kmalloced head (head_frag==false) still hit the BUG_ON. This commit adds support to skb_segment() for a 'head_skb' packet having a frag_list whose members are *non* head_frag, with gso_size mangled, by disabling SG and thus falling-back to copying the data from the given 'head_skb' into the generated segmented skbs - as suggested by Willem de Bruijn [1]. Since this approach involves the penalty of skb_copy_and_csum_bits() when building the segments, care was taken in order to enable this solution only when required: - untrusted gso_size, by testing SKB_GSO_DODGY is set (SKB_GSO_DODGY is set by any gso_size mangling functions in net/core/filter.c) - the frag_list is non empty, its item is a non head_frag, *and* the headlen of the given 'head_skb' does not match the gso_size. [0] https://lore.kernel.org/netdev/20190826170724.25ff616f@pixies/ https://lore.kernel.org/netdev/9265b93f-253d-6b8c-f2b8-4b54eff1835c@fb.com/ [1] https://lore.kernel.org/netdev/CA+FuTSfVsgNDi7c=GUU8nMg2hWxF2SjCNLXetHeVPdnxAW5K-w@mail.gmail.com/ Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Suggested-by: Willem de Bruijn Cc: Daniel Borkmann Cc: Eric Dumazet Cc: Alexander Duyck Signed-off-by: Shmulik Ladkani Reviewed-by: Willem de Bruijn Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/skbuff.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0338820ee0ec..982d8d12830e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3664,6 +3664,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int pos; int dummy; + if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && + (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { + /* gso_size is untrusted, and we have a frag_list with a linear + * non head_frag head. + * + * (we assume checking the first list_skb member suffices; + * i.e if either of the list_skb members have non head_frag + * head, then the first one has too). + * + * If head_skb's headlen does not fit requested gso_size, it + * means that the frag_list members do NOT terminate on exact + * gso_size boundaries. Hence we cannot perform skb_frag_t page + * sharing. Therefore we must fallback to copying the frag_list + * skbs; we do so by disabling SG. + */ + if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) + features &= ~NETIF_F_SG; + } + __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) -- cgit From 28abe579625249161f5de943b8f8713aa8c48452 Mon Sep 17 00:00:00 2001 From: Fred Lotter Date: Fri, 6 Sep 2019 19:29:41 +0200 Subject: nfp: flower: cmsg rtnl locks can timeout reify messages Flower control message replies are handled in different locations. The truly high priority replies are handled in the BH (tasklet) context, while the remaining replies are handled in a predefined Linux work queue. The work queue handler orders replies into high and low priority groups, and always start servicing the high priority replies within the received batch first. Reply Type: Rtnl Lock: Handler: CMSG_TYPE_PORT_MOD no BH tasklet (mtu) CMSG_TYPE_TUN_NEIGH no BH tasklet CMSG_TYPE_FLOW_STATS no BH tasklet CMSG_TYPE_PORT_REIFY no WQ high CMSG_TYPE_PORT_MOD yes WQ high (link/mtu) CMSG_TYPE_MERGE_HINT yes WQ low CMSG_TYPE_NO_NEIGH no WQ low CMSG_TYPE_ACTIVE_TUNS no WQ low CMSG_TYPE_QOS_STATS no WQ low CMSG_TYPE_LAG_CONFIG no WQ low A subset of control messages can block waiting for an rtnl lock (from both work queue priority groups). The rtnl lock is heavily contended for by external processes such as systemd-udevd, systemd-network and libvirtd, especially during netdev creation, such as when flower VFs and representors are instantiated. Kernel netlink instrumentation shows that external processes (such as systemd-udevd) often use successive rtnl_trylock() sequences, which can result in an rtnl_lock() blocked control message to starve for longer periods of time during rtnl lock contention, i.e. netdev creation. In the current design a single blocked control message will block the entire work queue (both priorities), and introduce a latency which is nondeterministic and dependent on system wide rtnl lock usage. In some extreme cases, one blocked control message at exactly the wrong time, just before the maximum number of VFs are instantiated, can block the work queue for long enough to prevent VF representor REIFY replies from getting handled in time for the 40ms timeout. The firmware will deliver the total maximum number of REIFY message replies in around 300us. Only REIFY and MTU update messages require replies within a timeout period (of 40ms). The MTU-only updates are already done directly in the BH (tasklet) handler. Move the REIFY handler down into the BH (tasklet) in order to resolve timeouts caused by a blocked work queue waiting on rtnl locks. Signed-off-by: Fred Lotter Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index d5bbe3d6048b..05981b54eaab 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -260,9 +260,6 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) type = cmsg_hdr->type; switch (type) { - case NFP_FLOWER_CMSG_TYPE_PORT_REIFY: - nfp_flower_cmsg_portreify_rx(app, skb); - break; case NFP_FLOWER_CMSG_TYPE_PORT_MOD: nfp_flower_cmsg_portmod_rx(app, skb); break; @@ -328,8 +325,7 @@ nfp_flower_queue_ctl_msg(struct nfp_app *app, struct sk_buff *skb, int type) struct nfp_flower_priv *priv = app->priv; struct sk_buff_head *skb_head; - if (type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY || - type == NFP_FLOWER_CMSG_TYPE_PORT_MOD) + if (type == NFP_FLOWER_CMSG_TYPE_PORT_MOD) skb_head = &priv->cmsg_skbs_high; else skb_head = &priv->cmsg_skbs_low; @@ -368,6 +364,10 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) { /* Acks from the NFP that the route is added - ignore. */ dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { + /* Handle REIFY acks outside wq to prevent RTNL conflict. */ + nfp_flower_cmsg_portreify_rx(app, skb); + dev_consume_skb_any(skb); } else { nfp_flower_queue_ctl_msg(app, skb, cmsg_hdr->type); } -- cgit From c8dc55956b09b53ccffceb6e3146981210e27821 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 9 Sep 2019 22:44:51 +0200 Subject: net/ibmvnic: Fix missing { in __ibmvnic_reset Commit 1c2977c09499 ("net/ibmvnic: free reset work of removed device from queue") adds a } without corresponding { causing build break. Fixes: 1c2977c09499 ("net/ibmvnic: free reset work of removed device from queue") Signed-off-by: Michal Suchanek Reviewed-by: Tyrel Datwyler Reviewed-by: Juliet Kim Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 6644cabc8e75..5cb55ea671e3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1984,7 +1984,7 @@ static void __ibmvnic_reset(struct work_struct *work) rwi = get_next_rwi(adapter); while (rwi) { if (adapter->state == VNIC_REMOVING || - adapter->state == VNIC_REMOVED) + adapter->state == VNIC_REMOVED) { kfree(rwi); rc = EBUSY; break; -- cgit From 94a72b3f024fc7e9ab640897a1e38583a470659d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 6 Sep 2019 11:47:02 +0200 Subject: bridge/mdb: remove wrong use of NLM_F_MULTI NLM_F_MULTI must be used only when a NLMSG_DONE message is sent at the end. In fact, NLMSG_DONE is sent only at the end of a dump. Libraries like libnl will wait forever for NLMSG_DONE. Fixes: 949f1e39a617 ("bridge: mdb: notify on router port add and del") CC: Nikolay Aleksandrov Signed-off-by: Nicolas Dichtel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index bf6acd34234d..63f9c08625f0 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -437,7 +437,7 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct nlmsghdr *nlh; struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; -- cgit From 8b142a00edcf8422ca48b8de88d286efb500cb53 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 8 Sep 2019 12:11:23 -0700 Subject: net_sched: check cops->tcf_block in tc_bind_tclass() At least sch_red and sch_tbf don't implement ->tcf_block() while still have a non-zero tc "class". Instead of adding nop implementations to each of such qdisc's, we can just relax the check of cops->tcf_block() in tc_bind_tclass(). They don't support TC filter anyway. Reported-by: syzbot+21b29db13c065852f64b@syzkaller.appspotmail.com Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 04faee7ccbce..1047825d9f48 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1920,6 +1920,8 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, cl = cops->find(q, portid); if (!cl) return; + if (!cops->tcf_block) + return; block = cops->tcf_block(q, cl, NULL); if (!block) return; -- cgit From d4d6ec6dac07f263f06d847d6f732d6855522845 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 8 Sep 2019 13:40:51 -0700 Subject: sch_hhf: ensure quantum and hhf_non_hh_weight are non-zero In case of TCA_HHF_NON_HH_WEIGHT or TCA_HHF_QUANTUM is zero, it would make no progress inside the loop in hhf_dequeue() thus kernel would get stuck. Fix this by checking this corner case in hhf_change(). Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Reported-by: syzbot+bc6297c11f19ee807dc2@syzkaller.appspotmail.com Reported-by: syzbot+041483004a7f45f1f20a@syzkaller.appspotmail.com Reported-by: syzbot+55be5f513bed37fc4367@syzkaller.appspotmail.com Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Terry Lam Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hhf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index cee6971c1c82..23cd1c873a2c 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -531,7 +531,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]); non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; - if (non_hh_quantum > INT_MAX) + if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX) return -EINVAL; sch_tree_lock(sch); -- cgit From f794dc2304d83ab998c2eee5bab0549aff5c53a2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 9 Sep 2019 15:33:29 +0800 Subject: sctp: fix the missing put_user when dumping transport thresholds This issue causes SCTP_PEER_ADDR_THLDS sockopt not to be able to dump a transport thresholds info. Fix it by adding 'goto' put_user in sctp_getsockopt_paddr_thresholds. Fixes: 8add543e369d ("sctp: add SCTP_FUTURE_ASSOC for SCTP_PEER_ADDR_THLDS sockopt") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9d1f83b10c0a..ad8751891b00 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7173,7 +7173,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; - return 0; + goto out; } asoc = sctp_id2assoc(sk, val.spt_assoc_id); @@ -7191,6 +7191,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, val.spt_pathmaxrxt = sp->pathmaxrxt; } +out: if (put_user(len, optlen) || copy_to_user(optval, &val, len)) return -EFAULT; -- cgit From 2507e6ab7a9a440773be476141a255934468c5ef Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 10 Sep 2019 18:01:40 -0500 Subject: wimax: i2400: fix memory leak In i2400m_op_rfkill_sw_toggle cmd buffer should be released along with skb response. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/op-rfkill.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c index 6642bcb27761..8efb493ceec2 100644 --- a/drivers/net/wimax/i2400m/op-rfkill.c +++ b/drivers/net/wimax/i2400m/op-rfkill.c @@ -127,6 +127,7 @@ int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev, "%d\n", result); result = 0; error_cmd: + kfree(cmd); kfree_skb(ack_skb); error_msg_to_dev: error_alloc: -- cgit From 49f6c90bf6805948b597eabb499e500a47cf24be Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Wed, 11 Sep 2019 09:36:23 +0800 Subject: net: sonic: replace dev_kfree_skb in sonic_send_packet sonic_send_packet will be processed in irq or non-irq context, so it would better use dev_kfree_skb_any instead of dev_kfree_skb. Fixes: d9fb9f384292 ("*sonic/natsemi/ns83829: Move the National Semi-conductor drivers") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 18fd62fbfb64..b339125b2f09 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -233,7 +233,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); if (!laddr) { pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } -- cgit From 3dfdecc6d1256835ac0612b2a8f7595466d66327 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Sep 2019 14:54:21 -0700 Subject: lib/Kconfig: fix OBJAGG in lib/ menu structure Keep the "Library routines" menu intact by moving OBJAGG into it. Otherwise OBJAGG is displayed/presented as an orphan in the various config menus. Fixes: 0a020d416d0a ("lib: introduce initial implementation of object aggregation manager") Signed-off-by: Randy Dunlap Cc: Jiri Pirko Cc: Ido Schimmel Cc: David S. Miller Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- lib/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index f33d66fc0e86..4e6b1c3e4c98 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -631,6 +631,9 @@ config SBITMAP config PARMAN tristate "parman" if COMPILE_TEST +config OBJAGG + tristate "objagg" if COMPILE_TEST + config STRING_SELFTEST tristate "Test string functions" @@ -653,6 +656,3 @@ config GENERIC_LIB_CMPDI2 config GENERIC_LIB_UCMPDI2 bool - -config OBJAGG - tristate "objagg" if COMPILE_TEST -- cgit From 3e493173b7841259a08c5c8e5cbe90adb349da7e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 11 Sep 2019 16:03:05 +0300 Subject: mac80211: Do not send Layer 2 Update frame before authorization The Layer 2 Update frame is used to update bridges when a station roams to another AP even if that STA does not transmit any frames after the reassociation. This behavior was described in IEEE Std 802.11F-2003 as something that would happen based on MLME-ASSOCIATE.indication, i.e., before completing 4-way handshake. However, this IEEE trial-use recommended practice document was published before RSN (IEEE Std 802.11i-2004) and as such, did not consider RSN use cases. Furthermore, IEEE Std 802.11F-2003 was withdrawn in 2006 and as such, has not been maintained amd should not be used anymore. Sending out the Layer 2 Update frame immediately after association is fine for open networks (and also when using SAE, FT protocol, or FILS authentication when the station is actually authenticated by the time association completes). However, it is not appropriate for cases where RSN is used with PSK or EAP authentication since the station is actually fully authenticated only once the 4-way handshake completes after authentication and attackers might be able to use the unauthenticated triggering of Layer 2 Update frame transmission to disrupt bridge behavior. Fix this by postponing transmission of the Layer 2 Update frame from station entry addition to the point when the station entry is marked authorized. Similarly, send out the VLAN binding update only if the STA entry has already been authorized. Signed-off-by: Jouni Malinen Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- net/mac80211/cfg.c | 14 ++++---------- net/mac80211/sta_info.c | 4 ++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 111c400199ec..4105c97c7ba1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1529,7 +1529,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; - int layer2_update; if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -1573,18 +1572,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); - layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_AP; - err = sta_info_insert_rcu(sta); if (err) { rcu_read_unlock(); return err; } - if (layer2_update) - cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); - rcu_read_unlock(); return 0; @@ -1682,10 +1675,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, sta->sdata = vlansdata; ieee80211_check_fast_xmit(sta); - if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); - - cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); + } } err = sta_apply_parameters(local, sta, params); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 95eb8220e2e4..5fb368cc2633 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1979,6 +1979,10 @@ int sta_info_move_state(struct sta_info *sta, ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); } + if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sta->sdata->vif.type == NL80211_IFTYPE_AP) + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); break; default: break; -- cgit From c5c1a030a7dbf8dd4e1fa4405ae9a89dc1d2a8db Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Wed, 11 Sep 2019 02:58:05 -0700 Subject: net/rds: An rds_sock is added too early to the hash table In rds_bind(), an rds_sock is added to the RDS bind hash table before rs_transport is set. This means that the socket can be found by the receive code path when rs_transport is NULL. And the receive code path de-references rs_transport for congestion update check. This can cause a panic. An rds_sock should not be added to the bind hash table before all the needed fields are set. Reported-by: syzbot+4b4f8163c2e246df3c4c@syzkaller.appspotmail.com Signed-off-by: Ka-Cheong Poon Signed-off-by: David S. Miller --- net/rds/bind.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/net/rds/bind.c b/net/rds/bind.c index 0f4398e7f2a7..05464fd7c17a 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -239,34 +239,30 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - sock_set_flag(sk, SOCK_RCU_FREE); - ret = rds_add_bound(rs, binding_addr, &port, scope_id); - if (ret) - goto out; - - if (rs->rs_transport) { /* previously bound */ + /* The transport can be set using SO_RDS_TRANSPORT option before the + * socket is bound. + */ + if (rs->rs_transport) { trans = rs->rs_transport; if (trans->laddr_check(sock_net(sock->sk), binding_addr, scope_id) != 0) { ret = -ENOPROTOOPT; - rds_remove_bound(rs); - } else { - ret = 0; + goto out; } - goto out; - } - trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr, - scope_id); - if (!trans) { - ret = -EADDRNOTAVAIL; - rds_remove_bound(rs); - pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n", - __func__, binding_addr); - goto out; + } else { + trans = rds_trans_get_preferred(sock_net(sock->sk), + binding_addr, scope_id); + if (!trans) { + ret = -EADDRNOTAVAIL; + pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n", + __func__, binding_addr); + goto out; + } + rs->rs_transport = trans; } - rs->rs_transport = trans; - ret = 0; + sock_set_flag(sk, SOCK_RCU_FREE); + ret = rds_add_bound(rs, binding_addr, &port, scope_id); out: release_sock(sk); -- cgit From 90aa11f1bc5dbb0c392775ed63ced23a3873bcd2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 11:38:48 +0100 Subject: NFC: st95hf: fix spelling mistake "receieve" -> "receive" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/nfc/st95hf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index e42850095892..7eda62a9e0df 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -316,7 +316,7 @@ static int st95hf_echo_command(struct st95hf_context *st95context) &echo_response); if (result) { dev_err(&st95context->spicontext.spidev->dev, - "err: echo response receieve error = 0x%x\n", result); + "err: echo response receive error = 0x%x\n", result); return result; } -- cgit From b93fb20f016c057d8a48c4ad62d493dfc5096d0c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 12:37:34 +0100 Subject: net: lmc: fix spelling mistake "runnin" -> "running" There is a spelling mistake in the lmc_trace message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wan/lmc/lmc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index d74349628db2..0e6a51525d91 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1115,7 +1115,7 @@ static void lmc_running_reset (struct net_device *dev) /*fold00*/ sc->lmc_cmdmode |= (TULIP_CMD_TXRUN | TULIP_CMD_RXRUN); LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode); - lmc_trace(dev, "lmc_runnin_reset_out"); + lmc_trace(dev, "lmc_running_reset_out"); } -- cgit From c3dc1fa72249e4472b90ecef4dbafe25f0f07889 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 15:08:16 +0100 Subject: net: hns3: fix spelling mistake "undeflow" -> "underflow" There is a spelling mistake in a .msg literal string. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 0a7243825e7b..85a3b062371d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -98,7 +98,7 @@ static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow", .reset_level = HNAE3_GLOBAL_RESET }, - { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow", + { .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow", .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(3), .msg = "tx_buf_overflow", .reset_level = HNAE3_GLOBAL_RESET }, -- cgit From f4b752a6b2708bfdf7fbe8a241082c8104f4ce05 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 15:18:11 +0100 Subject: mlx4: fix spelling mistake "veify" -> "verify" There is a spelling mistake in a mlx4_err error message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1f6e16d5ea6b..309470ec0219 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2240,7 +2240,7 @@ static int mlx4_validate_optimized_steering(struct mlx4_dev *dev) for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_dev_port(dev, i, &port_cap)) { mlx4_err(dev, - "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n"); + "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n"); } else if ((dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_DEFAULT) && (port_cap.dmfs_optimized_state == -- cgit From 377228accbbb8b9738f615d791aa803f41c067e0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 4 Sep 2019 08:07:11 -0700 Subject: ixgbe: Prevent u8 wrapping of ITR value to something less than 10us There were a couple cases where the ITR value generated via the adaptive ITR scheme could exceed 126. This resulted in the value becoming either 0 or something less than 10. Switching back and forth between a value less than 10 and a value greater than 10 can cause issues as certain hardware features such as RSC to not function well when the ITR value has dropped that low. CC: stable@vger.kernel.org Fixes: b4ded8327fea ("ixgbe: Update adaptive ITR algorithm") Reported-by: Gregg Leventhal Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7882148abb43..77ca9005dc41 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2621,7 +2621,7 @@ adjust_by_size: /* 16K ints/sec to 9.2K ints/sec */ avg_wire_size *= 15; avg_wire_size += 11452; - } else if (avg_wire_size <= 1980) { + } else if (avg_wire_size < 1968) { /* 9.2K ints/sec to 8K ints/sec */ avg_wire_size *= 5; avg_wire_size += 22420; @@ -2654,6 +2654,8 @@ adjust_by_size: case IXGBE_LINK_SPEED_2_5GB_FULL: case IXGBE_LINK_SPEED_1GB_FULL: case IXGBE_LINK_SPEED_10_FULL: + if (avg_wire_size > 8064) + avg_wire_size = 8064; itr += DIV_ROUND_UP(avg_wire_size, IXGBE_ITR_ADAPTIVE_MIN_INC * 64) * IXGBE_ITR_ADAPTIVE_MIN_INC; -- cgit From bf280c0387ebbf8eebad1036fca8f7b85ebfde32 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 22 Aug 2019 20:12:37 +0300 Subject: ixgbe: fix double clean of Tx descriptors with xdp Tx code doesn't clear the descriptors' status after cleaning. So, if the budget is larger than number of used elems in a ring, some descriptors will be accounted twice and xsk_umem_complete_tx will move prod_tail far beyond the prod_head breaking the completion queue ring. Fix that by limiting the number of descriptors to clean by the number of used descriptors in the Tx ring. 'ixgbe_clean_xdp_tx_irq()' function refactored to look more like 'ixgbe_xsk_clean_tx_ring()' since we're allowed to directly use 'next_to_clean' and 'next_to_use' indexes. CC: stable@vger.kernel.org Fixes: 8221c5eba8c1 ("ixgbe: add AF_XDP zero-copy Tx support") Signed-off-by: Ilya Maximets Tested-by: William Tu Tested-by: Eelco Chaudron Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 29 +++++++++++----------------- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6b609553329f..a3b6d8c89127 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -633,19 +633,17 @@ static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring, bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *tx_ring, int napi_budget) { + u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use; unsigned int total_packets = 0, total_bytes = 0; - u32 i = tx_ring->next_to_clean, xsk_frames = 0; - unsigned int budget = q_vector->tx.work_limit; struct xdp_umem *umem = tx_ring->xsk_umem; union ixgbe_adv_tx_desc *tx_desc; struct ixgbe_tx_buffer *tx_bi; - bool xmit_done; + u32 xsk_frames = 0; - tx_bi = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC(tx_ring, i); - i -= tx_ring->count; + tx_bi = &tx_ring->tx_buffer_info[ntc]; + tx_desc = IXGBE_TX_DESC(tx_ring, ntc); - do { + while (ntc != ntu) { if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) break; @@ -661,22 +659,18 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, tx_bi++; tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; + ntc++; + if (unlikely(ntc == tx_ring->count)) { + ntc = 0; tx_bi = tx_ring->tx_buffer_info; tx_desc = IXGBE_TX_DESC(tx_ring, 0); } /* issue prefetch for next Tx descriptor */ prefetch(tx_desc); + } - /* update budget accounting */ - budget--; - } while (likely(budget)); - - i += tx_ring->count; - tx_ring->next_to_clean = i; + tx_ring->next_to_clean = ntc; u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.bytes += total_bytes; @@ -688,8 +682,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, if (xsk_frames) xsk_umem_complete_tx(umem, xsk_frames); - xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); - return budget > 0 && xmit_done; + return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); } int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid) -- cgit From af38d07ed391b21f7405fa1f936ca9686787d6d2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 9 Sep 2019 16:56:02 -0400 Subject: tcp: fix tcp_ecn_withdraw_cwr() to clear TCP_ECN_QUEUE_CWR Fix tcp_ecn_withdraw_cwr() to clear the correct bit: TCP_ECN_QUEUE_CWR. Rationale: basically, TCP_ECN_DEMAND_CWR is a bit that is purely about the behavior of data receivers, and deciding whether to reflect incoming IP ECN CE marks as outgoing TCP th->ece marks. The TCP_ECN_QUEUE_CWR bit is purely about the behavior of data senders, and deciding whether to send CWR. The tcp_ecn_withdraw_cwr() function is only called from tcp_undo_cwnd_reduction() by data senders during an undo, so it should zero the sender-side state, TCP_ECN_QUEUE_CWR. It does not make sense to stop the reflection of incoming CE bits on incoming data packets just because outgoing packets were spuriously retransmitted. The bug has been reproduced with packetdrill to manifest in a scenario with RFC3168 ECN, with an incoming data packet with CE bit set and carrying a TCP timestamp value that causes cwnd undo. Before this fix, the IP CE bit was ignored and not reflected in the TCP ECE header bit, and sender sent a TCP CWR ('W') bit on the next outgoing data packet, even though the cwnd reduction had been undone. After this fix, the sender properly reflects the CE bit and does not set the W bit. Note: the bug actually predates 2005 git history; this Fixes footer is chosen to be the oldest SHA1 I have tested (from Sep 2007) for which the patch applies cleanly (since before this commit the code was in a .h file). Fixes: bdf1ee5d3bd3 ("[TCP]: Move code from tcp_ecn.h to tcp*.c and tcp.h & remove it") Signed-off-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Cc: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c21e8a22fb3b..8a1cd93dbb09 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -266,7 +266,7 @@ static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) { - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) -- cgit From 77f22f92dff8e7b45c7786a430626d38071d4670 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Sep 2019 18:56:57 +0800 Subject: tun: fix use-after-free when register netdev failed I got a UAF repport in tun driver when doing fuzzy test: [ 466.269490] ================================================================== [ 466.271792] BUG: KASAN: use-after-free in tun_chr_read_iter+0x2ca/0x2d0 [ 466.271806] Read of size 8 at addr ffff888372139250 by task tun-test/2699 [ 466.271810] [ 466.271824] CPU: 1 PID: 2699 Comm: tun-test Not tainted 5.3.0-rc1-00001-g5a9433db2614-dirty #427 [ 466.271833] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 466.271838] Call Trace: [ 466.271858] dump_stack+0xca/0x13e [ 466.271871] ? tun_chr_read_iter+0x2ca/0x2d0 [ 466.271890] print_address_description+0x79/0x440 [ 466.271906] ? vprintk_func+0x5e/0xf0 [ 466.271920] ? tun_chr_read_iter+0x2ca/0x2d0 [ 466.271935] __kasan_report+0x15c/0x1df [ 466.271958] ? tun_chr_read_iter+0x2ca/0x2d0 [ 466.271976] kasan_report+0xe/0x20 [ 466.271987] tun_chr_read_iter+0x2ca/0x2d0 [ 466.272013] do_iter_readv_writev+0x4b7/0x740 [ 466.272032] ? default_llseek+0x2d0/0x2d0 [ 466.272072] do_iter_read+0x1c5/0x5e0 [ 466.272110] vfs_readv+0x108/0x180 [ 466.299007] ? compat_rw_copy_check_uvector+0x440/0x440 [ 466.299020] ? fsnotify+0x888/0xd50 [ 466.299040] ? __fsnotify_parent+0xd0/0x350 [ 466.299064] ? fsnotify_first_mark+0x1e0/0x1e0 [ 466.304548] ? vfs_write+0x264/0x510 [ 466.304569] ? ksys_write+0x101/0x210 [ 466.304591] ? do_preadv+0x116/0x1a0 [ 466.304609] do_preadv+0x116/0x1a0 [ 466.309829] do_syscall_64+0xc8/0x600 [ 466.309849] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 466.309861] RIP: 0033:0x4560f9 [ 466.309875] Code: 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 [ 466.309889] RSP: 002b:00007ffffa5166e8 EFLAGS: 00000206 ORIG_RAX: 0000000000000127 [ 466.322992] RAX: ffffffffffffffda RBX: 0000000000400460 RCX: 00000000004560f9 [ 466.322999] RDX: 0000000000000003 RSI: 00000000200008c0 RDI: 0000000000000003 [ 466.323007] RBP: 00007ffffa516700 R08: 0000000000000004 R09: 0000000000000000 [ 466.323014] R10: 0000000000000000 R11: 0000000000000206 R12: 000000000040cb10 [ 466.323021] R13: 0000000000000000 R14: 00000000006d7018 R15: 0000000000000000 [ 466.323057] [ 466.323064] Allocated by task 2605: [ 466.335165] save_stack+0x19/0x80 [ 466.336240] __kasan_kmalloc.constprop.8+0xa0/0xd0 [ 466.337755] kmem_cache_alloc+0xe8/0x320 [ 466.339050] getname_flags+0xca/0x560 [ 466.340229] user_path_at_empty+0x2c/0x50 [ 466.341508] vfs_statx+0xe6/0x190 [ 466.342619] __do_sys_newstat+0x81/0x100 [ 466.343908] do_syscall_64+0xc8/0x600 [ 466.345303] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 466.347034] [ 466.347517] Freed by task 2605: [ 466.348471] save_stack+0x19/0x80 [ 466.349476] __kasan_slab_free+0x12e/0x180 [ 466.350726] kmem_cache_free+0xc8/0x430 [ 466.351874] putname+0xe2/0x120 [ 466.352921] filename_lookup+0x257/0x3e0 [ 466.354319] vfs_statx+0xe6/0x190 [ 466.355498] __do_sys_newstat+0x81/0x100 [ 466.356889] do_syscall_64+0xc8/0x600 [ 466.358037] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 466.359567] [ 466.360050] The buggy address belongs to the object at ffff888372139100 [ 466.360050] which belongs to the cache names_cache of size 4096 [ 466.363735] The buggy address is located 336 bytes inside of [ 466.363735] 4096-byte region [ffff888372139100, ffff88837213a100) [ 466.367179] The buggy address belongs to the page: [ 466.368604] page:ffffea000dc84e00 refcount:1 mapcount:0 mapping:ffff8883df1b4f00 index:0x0 compound_mapcount: 0 [ 466.371582] flags: 0x2fffff80010200(slab|head) [ 466.372910] raw: 002fffff80010200 dead000000000100 dead000000000122 ffff8883df1b4f00 [ 466.375209] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 466.377778] page dumped because: kasan: bad access detected [ 466.379730] [ 466.380288] Memory state around the buggy address: [ 466.381844] ffff888372139100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.384009] ffff888372139180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.386131] >ffff888372139200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.388257] ^ [ 466.390234] ffff888372139280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.392512] ffff888372139300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.394667] ================================================================== tun_chr_read_iter() accessed the memory which freed by free_netdev() called by tun_set_iff(): CPUA CPUB tun_set_iff() alloc_netdev_mqs() tun_attach() tun_chr_read_iter() tun_get() tun_do_read() tun_ring_recv() register_netdevice() <-- inject error goto err_detach tun_detach_all() <-- set RCV_SHUTDOWN free_netdev() <-- called from err_free_dev path netdev_freemem() <-- free the memory without check refcount (In this path, the refcount cannot prevent freeing the memory of dev, and the memory will be used by dev_put() called by tun_chr_read_iter() on CPUB.) (Break from tun_ring_recv(), because RCV_SHUTDOWN is set) tun_put() dev_put() <-- use the memory freed by netdev_freemem() Put the publishing of tfile->tun after register_netdevice(), so tun_get() won't get the tun pointer that freed by err_detach path if register_netdevice() failed. Fixes: eb0fb363f920 ("tuntap: attach queue 0 before registering netdevice") Reported-by: Hulk Robot Suggested-by: Jason Wang Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/tun.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index db16d7a13e00..aab0be40d443 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -787,7 +787,8 @@ static void tun_detach_all(struct net_device *dev) } static int tun_attach(struct tun_struct *tun, struct file *file, - bool skip_filter, bool napi, bool napi_frags) + bool skip_filter, bool napi, bool napi_frags, + bool publish_tun) { struct tun_file *tfile = file->private_data; struct net_device *dev = tun->dev; @@ -870,7 +871,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, * initialized tfile; otherwise we risk using half-initialized * object. */ - rcu_assign_pointer(tfile->tun, tun); + if (publish_tun) + rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; tun_set_real_num_queues(tun); @@ -2730,7 +2732,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS); + ifr->ifr_flags & IFF_NAPI_FRAGS, true); if (err < 0) return err; @@ -2829,13 +2831,17 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS); + ifr->ifr_flags & IFF_NAPI_FRAGS, false); if (err < 0) goto err_free_flow; err = register_netdevice(tun->dev); if (err < 0) goto err_detach; + /* free_netdev() won't check refcnt, to aovid race + * with dev_put() we need publish tun after registration. + */ + rcu_assign_pointer(tfile->tun, tun); } netif_carrier_on(tun->dev); @@ -2978,7 +2984,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) if (ret < 0) goto unlock; ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, - tun->flags & IFF_NAPI_FRAGS); + tun->flags & IFF_NAPI_FRAGS, true); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) -- cgit From d23dbc479a8e813db4161a695d67da0e36557846 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 10 Sep 2019 13:29:59 +0200 Subject: ipv6: Fix the link time qualifier of 'ping_v6_proc_exit_net()' The '.exit' functions from 'pernet_operations' structure should be marked as __net_exit, not __net_init. Fixes: d862e5461423 ("net: ipv6: Implement /proc/net/icmp6.") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- net/ipv6/ping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 87d2d8c1db7c..98ac32b49d8c 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -223,7 +223,7 @@ static int __net_init ping_v6_proc_init_net(struct net *net) return 0; } -static void __net_init ping_v6_proc_exit_net(struct net *net) +static void __net_exit ping_v6_proc_exit_net(struct net *net) { remove_proc_entry("icmp6", net->proc_net); } -- cgit From 10cc514f451a0f239aa34f91bc9dc954a9397840 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 10 Sep 2019 14:02:57 -0600 Subject: net: Fix null de-reference of device refcount In event of failure during register_netdevice, free_netdev is invoked immediately. free_netdev assumes that all the netdevice refcounts have been dropped prior to it being called and as a result frees and clears out the refcount pointer. However, this is not necessarily true as some of the operations in the NETDEV_UNREGISTER notifier handlers queue RCU callbacks for invocation after a grace period. The IPv4 callback in_dev_rcu_put tries to access the refcount after free_netdev is called which leads to a null de-reference- 44837.761523: <6> Unable to handle kernel paging request at virtual address 0000004a88287000 44837.761651: <2> pc : in_dev_finish_destroy+0x4c/0xc8 44837.761654: <2> lr : in_dev_finish_destroy+0x2c/0xc8 44837.762393: <2> Call trace: 44837.762398: <2> in_dev_finish_destroy+0x4c/0xc8 44837.762404: <2> in_dev_rcu_put+0x24/0x30 44837.762412: <2> rcu_nocb_kthread+0x43c/0x468 44837.762418: <2> kthread+0x118/0x128 44837.762424: <2> ret_from_fork+0x10/0x1c Fix this by waiting for the completion of the call_rcu() in case of register_netdevice errors. Fixes: 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev failure.") Cc: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 0891f499c1bb..5156c0edebe8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8758,6 +8758,8 @@ int register_netdevice(struct net_device *dev) ret = notifier_to_errno(ret); if (ret) { rollback_registered(dev); + rcu_barrier(); + dev->reg_state = NETREG_UNREGISTERED; } /* -- cgit From a21b7f0cff1906a93a0130b74713b15a0b36481d Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Wed, 11 Sep 2019 10:09:02 -0500 Subject: net: qrtr: fix memort leak in qrtr_tun_write_iter In qrtr_tun_write_iter the allocated kbuf should be release in case of error or success return. v2 Update: Thanks to David Miller for pointing out the release on success path as well. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller --- net/qrtr/tun.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c index ccff1e544c21..e35869e81766 100644 --- a/net/qrtr/tun.c +++ b/net/qrtr/tun.c @@ -84,11 +84,14 @@ static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!kbuf) return -ENOMEM; - if (!copy_from_iter_full(kbuf, len, from)) + if (!copy_from_iter_full(kbuf, len, from)) { + kfree(kbuf); return -EFAULT; + } ret = qrtr_endpoint_post(&tun->ep, kbuf, len); + kfree(kbuf); return ret < 0 ? ret : len; } -- cgit From f39b683d35dfa93a58f1b400a8ec0ff81296b37c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 12 Sep 2019 13:01:44 +0200 Subject: ixgbe: Fix secpath usage for IPsec TX offload. The ixgbe driver currently does IPsec TX offloading based on an existing secpath. However, the secpath can also come from the RX side, in this case it is misinterpreted for TX offload and the packets are dropped with a "bad sa_idx" error. Fix this by using the xfrm_offload() function to test for TX offload. Fixes: 592594704761 ("ixgbe: process the Tx ipsec offload") Reported-by: Michael Marley Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 77ca9005dc41..51c696b6bf64 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ixgbe.h" #include "ixgbe_common.h" @@ -8697,7 +8698,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, #endif /* IXGBE_FCOE */ #ifdef CONFIG_IXGBE_IPSEC - if (secpath_exists(skb) && + if (xfrm_offload(skb) && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif -- cgit From b456d72412ca8797234449c25815e82f4e1426c0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 11 Sep 2019 18:02:39 +0200 Subject: sctp: Fix the link time qualifier of 'sctp_ctrlsock_exit()' The '.exit' functions from 'pernet_operations' structure should be marked as __net_exit, not __net_init. Fixes: 8e2d61e0aed2 ("sctp: fix race on protocol/netns initialization") Signed-off-by: Christophe JAILLET Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2d47adcb4cbe..53746ffeeca3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1336,7 +1336,7 @@ static int __net_init sctp_ctrlsock_init(struct net *net) return status; } -static void __net_init sctp_ctrlsock_exit(struct net *net) +static void __net_exit sctp_ctrlsock_exit(struct net *net) { /* Free the control endpoint. */ inet_ctl_sock_destroy(net->sctp.ctl_sock); -- cgit From 8f6617badcc96a582678ea36ea96490c5ff26eb4 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 12 Sep 2019 12:07:34 -0700 Subject: ixgbevf: Fix secpath usage for IPsec Tx offload Port the same fix for ixgbe to ixgbevf. The ixgbevf driver currently does IPsec Tx offloading based on an existing secpath. However, the secpath can also come from the Rx side, in this case it is misinterpreted for Tx offload and the packets are dropped with a "bad sa_idx" error. Fix this by using the xfrm_offload() function to test for Tx offload. CC: Shannon Nelson Fixes: 7f68d4306701 ("ixgbevf: enable VF IPsec offload operations") Reported-by: Jonathan Tooker Signed-off-by: Jeff Kirsher Acked-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d2b41f9f87f8..72872d6ca80c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "ixgbevf.h" @@ -4161,7 +4162,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, first->protocol = vlan_get_protocol(skb); #ifdef CONFIG_IXGBEVF_IPSEC - if (secpath_exists(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) + if (xfrm_offload(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx); -- cgit From 8e2ef6abd4653b1cfeefe262017ecdd5eaf40fef Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 12 Sep 2019 12:02:17 +0800 Subject: sctp: change return type of sctp_get_port_local Currently sctp_get_port_local() returns a long which is either 0,1 or a pointer casted to long. It's neither of the callers use the return value since commit 62208f12451f ("net: sctp: simplify sctp_get_port"). Now two callers are sctp_get_port and sctp_do_bind, they actually assumend a casted to an int was the same as a pointer casted to a long, and they don't save the return value just check whether it is zero or non-zero, so it would better change return type from long to int for sctp_get_port_local. Signed-off-by: Mao Wenan Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ad8751891b00..1e26606683a8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -309,7 +309,7 @@ static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) return retval; } -static long sctp_get_port_local(struct sock *, union sctp_addr *); +static int sctp_get_port_local(struct sock *, union sctp_addr *); /* Verify this is a valid sockaddr. */ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, @@ -7999,7 +7999,7 @@ static void sctp_unhash(struct sock *sk) static struct sctp_bind_bucket *sctp_bucket_create( struct sctp_bind_hashbucket *head, struct net *, unsigned short snum); -static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) +static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_sock *sp = sctp_sk(sk); bool reuse = (sk->sk_reuse || sp->reuse); @@ -8109,7 +8109,7 @@ pp_found: if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, sp2, sp)) { - ret = (long)sk2; + ret = 1; goto fail_unlock; } } @@ -8181,7 +8181,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) addr.v4.sin_port = htons(snum); /* Note: sk->sk_num gets filled in if ephemeral port request. */ - return !!sctp_get_port_local(sk, &addr); + return sctp_get_port_local(sk, &addr); } /* -- cgit From e0e4b8de10ad3affe83cc91a1cf6d1e48cca333e Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 12 Sep 2019 12:02:18 +0800 Subject: sctp: remove redundant assignment when call sctp_get_port_local There are more parentheses in if clause when call sctp_get_port_local in sctp_do_bind, and redundant assignment to 'ret'. This patch is to do cleanup. Signed-off-by: Mao Wenan Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1e26606683a8..5fe26ca6edbd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -399,9 +399,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) * detection. */ addr->v4.sin_port = htons(snum); - if ((ret = sctp_get_port_local(sk, addr))) { + if (sctp_get_port_local(sk, addr)) return -EADDRINUSE; - } /* Refresh ephemeral port. */ if (!bp->port) -- cgit From 29b99f54a8e63f871cf4060406d09fd8b0d744d8 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 12 Sep 2019 12:02:19 +0800 Subject: sctp: destroy bucket if failed to bind addr There is one memory leak bug report: BUG: memory leak unreferenced object 0xffff8881dc4c5ec0 (size 40): comm "syz-executor.0", pid 5673, jiffies 4298198457 (age 27.578s) hex dump (first 32 bytes): 02 00 00 00 81 88 ff ff 00 00 00 00 00 00 00 00 ................ f8 63 3d c1 81 88 ff ff 00 00 00 00 00 00 00 00 .c=............. backtrace: [<0000000072006339>] sctp_get_port_local+0x2a1/0xa00 [sctp] [<00000000c7b379ec>] sctp_do_bind+0x176/0x2c0 [sctp] [<000000005be274a2>] sctp_bind+0x5a/0x80 [sctp] [<00000000b66b4044>] inet6_bind+0x59/0xd0 [ipv6] [<00000000c68c7f42>] __sys_bind+0x120/0x1f0 net/socket.c:1647 [<000000004513635b>] __do_sys_bind net/socket.c:1658 [inline] [<000000004513635b>] __se_sys_bind net/socket.c:1656 [inline] [<000000004513635b>] __x64_sys_bind+0x3e/0x50 net/socket.c:1656 [<0000000061f2501e>] do_syscall_64+0x72/0x2e0 arch/x86/entry/common.c:296 [<0000000003d1e05e>] entry_SYSCALL_64_after_hwframe+0x49/0xbe This is because in sctp_do_bind, if sctp_get_port_local is to create hash bucket successfully, and sctp_add_bind_addr failed to bind address, e.g return -ENOMEM, so memory leak found, it needs to destroy allocated bucket. Reported-by: Hulk Robot Signed-off-by: Mao Wenan Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fe26ca6edbd..b083d4e66230 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -412,11 +412,13 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len, SCTP_ADDR_SRC, GFP_ATOMIC); - /* Copy back into socket for getsockname() use. */ - if (!ret) { - inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); - sp->pf->to_sk_saddr(addr, sk); + if (ret) { + sctp_put_port(sk); + return ret; } + /* Copy back into socket for getsockname() use. */ + inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); + sp->pf->to_sk_saddr(addr, sk); return ret; } -- cgit From 4d7ffcf3bf1be98d876c570cab8fc31d9fa92725 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Thu, 12 Sep 2019 10:42:00 +0200 Subject: cdc_ether: fix rndis support for Mediatek based smartphones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Mediatek based smartphone owner reports problems with USB tethering in Linux. The verbose USB listing shows a rndis_host interface pair (e0/01/03 + 10/00/00), but the driver fails to bind with [ 355.960428] usb 1-4: bad CDC descriptors The problem is a failsafe test intended to filter out ACM serial functions using the same 02/02/ff class/subclass/protocol as RNDIS. The serial functions are recognized by their non-zero bmCapabilities. No RNDIS function with non-zero bmCapabilities were known at the time this failsafe was added. But it turns out that some Wireless class RNDIS functions are using the bmCapabilities field. These functions are uniquely identified as RNDIS by their class/subclass/protocol, so the failing test can safely be disabled. The same applies to the two types of Misc class RNDIS functions. Applying the failsafe to Communication class functions only retains the original functionality, and fixes the problem for the Mediatek based smartphone. Tow examples of CDC functional descriptors with non-zero bmCapabilities from Wireless class RNDIS functions are: 0e8d:000a Mediatek Crosscall Spider X5 3G Phone CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x0f connection notifications sends break line coding and serial state get/set/clear comm features CDC Union: bMasterInterface 0 bSlaveInterface 1 CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 and 19d2:1023 ZTE K4201-z CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 CDC Union: bMasterInterface 0 bSlaveInterface 1 The Mediatek example is believed to apply to most smartphones with Mediatek firmware. The ZTE example is most likely also part of a larger family of devices/firmwares. Suggested-by: Lars Melin Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8458e88c18e9..32f53de5b1fe 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -206,7 +206,15 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) goto bad_desc; } skip: - if (rndis && header.usb_cdc_acm_descriptor && + /* Communcation class functions with bmCapabilities are not + * RNDIS. But some Wireless class RNDIS functions use + * bmCapabilities for their own purpose. The failsafe is + * therefore applied only to Communication class RNDIS + * functions. The rndis test is redundant, but a cheap + * optimization. + */ + if (rndis && is_rndis(&intf->cur_altsetting->desc) && + header.usb_cdc_acm_descriptor && header.usb_cdc_acm_descriptor->bmCapabilities) { dev_dbg(&intf->dev, "ACM capabilities %02x, not really RNDIS?\n", -- cgit