From 5de55ce951a1466e31ff68a7bc6b0a7ce3cb5947 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 17 Aug 2020 11:19:26 -0400
Subject: xprtrdma: Release in-flight MRs on disconnect

Dan Aloni reports that when a server disconnects abruptly, a few
memory regions are left DMA mapped. Over time this leak could pin
enough I/O resources to slow or even deadlock an NFS/RDMA client.

I found that if a transport disconnects before pending Send and
FastReg WRs can be posted, the to-be-registered MRs are stranded on
the req's rl_registered list and never released -- since they
weren't posted, there's no Send completion to DMA unmap them.

Reported-by: Dan Aloni <dan@kernelim.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 75c646743df3..ca89f24a1590 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -933,6 +933,8 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req)
 
 	rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
 	rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
+
+	frwr_reset(req);
 }
 
 /* ASSUMPTION: the rb_allreqs list is stable for the duration,
-- 
cgit 


From 097930e85f90f252c44dc0d084598265dd44ca48 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <ll@simonwunderlich.de>
Date: Thu, 27 Aug 2020 17:34:48 +0200
Subject: batman-adv: bla: fix type misuse for backbone_gw hash indexing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It seems that due to a copy & paste error the void pointer
in batadv_choose_backbone_gw() is cast to the wrong type.

Fixing this by using "struct batadv_bla_backbone_gw" instead of "struct
batadv_bla_claim" which better matches the caller's side.

For now it seems that we were lucky because the two structs both have
their orig/vid and addr/vid in the beginning. However I stumbled over
this issue when I was trying to add some debug variables in front of
"orig" in batadv_backbone_gw, which caused hash lookups to fail.

Fixes: 07568d0369f9 ("batman-adv: don't rely on positions in struct for hashing")
Signed-off-by: Linus Lüssing <ll@simonwunderlich.de>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/bridge_loop_avoidance.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 8500f56cbd10..d8c5d3170676 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -83,11 +83,12 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
  */
 static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
 {
-	const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
+	const struct batadv_bla_backbone_gw *gw;
 	u32 hash = 0;
 
-	hash = jhash(&claim->addr, sizeof(claim->addr), hash);
-	hash = jhash(&claim->vid, sizeof(claim->vid), hash);
+	gw = (struct batadv_bla_backbone_gw *)data;
+	hash = jhash(&gw->orig, sizeof(gw->orig), hash);
+	hash = jhash(&gw->vid, sizeof(gw->vid), hash);
 
 	return hash % size;
 }
-- 
cgit 


From cc8e58f8325cdf14b9516b61c384cdfd02a4f408 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 3 Sep 2020 19:10:11 -0700
Subject: act_ife: load meta modules before tcf_idr_check_alloc()

The following deadlock scenario is triggered by syzbot:

Thread A:				Thread B:
tcf_idr_check_alloc()
...
populate_metalist()
  rtnl_unlock()
					rtnl_lock()
					...
  request_module()			tcf_idr_check_alloc()
  rtnl_lock()

At this point, thread A is waiting for thread B to release RTNL
lock, while thread B is waiting for thread A to commit the IDR
change with tcf_idr_insert() later.

Break this deadlock situation by preloading ife modules earlier,
before tcf_idr_check_alloc(), this is fine because we only need
to load modules we need potentially.

Reported-and-tested-by: syzbot+80e32b5d1f9923f8ace6@syzkaller.appspotmail.com
Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_ife.c | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c1fcd85719d6..5c568757643b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -436,6 +436,25 @@ static void tcf_ife_cleanup(struct tc_action *a)
 		kfree_rcu(p, rcu);
 }
 
+static int load_metalist(struct nlattr **tb, bool rtnl_held)
+{
+	int i;
+
+	for (i = 1; i < max_metacnt; i++) {
+		if (tb[i]) {
+			void *val = nla_data(tb[i]);
+			int len = nla_len(tb[i]);
+			int rc;
+
+			rc = load_metaops_and_vet(i, val, len, rtnl_held);
+			if (rc != 0)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 			     bool exists, bool rtnl_held)
 {
@@ -449,10 +468,6 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 			val = nla_data(tb[i]);
 			len = nla_len(tb[i]);
 
-			rc = load_metaops_and_vet(i, val, len, rtnl_held);
-			if (rc != 0)
-				return rc;
-
 			rc = add_metainfo(ife, i, val, len, exists);
 			if (rc)
 				return rc;
@@ -509,6 +524,21 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (!p)
 		return -ENOMEM;
 
+	if (tb[TCA_IFE_METALST]) {
+		err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
+						  tb[TCA_IFE_METALST], NULL,
+						  NULL);
+		if (err) {
+			kfree(p);
+			return err;
+		}
+		err = load_metalist(tb2, rtnl_held);
+		if (err) {
+			kfree(p);
+			return err;
+		}
+	}
+
 	index = parm->index;
 	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0) {
@@ -570,15 +600,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (tb[TCA_IFE_METALST]) {
-		err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
-						  tb[TCA_IFE_METALST], NULL,
-						  NULL);
-		if (err)
-			goto metadata_parse_err;
 		err = populate_metalist(ife, tb2, exists, rtnl_held);
 		if (err)
 			goto metadata_parse_err;
-
 	} else {
 		/* if no passed metadata allow list or passed allow-all
 		 * then here we process by adding as many supported metadatum
-- 
cgit 


From 7dda5b3384121181c4e79f6eaeac2b94c0622c8d Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Fri, 4 Sep 2020 20:28:00 +0200
Subject: batman-adv: mcast/TT: fix wrongly dropped or rerouted packets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The unicast packet rerouting code makes several assumptions. For
instance it assumes that there is always exactly one destination in the
TT. This breaks for multicast frames in a unicast packets in several ways:

For one thing if there is actually no TT entry and the destination node
was selected due to the multicast tvlv flags it announced. Then an
intermediate node will wrongly drop the packet.

For another thing if there is a TT entry but the TTVN of this entry is
newer than the originally addressed destination node: Then the
intermediate node will wrongly redirect the packet, leading to
duplicated multicast packets at a multicast listener and missing
packets at other multicast listeners or multicast routers.

Fixing this by not applying the unicast packet rerouting to batman-adv
unicast packets with a multicast payload. We are not able to detect a
roaming multicast listener at the moment and will just continue to send
the multicast frame to both the new and old destination for a while in
case of such a roaming multicast listener.

Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/routing.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 27cdf5e4349a..9e5c71e406ff 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -826,6 +826,10 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	vid = batadv_get_vid(skb, hdr_len);
 	ethhdr = (struct ethhdr *)(skb->data + hdr_len);
 
+	/* do not reroute multicast frames in a unicast header */
+	if (is_multicast_ether_addr(ethhdr->h_dest))
+		return true;
+
 	/* check if the destination client was served by this node and it is now
 	 * roaming. In this case, it means that the node has got a ROAM_ADV
 	 * message and that it knows the new destination in the mesh to re-route
-- 
cgit 


From 8c6b6c793ed32b8f9770ebcdf1ba99af423c303b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sat, 5 Sep 2020 10:03:26 -0400
Subject: SUNRPC: stop printk reading past end of string

Since p points at raw xdr data, there's no guarantee that it's NULL
terminated, so we should give a length.  And probably escape any special
characters too.

Reported-by: Zhi Li <yieli@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/rpcb_clnt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c27123e6ba80..4a67685c83eb 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -982,8 +982,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 	p = xdr_inline_decode(xdr, len);
 	if (unlikely(p == NULL))
 		goto out_fail;
-	dprintk("RPC: %5u RPCB_%s reply: %s\n", req->rq_task->tk_pid,
-			req->rq_task->tk_msg.rpc_proc->p_name, (char *)p);
+	dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid,
+			req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p);
 
 	if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len,
 				sap, sizeof(address)) == 0)
-- 
cgit 


From e1f469cd5866499ac40bfdca87411e1c525a10c7 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 7 Sep 2020 15:54:41 +0000
Subject: Revert "netns: don't disable BHs when locking "nsid_lock""

This reverts commit 8d7e5dee972f1cde2ba96c621f1541fa36e7d4f4.

To protect netns id, the nsid_lock is used when netns id is being
allocated and removed by peernet2id_alloc() and unhash_nsid().
The nsid_lock can be used in BH context but only spin_lock() is used
in this code.
Using spin_lock() instead of spin_lock_bh() can result in a deadlock in
the following scenario reported by the lockdep.
In order to avoid a deadlock, the spin_lock_bh() should be used instead
of spin_lock() to acquire nsid_lock.

Test commands:
    ip netns del nst
    ip netns add nst
    ip link add veth1 type veth peer name veth2
    ip link set veth1 netns nst
    ip netns exec nst ip link add name br1 type bridge vlan_filtering 1
    ip netns exec nst ip link set dev br1 up
    ip netns exec nst ip link set dev veth1 master br1
    ip netns exec nst ip link set dev veth1 up
    ip netns exec nst ip link add macvlan0 link br1 up type macvlan

Splat looks like:
[   33.615860][  T607] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
[   33.617194][  T607] 5.9.0-rc1+ #665 Not tainted
[ ... ]
[   33.670615][  T607] Chain exists of:
[   33.670615][  T607]   &mc->mca_lock --> &bridge_netdev_addr_lock_key --> &net->nsid_lock
[   33.670615][  T607]
[   33.673118][  T607]  Possible interrupt unsafe locking scenario:
[   33.673118][  T607]
[   33.674599][  T607]        CPU0                    CPU1
[   33.675557][  T607]        ----                    ----
[   33.676516][  T607]   lock(&net->nsid_lock);
[   33.677306][  T607]                                local_irq_disable();
[   33.678517][  T607]                                lock(&mc->mca_lock);
[   33.679725][  T607]                                lock(&bridge_netdev_addr_lock_key);
[   33.681166][  T607]   <Interrupt>
[   33.681791][  T607]     lock(&mc->mca_lock);
[   33.682579][  T607]
[   33.682579][  T607]  *** DEADLOCK ***
[ ... ]
[   33.922046][  T607] stack backtrace:
[   33.922999][  T607] CPU: 3 PID: 607 Comm: ip Not tainted 5.9.0-rc1+ #665
[   33.924099][  T607] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
[   33.925714][  T607] Call Trace:
[   33.926238][  T607]  dump_stack+0x78/0xab
[   33.926905][  T607]  check_irq_usage+0x70b/0x720
[   33.927708][  T607]  ? iterate_chain_key+0x60/0x60
[   33.928507][  T607]  ? check_path+0x22/0x40
[   33.929201][  T607]  ? check_noncircular+0xcf/0x180
[   33.930024][  T607]  ? __lock_acquire+0x1952/0x1f20
[   33.930860][  T607]  __lock_acquire+0x1952/0x1f20
[   33.931667][  T607]  lock_acquire+0xaf/0x3a0
[   33.932366][  T607]  ? peernet2id_alloc+0x3a/0x170
[   33.933147][  T607]  ? br_port_fill_attrs+0x54c/0x6b0 [bridge]
[   33.934140][  T607]  ? br_port_fill_attrs+0x5de/0x6b0 [bridge]
[   33.935113][  T607]  ? kvm_sched_clock_read+0x14/0x30
[   33.935974][  T607]  _raw_spin_lock+0x30/0x70
[   33.936728][  T607]  ? peernet2id_alloc+0x3a/0x170
[   33.937523][  T607]  peernet2id_alloc+0x3a/0x170
[   33.938313][  T607]  rtnl_fill_ifinfo+0xb5e/0x1400
[   33.939091][  T607]  rtmsg_ifinfo_build_skb+0x8a/0xf0
[   33.939953][  T607]  rtmsg_ifinfo_event.part.39+0x17/0x50
[   33.940863][  T607]  rtmsg_ifinfo+0x1f/0x30
[   33.941571][  T607]  __dev_notify_flags+0xa5/0xf0
[   33.942376][  T607]  ? __irq_work_queue_local+0x49/0x50
[   33.943249][  T607]  ? irq_work_queue+0x1d/0x30
[   33.943993][  T607]  ? __dev_set_promiscuity+0x7b/0x1a0
[   33.944878][  T607]  __dev_set_promiscuity+0x7b/0x1a0
[   33.945758][  T607]  dev_set_promiscuity+0x1e/0x50
[   33.946582][  T607]  br_port_set_promisc+0x1f/0x40 [bridge]
[   33.947487][  T607]  br_manage_promisc+0x8b/0xe0 [bridge]
[   33.948388][  T607]  __dev_set_promiscuity+0x123/0x1a0
[   33.949244][  T607]  __dev_set_rx_mode+0x68/0x90
[   33.950021][  T607]  dev_uc_add+0x50/0x60
[   33.950720][  T607]  macvlan_open+0x18e/0x1f0 [macvlan]
[   33.951601][  T607]  __dev_open+0xd6/0x170
[   33.952269][  T607]  __dev_change_flags+0x181/0x1d0
[   33.953056][  T607]  rtnl_configure_link+0x2f/0xa0
[   33.953884][  T607]  __rtnl_newlink+0x6b9/0x8e0
[   33.954665][  T607]  ? __lock_acquire+0x95d/0x1f20
[   33.955450][  T607]  ? lock_acquire+0xaf/0x3a0
[   33.956193][  T607]  ? is_bpf_text_address+0x5/0xe0
[   33.956999][  T607]  rtnl_newlink+0x47/0x70

Acked-by: Guillaume Nault <gnault@redhat.com>
Fixes: 8d7e5dee972f ("netns: don't disable BHs when locking "nsid_lock"")
Reported-by: syzbot+3f960c64a104eaa2c813@syzkaller.appspotmail.com
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/net_namespace.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dcd61aca343e..944ab214e5ae 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -251,10 +251,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
 	if (refcount_read(&net->count) == 0)
 		return NETNSA_NSID_NOT_ASSIGNED;
 
-	spin_lock(&net->nsid_lock);
+	spin_lock_bh(&net->nsid_lock);
 	id = __peernet2id(net, peer);
 	if (id >= 0) {
-		spin_unlock(&net->nsid_lock);
+		spin_unlock_bh(&net->nsid_lock);
 		return id;
 	}
 
@@ -264,12 +264,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
 	 * just been idr_remove()'d from there in cleanup_net().
 	 */
 	if (!maybe_get_net(peer)) {
-		spin_unlock(&net->nsid_lock);
+		spin_unlock_bh(&net->nsid_lock);
 		return NETNSA_NSID_NOT_ASSIGNED;
 	}
 
 	id = alloc_netid(net, peer, -1);
-	spin_unlock(&net->nsid_lock);
+	spin_unlock_bh(&net->nsid_lock);
 
 	put_net(peer);
 	if (id < 0)
@@ -534,20 +534,20 @@ static void unhash_nsid(struct net *net, struct net *last)
 	for_each_net(tmp) {
 		int id;
 
-		spin_lock(&tmp->nsid_lock);
+		spin_lock_bh(&tmp->nsid_lock);
 		id = __peernet2id(tmp, net);
 		if (id >= 0)
 			idr_remove(&tmp->netns_ids, id);
-		spin_unlock(&tmp->nsid_lock);
+		spin_unlock_bh(&tmp->nsid_lock);
 		if (id >= 0)
 			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
 					  GFP_KERNEL);
 		if (tmp == last)
 			break;
 	}
-	spin_lock(&net->nsid_lock);
+	spin_lock_bh(&net->nsid_lock);
 	idr_destroy(&net->netns_ids);
-	spin_unlock(&net->nsid_lock);
+	spin_unlock_bh(&net->nsid_lock);
 }
 
 static LLIST_HEAD(cleanup_list);
@@ -760,9 +760,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return PTR_ERR(peer);
 	}
 
-	spin_lock(&net->nsid_lock);
+	spin_lock_bh(&net->nsid_lock);
 	if (__peernet2id(net, peer) >= 0) {
-		spin_unlock(&net->nsid_lock);
+		spin_unlock_bh(&net->nsid_lock);
 		err = -EEXIST;
 		NL_SET_BAD_ATTR(extack, nla);
 		NL_SET_ERR_MSG(extack,
@@ -771,7 +771,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	err = alloc_netid(net, peer, nsid);
-	spin_unlock(&net->nsid_lock);
+	spin_unlock_bh(&net->nsid_lock);
 	if (err >= 0) {
 		rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
 				  nlh, GFP_KERNEL);
-- 
cgit 


From 1cc5ef91d2ff94d2bf2de3b3585423e8a1051cb6 Mon Sep 17 00:00:00 2001
From: Will McVicker <willmcvicker@google.com>
Date: Mon, 24 Aug 2020 19:38:32 +0000
Subject: netfilter: ctnetlink: add a range check for l3/l4 protonum

The indexes to the nf_nat_l[34]protos arrays come from userspace. So
check the tuple's family, e.g. l3num, when creating the conntrack in
order to prevent an OOB memory access during setup.  Here is an example
kernel panic on 4.14.180 when userspace passes in an index greater than
NFPROTO_NUMPROTO.

Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
Modules linked in:...
Process poc (pid: 5614, stack limit = 0x00000000a3933121)
CPU: 4 PID: 5614 Comm: poc Tainted: G S      W  O    4.14.180-g051355490483
Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM
task: 000000002a3dfffe task.stack: 00000000a3933121
pc : __cfi_check_fail+0x1c/0x24
lr : __cfi_check_fail+0x1c/0x24
...
Call trace:
__cfi_check_fail+0x1c/0x24
name_to_dev_t+0x0/0x468
nfnetlink_parse_nat_setup+0x234/0x258
ctnetlink_parse_nat_setup+0x4c/0x228
ctnetlink_new_conntrack+0x590/0xc40
nfnetlink_rcv_msg+0x31c/0x4d4
netlink_rcv_skb+0x100/0x184
nfnetlink_rcv+0xf4/0x180
netlink_unicast+0x360/0x770
netlink_sendmsg+0x5a0/0x6a4
___sys_sendmsg+0x314/0x46c
SyS_sendmsg+0xb4/0x108
el0_svc_naked+0x34/0x38

This crash is not happening since 5.4+, however, ctnetlink still
allows for creating entries with unsupported layer 3 protocol number.

Fixes: c1d10adb4a521 ("[NETFILTER]: Add ctnetlink port for nf_conntrack")
Signed-off-by: Will McVicker <willmcvicker@google.com>
[pablo@netfilter.org: rebased original patch on top of nf.git]
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 832eabecfbdd..d65846aa8059 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1404,7 +1404,8 @@ ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
 	if (err < 0)
 		return err;
 
-
+	if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6)
+		return -EOPNOTSUPP;
 	tuple->src.l3num = l3num;
 
 	if (flags & CTA_FILTER_FLAG(CTA_IP_DST) ||
-- 
cgit 


From 67cc570edaa02016a8685a06a0ee91f05a6277d9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 27 Aug 2020 19:28:42 +0200
Subject: netfilter: nf_tables: coalesce multiple notifications into one skbuff

On x86_64, each notification results in one skbuff allocation which
consumes at least 768 bytes due to the skbuff overhead.

This patch coalesces several notifications into one single skbuff, so
each notification consumes at least ~211 bytes, that ~3.5 times less
memory consumption. As a result, this is reducing the chances to exhaust
the netlink socket receive buffer.

Rule of thumb is that each notification batch only contains netlink
messages whose report flag is the same, nfnetlink_send() requires this
to do appropriate delivery to userspace, either via unicast (echo
mode) or multicast (monitor mode).

The skbuff control buffer is used to annotate the report flag for later
handling at the new coalescing routine.

The batch skbuff notification size is NLMSG_GOODSIZE, using a larger
skbuff would allow for more socket receiver buffer savings (to amortize
the cost of the skbuff even more), however, going over that size might
break userspace applications, so let's be conservative and stick to
NLMSG_GOODSIZE.

Reported-by: Phil Sutter <phil@nwl.cc>
Acked-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 70 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 57 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b7dc1cbf40ea..4603b667973a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -684,6 +684,18 @@ nla_put_failure:
 	return -1;
 }
 
+struct nftnl_skb_parms {
+	bool report;
+};
+#define NFT_CB(skb)	(*(struct nftnl_skb_parms*)&((skb)->cb))
+
+static void nft_notify_enqueue(struct sk_buff *skb, bool report,
+			       struct list_head *notify_list)
+{
+	NFT_CB(skb).report = report;
+	list_add_tail(&skb->list, notify_list);
+}
+
 static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 {
 	struct sk_buff *skb;
@@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 	}
 
-	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
-		       ctx->report, GFP_KERNEL);
+	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 	}
 
-	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
-		       ctx->report, GFP_KERNEL);
+	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
-		       ctx->report, GFP_KERNEL);
+	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report,
-		       gfp_flags);
+	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
-		       GFP_KERNEL);
+	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
 	return;
 err:
 	nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
 		goto err;
 	}
 
-	nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp);
+	nft_notify_enqueue(skb, report, &net->nft.notify_list);
 	return;
 err:
 	nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
-		       ctx->report, GFP_KERNEL);
+	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net)
 	mutex_unlock(&net->nft.commit_mutex);
 }
 
+static void nft_commit_notify(struct net *net, u32 portid)
+{
+	struct sk_buff *batch_skb = NULL, *nskb, *skb;
+	unsigned char *data;
+	int len;
+
+	list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) {
+		if (!batch_skb) {
+new_batch:
+			batch_skb = skb;
+			len = NLMSG_GOODSIZE - skb->len;
+			list_del(&skb->list);
+			continue;
+		}
+		len -= skb->len;
+		if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) {
+			data = skb_put(batch_skb, skb->len);
+			memcpy(data, skb->data, skb->len);
+			list_del(&skb->list);
+			kfree_skb(skb);
+			continue;
+		}
+		nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES,
+			       NFT_CB(batch_skb).report, GFP_KERNEL);
+		goto new_batch;
+	}
+
+	if (batch_skb) {
+		nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES,
+			       NFT_CB(batch_skb).report, GFP_KERNEL);
+	}
+
+	WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
+}
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
 	struct nft_trans *trans, *next;
@@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		}
 	}
 
+	nft_commit_notify(net, NETLINK_CB(skb).portid);
 	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
 	nf_tables_commit_release(net);
 
@@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net)
 	INIT_LIST_HEAD(&net->nft.tables);
 	INIT_LIST_HEAD(&net->nft.commit_list);
 	INIT_LIST_HEAD(&net->nft.module_list);
+	INIT_LIST_HEAD(&net->nft.notify_list);
 	mutex_init(&net->nft.commit_mutex);
 	net->nft.base_seq = 1;
 	net->nft.validate_state = NFT_VALIDATE_SKIP;
@@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 	mutex_unlock(&net->nft.commit_mutex);
 	WARN_ON_ONCE(!list_empty(&net->nft.tables));
 	WARN_ON_ONCE(!list_empty(&net->nft.module_list));
+	WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
 }
 
 static struct pernet_operations nf_tables_net_ops = {
-- 
cgit 


From 6c0d95d1238d944fe54f0bbfc7ec017d78435daa Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Tue, 1 Sep 2020 08:56:19 +0200
Subject: netfilter: ctnetlink: fix mark based dump filtering regression

conntrack mark based dump filtering may falsely skip entries if a mask
is given: If the mask-based check does not filter out the entry, the
else-if check is always true and compares the mark without considering
the mask. The if/else-if logic seems wrong.

Given that the mask during filter setup is implicitly set to 0xffffffff
if not specified explicitly, the mark filtering flags seem to just
complicate things. Restore the previously used approach by always
matching against a zero mask is no filter mark is given.

Fixes: cb8aa9a3affb ("netfilter: ctnetlink: add kernel side filtering for dump")
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d65846aa8059..c3a4214dc958 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -851,7 +851,6 @@ static int ctnetlink_done(struct netlink_callback *cb)
 }
 
 struct ctnetlink_filter {
-	u_int32_t cta_flags;
 	u8 family;
 
 	u_int32_t orig_flags;
@@ -906,10 +905,6 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
 					 struct nf_conntrack_zone *zone,
 					 u_int32_t flags);
 
-/* applied on filters */
-#define CTA_FILTER_F_CTA_MARK			(1 << 0)
-#define CTA_FILTER_F_CTA_MARK_MASK		(1 << 1)
-
 static struct ctnetlink_filter *
 ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 {
@@ -930,14 +925,10 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 #ifdef CONFIG_NF_CONNTRACK_MARK
 	if (cda[CTA_MARK]) {
 		filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
-		filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK);
-
-		if (cda[CTA_MARK_MASK]) {
+		if (cda[CTA_MARK_MASK])
 			filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
-			filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK_MASK);
-		} else {
+		else
 			filter->mark.mask = 0xffffffff;
-		}
 	} else if (cda[CTA_MARK_MASK]) {
 		err = -EINVAL;
 		goto err_filter;
@@ -1117,11 +1108,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK_MASK)) &&
-	    (ct->mark & filter->mark.mask) != filter->mark.val)
-		goto ignore_entry;
-	else if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK)) &&
-		 ct->mark != filter->mark.val)
+	if ((ct->mark & filter->mark.mask) != filter->mark.val)
 		goto ignore_entry;
 #endif
 
-- 
cgit 


From 526e81b990e53e31ba40ba304a2285ffd098721f Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Tue, 1 Sep 2020 16:56:02 +0200
Subject: netfilter: conntrack: nf_conncount_init is failing with IPv6 disabled

The openvswitch module fails initialization when used in a kernel
without IPv6 enabled. nf_conncount_init() fails because the ct code
unconditionally tries to initialize the netns IPv6 related bit,
regardless of the build option. The change below ignores the IPv6
part if not enabled.

Note that the corresponding _put() function already has this IPv6
configuration check.

Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit")
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 95f79980348c..47e9319d2cf3 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -565,6 +565,7 @@ static int nf_ct_netns_inet_get(struct net *net)
 	int err;
 
 	err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+#if IS_ENABLED(CONFIG_IPV6)
 	if (err < 0)
 		goto err1;
 	err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
@@ -575,6 +576,7 @@ static int nf_ct_netns_inet_get(struct net *net)
 err2:
 	nf_ct_netns_put(net, NFPROTO_IPV4);
 err1:
+#endif
 	return err;
 }
 
-- 
cgit 


From 0c92411bb81de9bc516d6924f50289d8d5f880e5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 3 Sep 2020 19:00:52 +0200
Subject: netfilter: nft_meta: use socket user_ns to retrieve skuid and skgid

... instead of using init_user_ns.

Fixes: 96518518cc41 ("netfilter: add nftables")
Tested-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_meta.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 7bc6537f3ccb..b37bd02448d8 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -147,11 +147,11 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key,
 
 	switch (key) {
 	case NFT_META_SKUID:
-		*dest = from_kuid_munged(&init_user_ns,
+		*dest = from_kuid_munged(sock_net(sk)->user_ns,
 					 sock->file->f_cred->fsuid);
 		break;
 	case NFT_META_SKGID:
-		*dest =	from_kgid_munged(&init_user_ns,
+		*dest =	from_kgid_munged(sock_net(sk)->user_ns,
 					 sock->file->f_cred->fsgid);
 		break;
 	default:
-- 
cgit 


From 0ff4628f4c6c1ab87eef9f16b25355cadc426d64 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Sep 2020 03:40:25 -0700
Subject: mac802154: tx: fix use-after-free

syzbot reported a bug in ieee802154_tx() [1]

A similar issue in ieee802154_xmit_worker() is also fixed in this patch.

[1]
BUG: KASAN: use-after-free in ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88
Read of size 4 at addr ffff8880251a8c70 by task syz-executor.3/928

CPU: 0 PID: 928 Comm: syz-executor.3 Not tainted 5.9.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x198/0x1fd lib/dump_stack.c:118
 print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383
 __kasan_report mm/kasan/report.c:513 [inline]
 kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
 ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88
 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130
 __netdev_start_xmit include/linux/netdevice.h:4634 [inline]
 netdev_start_xmit include/linux/netdevice.h:4648 [inline]
 dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203
 packet_snd net/packet/af_packet.c:2989 [inline]
 packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:671
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45d5b9
Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fc98e749c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 000000000002ccc0 RCX: 000000000045d5b9
RDX: 0000000000000000 RSI: 0000000020007780 RDI: 000000000000000b
RBP: 000000000118d020 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cfec
R13: 00007fff690c720f R14: 00007fc98e74a9c0 R15: 000000000118cfec

Allocated by task 928:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
 kasan_set_track mm/kasan/common.c:56 [inline]
 __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461
 slab_post_alloc_hook mm/slab.h:518 [inline]
 slab_alloc_node mm/slab.c:3254 [inline]
 kmem_cache_alloc_node+0x136/0x3e0 mm/slab.c:3574
 __alloc_skb+0x71/0x550 net/core/skbuff.c:198
 alloc_skb include/linux/skbuff.h:1094 [inline]
 alloc_skb_with_frags+0x92/0x570 net/core/skbuff.c:5771
 sock_alloc_send_pskb+0x72a/0x880 net/core/sock.c:2348
 packet_alloc_skb net/packet/af_packet.c:2837 [inline]
 packet_snd net/packet/af_packet.c:2932 [inline]
 packet_sendmsg+0x19fb/0x5290 net/packet/af_packet.c:3014
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:671
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Freed by task 928:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56
 kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355
 __kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422
 __cache_free mm/slab.c:3418 [inline]
 kmem_cache_free.part.0+0x74/0x1e0 mm/slab.c:3693
 kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:622
 __kfree_skb net/core/skbuff.c:679 [inline]
 consume_skb net/core/skbuff.c:838 [inline]
 consume_skb+0xcf/0x160 net/core/skbuff.c:832
 __dev_kfree_skb_any+0x9c/0xc0 net/core/dev.c:3107
 fakelb_hw_xmit+0x20e/0x2a0 drivers/net/ieee802154/fakelb.c:81
 drv_xmit_async net/mac802154/driver-ops.h:16 [inline]
 ieee802154_tx+0x282/0x480 net/mac802154/tx.c:81
 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130
 __netdev_start_xmit include/linux/netdevice.h:4634 [inline]
 netdev_start_xmit include/linux/netdevice.h:4648 [inline]
 dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203
 packet_snd net/packet/af_packet.c:2989 [inline]
 packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:671
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

The buggy address belongs to the object at ffff8880251a8c00
 which belongs to the cache skbuff_head_cache of size 224
The buggy address is located 112 bytes inside of
 224-byte region [ffff8880251a8c00, ffff8880251a8ce0)
The buggy address belongs to the page:
page:0000000062b6a4f1 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x251a8
flags: 0xfffe0000000200(slab)
raw: 00fffe0000000200 ffffea0000435c88 ffffea00028b6c08 ffff8880a9055d00
raw: 0000000000000000 ffff8880251a80c0 000000010000000c 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8880251a8b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff8880251a8b80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc
>ffff8880251a8c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                             ^
 ffff8880251a8c80: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
 ffff8880251a8d00: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb

Fixes: 409c3b0c5f03 ("mac802154: tx: move stats tx increment")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Stefan Schmidt <stefan@datenfreihafen.org>
Cc: linux-wpan@vger.kernel.org
Link: https://lore.kernel.org/r/20200908104025.4009085-1-edumazet@google.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 net/mac802154/tx.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index ab52811523e9..c829e4a75325 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -34,11 +34,11 @@ void ieee802154_xmit_worker(struct work_struct *work)
 	if (res)
 		goto err_tx;
 
-	ieee802154_xmit_complete(&local->hw, skb, false);
-
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
 
+	ieee802154_xmit_complete(&local->hw, skb, false);
+
 	return;
 
 err_tx:
@@ -78,6 +78,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
 
 	/* async is priority, otherwise sync is fallback */
 	if (local->ops->xmit_async) {
+		unsigned int len = skb->len;
+
 		ret = drv_xmit_async(local, skb);
 		if (ret) {
 			ieee802154_wake_queue(&local->hw);
@@ -85,7 +87,7 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
 		}
 
 		dev->stats.tx_packets++;
-		dev->stats.tx_bytes += skb->len;
+		dev->stats.tx_bytes += len;
 	} else {
 		local->tx_skb = skb;
 		queue_work(local->workqueue, &local->tx_work);
-- 
cgit 


From e6a18d36118bea3bf497c9df4d9988b6df120689 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 8 Sep 2020 00:04:10 +0200
Subject: bpf: Fix clobbering of r2 in bpf_gen_ld_abs

Bryce reported that he saw the following with:

  0:  r6 = r1
  1:  r1 = 12
  2:  r0 = *(u16 *)skb[r1]

The xlated sequence was incorrectly clobbering r2 with pointer
value of r6 ...

  0: (bf) r6 = r1
  1: (b7) r1 = 12
  2: (bf) r1 = r6
  3: (bf) r2 = r1
  4: (85) call bpf_skb_load_helper_16_no_cache#7692160

... and hence call to the load helper never succeeded given the
offset was too high. Fix it by reordering the load of r6 to r1.

Other than that the insn has similar calling convention than BPF
helpers, that is, r0 - r5 are scratch regs, so nothing else
affected after the insn.

Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf")
Reported-by: Bryce Kahle <bryce.kahle@datadoghq.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/cace836e4d07bb63b1a53e49c5dfb238a040c298.1599512096.git.daniel@iogearbox.net
---
 net/core/filter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index b2df52086445..2d62c25e0395 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7065,8 +7065,6 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig,
 	bool indirect = BPF_MODE(orig->code) == BPF_IND;
 	struct bpf_insn *insn = insn_buf;
 
-	/* We're guaranteed here that CTX is in R6. */
-	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
 	if (!indirect) {
 		*insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
 	} else {
@@ -7074,6 +7072,8 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig,
 		if (orig->imm)
 			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
 	}
+	/* We're guaranteed here that CTX is in R6. */
+	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
 
 	switch (BPF_SIZE(orig->code)) {
 	case BPF_B:
-- 
cgit 


From 2f1e8ea726e9020e01e9e2ae29c2d5eb11133032 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Tue, 8 Sep 2020 02:48:42 +0300
Subject: net: dsa: link interfaces with the DSA master to get rid of lockdep
 warnings

Since commit 845e0ebb4408 ("net: change addr_list_lock back to static
key"), cascaded DSA setups (DSA switch port as DSA master for another
DSA switch port) are emitting this lockdep warning:

============================================
WARNING: possible recursive locking detected
5.8.0-rc1-00133-g923e4b5032dd-dirty #208 Not tainted
--------------------------------------------
dhcpcd/323 is trying to acquire lock:
ffff000066dd4268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90

but task is already holding lock:
ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&dsa_master_addr_list_lock_key/1);
  lock(&dsa_master_addr_list_lock_key/1);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by dhcpcd/323:
 #0: ffffdbd1381dda18 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x24/0x30
 #1: ffff00006614b268 (_xmit_ETHER){+...}-{2:2}, at: dev_set_rx_mode+0x28/0x48
 #2: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90

stack backtrace:
Call trace:
 dump_backtrace+0x0/0x1e0
 show_stack+0x20/0x30
 dump_stack+0xec/0x158
 __lock_acquire+0xca0/0x2398
 lock_acquire+0xe8/0x440
 _raw_spin_lock_nested+0x64/0x90
 dev_mc_sync+0x44/0x90
 dsa_slave_set_rx_mode+0x34/0x50
 __dev_set_rx_mode+0x60/0xa0
 dev_mc_sync+0x84/0x90
 dsa_slave_set_rx_mode+0x34/0x50
 __dev_set_rx_mode+0x60/0xa0
 dev_set_rx_mode+0x30/0x48
 __dev_open+0x10c/0x180
 __dev_change_flags+0x170/0x1c8
 dev_change_flags+0x2c/0x70
 devinet_ioctl+0x774/0x878
 inet_ioctl+0x348/0x3b0
 sock_do_ioctl+0x50/0x310
 sock_ioctl+0x1f8/0x580
 ksys_ioctl+0xb0/0xf0
 __arm64_sys_ioctl+0x28/0x38
 el0_svc_common.constprop.0+0x7c/0x180
 do_el0_svc+0x2c/0x98
 el0_sync_handler+0x9c/0x1b8
 el0_sync+0x158/0x180

Since DSA never made use of the netdev API for describing links between
upper devices and lower devices, the dev->lower_level value of a DSA
switch interface would be 1, which would warn when it is a DSA master.

We can use netdev_upper_dev_link() to describe the relationship between
a DSA slave and a DSA master. To be precise, a DSA "slave" (switch port)
is an "upper" to a DSA "master" (host port). The relationship is "many
uppers to one lower", like in the case of VLAN. So, for that reason, we
use the same function as VLAN uses.

There might be a chance that somebody will try to take hold of this
interface and use it immediately after register_netdev() and before
netdev_upper_dev_link(). To avoid that, we do the registration and
linkage while holding the RTNL, and we use the RTNL-locked cousin of
register_netdev(), which is register_netdevice().

Since this warning was not there when lockdep was using dynamic keys for
addr_list_lock, we are blaming the lockdep patch itself. The network
stack _has_ been using static lockdep keys before, and it _is_ likely
that stacked DSA setups have been triggering these lockdep warnings
since forever, however I can't test very old kernels on this particular
stacked DSA setup, to ensure I'm not in fact introducing regressions.

Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9af1a2d0cec4..16e5f98d4882 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1799,15 +1799,27 @@ int dsa_slave_create(struct dsa_port *port)
 
 	dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
 
-	ret = register_netdev(slave_dev);
+	rtnl_lock();
+
+	ret = register_netdevice(slave_dev);
 	if (ret) {
 		netdev_err(master, "error %d registering interface %s\n",
 			   ret, slave_dev->name);
+		rtnl_unlock();
 		goto out_phy;
 	}
 
+	ret = netdev_upper_dev_link(master, slave_dev, NULL);
+
+	rtnl_unlock();
+
+	if (ret)
+		goto out_unregister;
+
 	return 0;
 
+out_unregister:
+	unregister_netdev(slave_dev);
 out_phy:
 	rtnl_lock();
 	phylink_disconnect_phy(p->dp->pl);
@@ -1824,16 +1836,18 @@ out_free:
 
 void dsa_slave_destroy(struct net_device *slave_dev)
 {
+	struct net_device *master = dsa_slave_to_master(slave_dev);
 	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
 	struct dsa_slave_priv *p = netdev_priv(slave_dev);
 
 	netif_carrier_off(slave_dev);
 	rtnl_lock();
+	netdev_upper_dev_unlink(master, slave_dev);
+	unregister_netdevice(slave_dev);
 	phylink_disconnect_phy(dp->pl);
 	rtnl_unlock();
 
 	dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
-	unregister_netdev(slave_dev);
 	phylink_destroy(dp->pl);
 	gro_cells_destroy(&p->gcells);
 	free_percpu(p->stats64);
-- 
cgit 


From 843d926b003ea692468c8cc5bea1f9f58dfa8c75 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Sep 2020 01:20:23 -0700
Subject: ipv6: avoid lockdep issue in fib6_del()

syzbot reported twice a lockdep issue in fib6_del() [1]
which I think is caused by net->ipv6.fib6_null_entry
having a NULL fib6_table pointer.

fib6_del() already checks for fib6_null_entry special
case, we only need to return earlier.

Bug seems to occur very rarely, I have thus chosen
a 'bug origin' that makes backports not too complex.

[1]
WARNING: suspicious RCU usage
5.9.0-rc4-syzkaller #0 Not tainted
-----------------------------
net/ipv6/ip6_fib.c:1996 suspicious rcu_dereference_protected() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
4 locks held by syz-executor.5/8095:
 #0: ffffffff8a7ea708 (rtnl_mutex){+.+.}-{3:3}, at: ppp_release+0x178/0x240 drivers/net/ppp/ppp_generic.c:401
 #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: spin_trylock_bh include/linux/spinlock.h:414 [inline]
 #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: fib6_run_gc+0x21b/0x2d0 net/ipv6/ip6_fib.c:2312
 #2: ffffffff89bd6a40 (rcu_read_lock){....}-{1:2}, at: __fib6_clean_all+0x0/0x290 net/ipv6/ip6_fib.c:2613
 #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:359 [inline]
 #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: __fib6_clean_all+0x107/0x290 net/ipv6/ip6_fib.c:2245

stack backtrace:
CPU: 1 PID: 8095 Comm: syz-executor.5 Not tainted 5.9.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x198/0x1fd lib/dump_stack.c:118
 fib6_del+0x12b4/0x1630 net/ipv6/ip6_fib.c:1996
 fib6_clean_node+0x39b/0x570 net/ipv6/ip6_fib.c:2180
 fib6_walk_continue+0x4aa/0x8e0 net/ipv6/ip6_fib.c:2102
 fib6_walk+0x182/0x370 net/ipv6/ip6_fib.c:2150
 fib6_clean_tree+0xdb/0x120 net/ipv6/ip6_fib.c:2230
 __fib6_clean_all+0x120/0x290 net/ipv6/ip6_fib.c:2246
 fib6_clean_all net/ipv6/ip6_fib.c:2257 [inline]
 fib6_run_gc+0x113/0x2d0 net/ipv6/ip6_fib.c:2320
 ndisc_netdev_event+0x217/0x350 net/ipv6/ndisc.c:1805
 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83
 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:2033
 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline]
 call_netdevice_notifiers net/core/dev.c:2059 [inline]
 dev_close_many+0x30b/0x650 net/core/dev.c:1634
 rollback_registered_many+0x3a8/0x1210 net/core/dev.c:9261
 rollback_registered net/core/dev.c:9329 [inline]
 unregister_netdevice_queue+0x2dd/0x570 net/core/dev.c:10410
 unregister_netdevice include/linux/netdevice.h:2774 [inline]
 ppp_release+0x216/0x240 drivers/net/ppp/ppp_generic.c:403
 __fput+0x285/0x920 fs/file_table.c:281
 task_work_run+0xdd/0x190 kernel/task_work.c:141
 tracehook_notify_resume include/linux/tracehook.h:188 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:163 [inline]
 exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190
 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 421842edeaf6 ("net/ipv6: Add fib6_null_entry")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Ahern <dsahern@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 25a90f3f705c..4a664ad4f4d4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1993,14 +1993,19 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 /* Need to own table->tb6_lock */
 int fib6_del(struct fib6_info *rt, struct nl_info *info)
 {
-	struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
-				    lockdep_is_held(&rt->fib6_table->tb6_lock));
-	struct fib6_table *table = rt->fib6_table;
 	struct net *net = info->nl_net;
 	struct fib6_info __rcu **rtp;
 	struct fib6_info __rcu **rtp_next;
+	struct fib6_table *table;
+	struct fib6_node *fn;
+
+	if (rt == net->ipv6.fib6_null_entry)
+		return -ENOENT;
 
-	if (!fn || rt == net->ipv6.fib6_null_entry)
+	table = rt->fib6_table;
+	fn = rcu_dereference_protected(rt->fib6_node,
+				       lockdep_is_held(&table->tb6_lock));
+	if (!fn)
 		return -ENOENT;
 
 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
-- 
cgit 


From 923f614cdba2842b6f7eaf0713cba1c5668c1b5f Mon Sep 17 00:00:00 2001
From: Brian Vazquez <brianvv@google.com>
Date: Tue, 8 Sep 2020 09:18:12 -0700
Subject: fib: fix fib_rule_ops indirect call wrappers when CONFIG_IPV6=m

If CONFIG_IPV6=m, the IPV6 functions won't be found by the linker:

ld: net/core/fib_rules.o: in function `fib_rules_lookup':
fib_rules.c:(.text+0x606): undefined reference to `fib6_rule_match'
ld: fib_rules.c:(.text+0x611): undefined reference to `fib6_rule_match'
ld: fib_rules.c:(.text+0x68c): undefined reference to `fib6_rule_action'
ld: fib_rules.c:(.text+0x693): undefined reference to `fib6_rule_action'
ld: fib_rules.c:(.text+0x6aa): undefined reference to `fib6_rule_suppress'
ld: fib_rules.c:(.text+0x6bc): undefined reference to `fib6_rule_suppress'
make: *** [Makefile:1166: vmlinux] Error 1

Reported-by: Sven Joachim <svenjoac@gmx.de>
Fixes: b9aaec8f0be5 ("fib: use indirect call wrappers in the most common fib_rules_ops")
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Brian Vazquez <brianvv@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 51678a528f85..7bcfb16854cb 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -16,7 +16,7 @@
 #include <net/ip_tunnels.h>
 #include <linux/indirect_call_wrapper.h>
 
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES)
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 #define INDIRECT_CALL_MT(f, f2, f1, ...) \
 	INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__)
-- 
cgit 


From ba9e04a7ddf4f22a10e05bf9403db6b97743c7bf Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 8 Sep 2020 14:09:34 -0700
Subject: ip: fix tos reflection in ack and reset packets

Currently, in tcp_v4_reqsk_send_ack() and tcp_v4_send_reset(), we
echo the TOS value of the received packets in the response.
However, we do not want to echo the lower 2 ECN bits in accordance
with RFC 3168 6.1.5 robustness principles.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 61f802d5350c..e6f2ada9e7d5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -74,6 +74,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/inet_ecn.h>
 #include <net/lwtunnel.h>
 #include <linux/bpf-cgroup.h>
 #include <linux/igmp.h>
@@ -1703,7 +1704,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	if (IS_ERR(rt))
 		return;
 
-	inet_sk(sk)->tos = arg->tos;
+	inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
 
 	sk->sk_protocol = ip_hdr(skb)->protocol;
 	sk->sk_bound_dev_if = arg->bound_dev_if;
-- 
cgit 


From 3ca1a42a52ca4b4f02061683851692ad65fefac8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Sep 2020 01:27:39 -0700
Subject: net: qrtr: check skb_put_padto() return value

If skb_put_padto() returns an error, skb has been freed.
Better not touch it anymore, as reported by syzbot [1]

Note to qrtr maintainers : this suggests qrtr_sendmsg()
should adjust sock_alloc_send_skb() second parameter
to account for the potential added alignment to avoid
reallocation.

[1]

BUG: KASAN: use-after-free in __skb_insert include/linux/skbuff.h:1907 [inline]
BUG: KASAN: use-after-free in __skb_queue_before include/linux/skbuff.h:2016 [inline]
BUG: KASAN: use-after-free in __skb_queue_tail include/linux/skbuff.h:2049 [inline]
BUG: KASAN: use-after-free in skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146
Write of size 8 at addr ffff88804d8ab3c0 by task syz-executor.4/4316

CPU: 1 PID: 4316 Comm: syz-executor.4 Not tainted 5.9.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1d6/0x29e lib/dump_stack.c:118
 print_address_description+0x66/0x620 mm/kasan/report.c:383
 __kasan_report mm/kasan/report.c:513 [inline]
 kasan_report+0x132/0x1d0 mm/kasan/report.c:530
 __skb_insert include/linux/skbuff.h:1907 [inline]
 __skb_queue_before include/linux/skbuff.h:2016 [inline]
 __skb_queue_tail include/linux/skbuff.h:2049 [inline]
 skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146
 qrtr_tun_send+0x1a/0x40 net/qrtr/tun.c:23
 qrtr_node_enqueue+0x44f/0xc00 net/qrtr/qrtr.c:364
 qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861
 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg net/socket.c:671 [inline]
 sock_write_iter+0x317/0x470 net/socket.c:998
 call_write_iter include/linux/fs.h:1882 [inline]
 new_sync_write fs/read_write.c:503 [inline]
 vfs_write+0xa96/0xd10 fs/read_write.c:578
 ksys_write+0x11b/0x220 fs/read_write.c:631
 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45d5b9
Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f84b5b81c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000038b40 RCX: 000000000045d5b9
RDX: 0000000000000055 RSI: 0000000020001240 RDI: 0000000000000003
RBP: 00007f84b5b81ca0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000f
R13: 00007ffcbbf86daf R14: 00007f84b5b829c0 R15: 000000000118cf4c

Allocated by task 4316:
 kasan_save_stack mm/kasan/common.c:48 [inline]
 kasan_set_track mm/kasan/common.c:56 [inline]
 __kasan_kmalloc+0x100/0x130 mm/kasan/common.c:461
 slab_post_alloc_hook+0x3e/0x290 mm/slab.h:518
 slab_alloc mm/slab.c:3312 [inline]
 kmem_cache_alloc+0x1c1/0x2d0 mm/slab.c:3482
 skb_clone+0x1b2/0x370 net/core/skbuff.c:1449
 qrtr_bcast_enqueue+0x6d/0x140 net/qrtr/qrtr.c:857
 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg net/socket.c:671 [inline]
 sock_write_iter+0x317/0x470 net/socket.c:998
 call_write_iter include/linux/fs.h:1882 [inline]
 new_sync_write fs/read_write.c:503 [inline]
 vfs_write+0xa96/0xd10 fs/read_write.c:578
 ksys_write+0x11b/0x220 fs/read_write.c:631
 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Freed by task 4316:
 kasan_save_stack mm/kasan/common.c:48 [inline]
 kasan_set_track+0x3d/0x70 mm/kasan/common.c:56
 kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355
 __kasan_slab_free+0xdd/0x110 mm/kasan/common.c:422
 __cache_free mm/slab.c:3418 [inline]
 kmem_cache_free+0x82/0xf0 mm/slab.c:3693
 __skb_pad+0x3f5/0x5a0 net/core/skbuff.c:1823
 __skb_put_padto include/linux/skbuff.h:3233 [inline]
 skb_put_padto include/linux/skbuff.h:3252 [inline]
 qrtr_node_enqueue+0x62f/0xc00 net/qrtr/qrtr.c:360
 qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861
 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg net/socket.c:671 [inline]
 sock_write_iter+0x317/0x470 net/socket.c:998
 call_write_iter include/linux/fs.h:1882 [inline]
 new_sync_write fs/read_write.c:503 [inline]
 vfs_write+0xa96/0xd10 fs/read_write.c:578
 ksys_write+0x11b/0x220 fs/read_write.c:631
 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

The buggy address belongs to the object at ffff88804d8ab3c0
 which belongs to the cache skbuff_head_cache of size 224
The buggy address is located 0 bytes inside of
 224-byte region [ffff88804d8ab3c0, ffff88804d8ab4a0)
The buggy address belongs to the page:
page:00000000ea8cccfb refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88804d8abb40 pfn:0x4d8ab
flags: 0xfffe0000000200(slab)
raw: 00fffe0000000200 ffffea0002237ec8 ffffea00029b3388 ffff88821bb66800
raw: ffff88804d8abb40 ffff88804d8ab000 000000010000000b 0000000000000000
page dumped because: kasan: bad access detected

Fixes: ce57785bf91b ("net: qrtr: fix len of skb_put_padto in qrtr_node_enqueue")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Carl Huang <cjhuang@codeaurora.org>
Cc: Wen Gong <wgong@codeaurora.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 90c558f89d46..957aa9263ba4 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -332,8 +332,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
 {
 	struct qrtr_hdr_v1 *hdr;
 	size_t len = skb->len;
-	int rc = -ENODEV;
-	int confirm_rx;
+	int rc, confirm_rx;
 
 	confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type);
 	if (confirm_rx < 0) {
@@ -357,15 +356,17 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
 	hdr->size = cpu_to_le32(len);
 	hdr->confirm_rx = !!confirm_rx;
 
-	skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
-
-	mutex_lock(&node->ep_lock);
-	if (node->ep)
-		rc = node->ep->xmit(node->ep, skb);
-	else
-		kfree_skb(skb);
-	mutex_unlock(&node->ep_lock);
+	rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
 
+	if (!rc) {
+		mutex_lock(&node->ep_lock);
+		rc = -ENODEV;
+		if (node->ep)
+			rc = node->ep->xmit(node->ep, skb);
+		else
+			kfree_skb(skb);
+		mutex_unlock(&node->ep_lock);
+	}
 	/* Need to ensure that a subsequent message carries the otherwise lost
 	 * confirm_rx flag if we dropped this one */
 	if (rc && confirm_rx)
-- 
cgit 


From b87f9fe1ac9441b75656dfd95eba70ef9f0375e0 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Wed, 9 Sep 2020 17:38:21 +0800
Subject: hsr: avoid newline at end of message in NL_SET_ERR_MSG_MOD

clean follow coccicheck warning:
net//hsr/hsr_netlink.c:94:8-42: WARNING avoid newline at end of message
in NL_SET_ERR_MSG_MOD
net//hsr/hsr_netlink.c:87:30-57: WARNING avoid newline at end of message
in NL_SET_ERR_MSG_MOD
net//hsr/hsr_netlink.c:79:29-53: WARNING avoid newline at end of message
in NL_SET_ERR_MSG_MOD

Signed-off-by: Ye Bin <yebin10@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_netlink.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 06c3cd988760..0e4681cf71db 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -76,7 +76,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
 		proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]);
 
 	if (proto >= HSR_PROTOCOL_MAX) {
-		NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol\n");
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol");
 		return -EINVAL;
 	}
 
@@ -84,14 +84,14 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
 		proto_version = HSR_V0;
 	} else {
 		if (proto == HSR_PROTOCOL_PRP) {
-			NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported\n");
+			NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported");
 			return -EINVAL;
 		}
 
 		proto_version = nla_get_u8(data[IFLA_HSR_VERSION]);
 		if (proto_version > HSR_V1) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Only HSR version 0/1 supported\n");
+					   "Only HSR version 0/1 supported");
 			return -EINVAL;
 		}
 	}
-- 
cgit 


From a4b5cc9e10803ecba64a7d54c0f47e4564b4a980 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 5 Sep 2020 15:14:47 +0900
Subject: tipc: fix shutdown() of connection oriented socket

I confirmed that the problem fixed by commit 2a63866c8b51a3f7 ("tipc: fix
shutdown() of connectionless socket") also applies to stream socket.

----------
#include <sys/socket.h>
#include <unistd.h>
#include <sys/wait.h>

int main(int argc, char *argv[])
{
        int fds[2] = { -1, -1 };
        socketpair(PF_TIPC, SOCK_STREAM /* or SOCK_DGRAM */, 0, fds);
        if (fork() == 0)
                _exit(read(fds[0], NULL, 1));
        shutdown(fds[0], SHUT_RDWR); /* This must make read() return. */
        wait(NULL); /* To be woken up by _exit(). */
        return 0;
}
----------

Since shutdown(SHUT_RDWR) should affect all processes sharing that socket,
unconditionally setting sk->sk_shutdown to SHUTDOWN_MASK will be the right
behavior.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ebd280e767bd..11b27ddc75ba 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2771,10 +2771,7 @@ static int tipc_shutdown(struct socket *sock, int how)
 
 	trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " ");
 	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
-	if (tipc_sk_type_connectionless(sk))
-		sk->sk_shutdown = SHUTDOWN_MASK;
-	else
-		sk->sk_shutdown = SEND_SHUTDOWN;
+	sk->sk_shutdown = SHUTDOWN_MASK;
 
 	if (sk->sk_state == TIPC_DISCONNECTING) {
 		/* Discard any unreceived messages */
-- 
cgit 


From 57025817eaa42c0b6e2a907f28a125c74e3b2fc6 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 8 Sep 2020 10:49:38 +0800
Subject: mptcp: fix subflow's local_id issues

In mptcp_pm_nl_get_local_id, skc_local is the same as msk_local, so it
always return 0. Thus every subflow's local_id is 0. It's incorrect.

This patch fixed this issue.

Also, we need to ignore the zero address here, like 0.0.0.0 in IPv4. When
we use the zero address as a local address, it means that we can use any
one of the local addresses. The zero address is not a new address, we don't
need to add it to PM, so this patch added a new function address_zero to
check whether an address is the zero address, if it is, we ignore this
address.

Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM")
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index c8820c4156e6..6b41d1d939a0 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -66,6 +66,16 @@ static bool addresses_equal(const struct mptcp_addr_info *a,
 	return a->port == b->port;
 }
 
+static bool address_zero(const struct mptcp_addr_info *addr)
+{
+	struct mptcp_addr_info zero;
+
+	memset(&zero, 0, sizeof(zero));
+	zero.family = addr->family;
+
+	return addresses_equal(addr, &zero, false);
+}
+
 static void local_address(const struct sock_common *skc,
 			  struct mptcp_addr_info *addr)
 {
@@ -323,10 +333,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 	 * addr
 	 */
 	local_address((struct sock_common *)msk, &msk_local);
-	local_address((struct sock_common *)msk, &skc_local);
+	local_address((struct sock_common *)skc, &skc_local);
 	if (addresses_equal(&msk_local, &skc_local, false))
 		return 0;
 
+	if (address_zero(&skc_local))
+		return 0;
+
 	pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
 
 	rcu_read_lock();
-- 
cgit 


From 2ff0e566faa4e92cba8138c5b396d6ba96a215f1 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 8 Sep 2020 10:49:39 +0800
Subject: mptcp: fix subflow's remote_id issues

This patch set the init remote_id to zero, otherwise it will be a random
number.

Then it added the missing subflow's remote_id setting code both in
__mptcp_subflow_connect and in subflow_ulp_clone.

Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM")
Fixes: ec3edaa7ca6ce ("mptcp: Add handling of outgoing MP_JOIN requests")
Fixes: f296234c98a8f ("mptcp: Add handling of incoming MP_JOIN requests")
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 2 +-
 net/mptcp/subflow.c    | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 6b41d1d939a0..5ea121d0222e 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -181,9 +181,9 @@ static void check_work_pending(struct mptcp_sock *msk)
 
 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 {
+	struct mptcp_addr_info remote = { 0 };
 	struct sock *sk = (struct sock *)msk;
 	struct mptcp_pm_addr_entry *local;
-	struct mptcp_addr_info remote;
 	struct pm_nl_pernet *pernet;
 
 	pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e8cac2655c82..9ead43f79023 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1063,6 +1063,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_subflow_context *subflow;
 	struct sockaddr_storage addr;
+	int remote_id = remote->id;
 	int local_id = loc->id;
 	struct socket *sf;
 	struct sock *ssk;
@@ -1107,10 +1108,11 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
 		goto failed;
 
 	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
-	pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token,
-		 local_id);
+	pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
+		 remote_token, local_id, remote_id);
 	subflow->remote_token = remote_token;
 	subflow->local_id = local_id;
+	subflow->remote_id = remote_id;
 	subflow->request_join = 1;
 	subflow->request_bkup = 1;
 	mptcp_info2sockaddr(remote, &addr);
@@ -1347,6 +1349,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
 		new_ctx->fully_established = 1;
 		new_ctx->backup = subflow_req->backup;
 		new_ctx->local_id = subflow_req->local_id;
+		new_ctx->remote_id = subflow_req->remote_id;
 		new_ctx->token = subflow_req->token;
 		new_ctx->thmac = subflow_req->thmac;
 	}
-- 
cgit 


From f612eb76f349919128d5f5cc4e8cc4251a16bf30 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Wed, 9 Sep 2020 11:01:24 +0800
Subject: mptcp: fix kmalloc flag in mptcp_pm_nl_get_local_id

mptcp_pm_nl_get_local_id may be called in interrupt context, so we need to
use GFP_ATOMIC flag to allocate memory to avoid sleeping in atomic context.

[  280.209809] BUG: sleeping function called from invalid context at mm/slab.h:498
[  280.209812] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1680, name: kworker/1:3
[  280.209814] INFO: lockdep is turned off.
[  280.209816] CPU: 1 PID: 1680 Comm: kworker/1:3 Tainted: G        W         5.9.0-rc3-mptcp+ #146
[  280.209818] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  280.209820] Workqueue: events mptcp_worker
[  280.209822] Call Trace:
[  280.209824]  <IRQ>
[  280.209826]  dump_stack+0x77/0xa0
[  280.209829]  ___might_sleep.cold+0xa6/0xb6
[  280.209832]  kmem_cache_alloc_trace+0x1d1/0x290
[  280.209835]  mptcp_pm_nl_get_local_id+0x23c/0x410
[  280.209840]  subflow_init_req+0x1e9/0x2ea
[  280.209843]  ? inet_reqsk_alloc+0x1c/0x120
[  280.209845]  ? kmem_cache_alloc+0x264/0x290
[  280.209849]  tcp_conn_request+0x303/0xae0
[  280.209854]  ? printk+0x53/0x6a
[  280.209857]  ? tcp_rcv_state_process+0x28f/0x1374
[  280.209859]  tcp_rcv_state_process+0x28f/0x1374
[  280.209864]  ? tcp_v4_do_rcv+0xb3/0x1f0
[  280.209866]  tcp_v4_do_rcv+0xb3/0x1f0
[  280.209869]  tcp_v4_rcv+0xed6/0xfa0
[  280.209873]  ip_protocol_deliver_rcu+0x28/0x270
[  280.209875]  ip_local_deliver_finish+0x89/0x120
[  280.209877]  ip_local_deliver+0x180/0x220
[  280.209881]  ip_rcv+0x166/0x210
[  280.209885]  __netif_receive_skb_one_core+0x82/0x90
[  280.209888]  process_backlog+0xd6/0x230
[  280.209891]  net_rx_action+0x13a/0x410
[  280.209895]  __do_softirq+0xcf/0x468
[  280.209899]  asm_call_on_stack+0x12/0x20
[  280.209901]  </IRQ>
[  280.209903]  ? ip_finish_output2+0x240/0x9a0
[  280.209906]  do_softirq_own_stack+0x4d/0x60
[  280.209908]  do_softirq.part.0+0x2b/0x60
[  280.209911]  __local_bh_enable_ip+0x9a/0xa0
[  280.209913]  ip_finish_output2+0x264/0x9a0
[  280.209916]  ? rcu_read_lock_held+0x4d/0x60
[  280.209920]  ? ip_output+0x7a/0x250
[  280.209922]  ip_output+0x7a/0x250
[  280.209925]  ? __ip_finish_output+0x330/0x330
[  280.209928]  __ip_queue_xmit+0x1dc/0x5a0
[  280.209931]  __tcp_transmit_skb+0xa0f/0xc70
[  280.209937]  tcp_connect+0xb03/0xff0
[  280.209939]  ? lockdep_hardirqs_on_prepare+0xe7/0x190
[  280.209942]  ? ktime_get_with_offset+0x125/0x150
[  280.209944]  ? trace_hardirqs_on+0x1c/0xe0
[  280.209948]  tcp_v4_connect+0x449/0x550
[  280.209953]  __inet_stream_connect+0xbb/0x320
[  280.209955]  ? mark_held_locks+0x49/0x70
[  280.209958]  ? lockdep_hardirqs_on_prepare+0xe7/0x190
[  280.209960]  ? __local_bh_enable_ip+0x6b/0xa0
[  280.209963]  inet_stream_connect+0x32/0x50
[  280.209966]  __mptcp_subflow_connect+0x1fd/0x242
[  280.209972]  mptcp_pm_create_subflow_or_signal_addr+0x2db/0x600
[  280.209975]  mptcp_worker+0x543/0x7a0
[  280.209980]  process_one_work+0x26d/0x5b0
[  280.209984]  ? process_one_work+0x5b0/0x5b0
[  280.209987]  worker_thread+0x48/0x3d0
[  280.209990]  ? process_one_work+0x5b0/0x5b0
[  280.209993]  kthread+0x117/0x150
[  280.209996]  ? kthread_park+0x80/0x80
[  280.209998]  ret_from_fork+0x22/0x30

Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM")
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5ea121d0222e..770da3627848 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -354,7 +354,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 		return ret;
 
 	/* address not found, add to local list */
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 	if (!entry)
 		return -ENOMEM;
 
-- 
cgit 


From 2fb541c862c987d02dfdf28f1545016deecfa0d5 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Tue, 8 Sep 2020 19:02:34 +0800
Subject: net: sch_generic: aviod concurrent reset and enqueue op for lockless
 qdisc

Currently there is concurrent reset and enqueue operation for the
same lockless qdisc when there is no lock to synchronize the
q->enqueue() in __dev_xmit_skb() with the qdisc reset operation in
qdisc_deactivate() called by dev_deactivate_queue(), which may cause
out-of-bounds access for priv->ring[] in hns3 driver if user has
requested a smaller queue num when __dev_xmit_skb() still enqueue a
skb with a larger queue_mapping after the corresponding qdisc is
reset, and call hns3_nic_net_xmit() with that skb later.

Reused the existing synchronize_net() in dev_deactivate_many() to
make sure skb with larger queue_mapping enqueued to old qdisc(which
is saved in dev_queue->qdisc_sleeping) will always be reset when
dev_reset_queue() is called.

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 48 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 265a61d011df..54c417244642 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1131,24 +1131,10 @@ EXPORT_SYMBOL(dev_activate);
 
 static void qdisc_deactivate(struct Qdisc *qdisc)
 {
-	bool nolock = qdisc->flags & TCQ_F_NOLOCK;
-
 	if (qdisc->flags & TCQ_F_BUILTIN)
 		return;
-	if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
-		return;
-
-	if (nolock)
-		spin_lock_bh(&qdisc->seqlock);
-	spin_lock_bh(qdisc_lock(qdisc));
 
 	set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
-
-	qdisc_reset(qdisc);
-
-	spin_unlock_bh(qdisc_lock(qdisc));
-	if (nolock)
-		spin_unlock_bh(&qdisc->seqlock);
 }
 
 static void dev_deactivate_queue(struct net_device *dev,
@@ -1165,6 +1151,30 @@ static void dev_deactivate_queue(struct net_device *dev,
 	}
 }
 
+static void dev_reset_queue(struct net_device *dev,
+			    struct netdev_queue *dev_queue,
+			    void *_unused)
+{
+	struct Qdisc *qdisc;
+	bool nolock;
+
+	qdisc = dev_queue->qdisc_sleeping;
+	if (!qdisc)
+		return;
+
+	nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+	if (nolock)
+		spin_lock_bh(&qdisc->seqlock);
+	spin_lock_bh(qdisc_lock(qdisc));
+
+	qdisc_reset(qdisc);
+
+	spin_unlock_bh(qdisc_lock(qdisc));
+	if (nolock)
+		spin_unlock_bh(&qdisc->seqlock);
+}
+
 static bool some_qdisc_is_busy(struct net_device *dev)
 {
 	unsigned int i;
@@ -1213,12 +1223,20 @@ void dev_deactivate_many(struct list_head *head)
 		dev_watchdog_down(dev);
 	}
 
-	/* Wait for outstanding qdisc-less dev_queue_xmit calls.
+	/* Wait for outstanding qdisc-less dev_queue_xmit calls or
+	 * outstanding qdisc enqueuing calls.
 	 * This is avoided if all devices are in dismantle phase :
 	 * Caller will call synchronize_net() for us
 	 */
 	synchronize_net();
 
+	list_for_each_entry(dev, head, close_list) {
+		netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);
+
+		if (dev_ingress_queue(dev))
+			dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
+	}
+
 	/* Wait for outstanding qdisc_run calls. */
 	list_for_each_entry(dev, head, close_list) {
 		while (some_qdisc_is_busy(dev)) {
-- 
cgit 


From 1be107de2ee4b3f0808e2071529364cf4d9a67b9 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Thu, 10 Sep 2020 04:41:53 -0400
Subject: net: Correct the comment of dst_dev_put()

Since commit 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to
invalidate dst entries"), we use blackhole_netdev to invalidate dst entries
instead of loopback device anymore.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index d6b6ced0d451..0c01bd8d9d81 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -144,7 +144,7 @@ static void dst_destroy_rcu(struct rcu_head *head)
 
 /* Operations to mark dst as DEAD and clean up the net device referenced
  * by dst:
- * 1. put the dst under loopback interface and discard all tx/rx packets
+ * 1. put the dst under blackhole interface and discard all tx/rx packets
  *    on this route.
  * 2. release the net_device
  * This function should be called when removing routes from the fib tree
-- 
cgit 


From e1b9efe6baebe79019a2183176686a0e709388ae Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 10 Sep 2020 14:01:26 +0300
Subject: net: Fix bridge enslavement failure

When a netdev is enslaved to a bridge, its parent identifier is queried.
This is done so that packets that were already forwarded in hardware
will not be forwarded again by the bridge device between netdevs
belonging to the same hardware instance.

The operation fails when the netdev is an upper of netdevs with
different parent identifiers.

Instead of failing the enslavement, have dev_get_port_parent_id() return
'-EOPNOTSUPP' which will signal the bridge to skip the query operation.
Other callers of the function are not affected by this change.

Fixes: 7e1146e8c10c ("net: devlink: introduce devlink_compat_switch_id_get() helper")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reported-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4086d335978c..266073e300b5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8647,7 +8647,7 @@ int dev_get_port_parent_id(struct net_device *dev,
 		if (!first.id_len)
 			first = *ppid;
 		else if (memcmp(&first, ppid, sizeof(*ppid)))
-			return -ENODATA;
+			return -EOPNOTSUPP;
 	}
 
 	return err;
-- 
cgit 


From 297e77e53eadb332d5062913447b104a772dc33b Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 10 Sep 2020 14:09:05 +0200
Subject: net: DCB: Validate DCB_ATTR_DCB_BUFFER argument

The parameter passed via DCB_ATTR_DCB_BUFFER is a struct dcbnl_buffer. The
field prio2buffer is an array of IEEE_8021Q_MAX_PRIORITIES bytes, where
each value is a number of a buffer to direct that priority's traffic to.
That value is however never validated to lie within the bounds set by
DCBX_MAX_BUFFERS. The only driver that currently implements the callback is
mlx5 (maintainers CCd), and that does not do any validation either, in
particual allowing incorrect configuration if the prio2buffer value does
not fit into 4 bits.

Instead of offloading the need to validate the buffer index to drivers, do
it right there in core, and bounce the request if the value is too large.

CC: Parav Pandit <parav@nvidia.com>
CC: Saeed Mahameed <saeedm@nvidia.com>
Fixes: e549f6f9c098 ("net/dcb: Add dcbnl buffer attribute")
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 84dde5a2066e..16014ad19406 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1426,6 +1426,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
 {
 	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
 	struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
+	int prio;
 	int err;
 
 	if (!ops)
@@ -1475,6 +1476,13 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
 		struct dcbnl_buffer *buffer =
 			nla_data(ieee[DCB_ATTR_DCB_BUFFER]);
 
+		for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) {
+			if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) {
+				err = -EINVAL;
+				goto err;
+			}
+		}
+
 		err = ops->dcbnl_setbuffer(netdev, buffer);
 		if (err)
 			goto err;
-- 
cgit 


From b5b73b26b3ca34574124ed7ae9c5ba8391a7f176 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Wed, 9 Sep 2020 17:03:11 -0700
Subject: taprio: Fix allowing too small intervals

It's possible that the user specifies an interval that couldn't allow
any packet to be transmitted. This also avoids the issue of the
hrtimer handler starving the other threads because it's running too
often.

The solution is to reject interval sizes that according to the current
link speed wouldn't allow any packet to be transmitted.

Reported-by: syzbot+8267241609ae8c23b248@syzkaller.appspotmail.com
Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_taprio.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index fe53c1e38c7d..b0ad7687ee2c 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -777,9 +777,11 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
 	[TCA_TAPRIO_ATTR_TXTIME_DELAY]		     = { .type = NLA_U32 },
 };
 
-static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
+static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
+			    struct sched_entry *entry,
 			    struct netlink_ext_ack *extack)
 {
+	int min_duration = length_to_duration(q, ETH_ZLEN);
 	u32 interval = 0;
 
 	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
@@ -794,7 +796,10 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
 		interval = nla_get_u32(
 			tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
 
-	if (interval == 0) {
+	/* The interval should allow at least the minimum ethernet
+	 * frame to go out.
+	 */
+	if (interval < min_duration) {
 		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
 		return -EINVAL;
 	}
@@ -804,8 +809,9 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
 	return 0;
 }
 
-static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
-			     int index, struct netlink_ext_ack *extack)
+static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
+			     struct sched_entry *entry, int index,
+			     struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
 	int err;
@@ -819,10 +825,10 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
 
 	entry->index = index;
 
-	return fill_sched_entry(tb, entry, extack);
+	return fill_sched_entry(q, tb, entry, extack);
 }
 
-static int parse_sched_list(struct nlattr *list,
+static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
 			    struct sched_gate_list *sched,
 			    struct netlink_ext_ack *extack)
 {
@@ -847,7 +853,7 @@ static int parse_sched_list(struct nlattr *list,
 			return -ENOMEM;
 		}
 
-		err = parse_sched_entry(n, entry, i, extack);
+		err = parse_sched_entry(q, n, entry, i, extack);
 		if (err < 0) {
 			kfree(entry);
 			return err;
@@ -862,7 +868,7 @@ static int parse_sched_list(struct nlattr *list,
 	return i;
 }
 
-static int parse_taprio_schedule(struct nlattr **tb,
+static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 				 struct sched_gate_list *new,
 				 struct netlink_ext_ack *extack)
 {
@@ -883,8 +889,8 @@ static int parse_taprio_schedule(struct nlattr **tb,
 		new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
 
 	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
-		err = parse_sched_list(
-			tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
+		err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
+				       new, extack);
 	if (err < 0)
 		return err;
 
@@ -1473,7 +1479,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		goto free_sched;
 	}
 
-	err = parse_taprio_schedule(tb, new_admin, extack);
+	err = parse_taprio_schedule(q, tb, new_admin, extack);
 	if (err < 0)
 		goto free_sched;
 
-- 
cgit 


From 4bba9dab86b6ac15ca560ef1f2b5aa4529cbf784 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 14 Sep 2020 13:58:16 +0200
Subject: batman-adv: Add missing include for in_interrupt()

The fix for receiving (internally generated) bla packets outside the
interrupt context introduced the usage of in_interrupt(). But this
functionality is only defined in linux/preempt.h which was not included
with the same patch.

Fixes: 279e89b2281a ("batman-adv: bla: use netif_rx_ni when not in interrupt context")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index d8c5d3170676..08419a2e6b95 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -25,6 +25,7 @@
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
+#include <linux/preempt.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
-- 
cgit 


From 1869e226a7b3ef75b4f70ede2f1b7229f7157fa4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 13 Sep 2020 12:43:39 -0600
Subject: ipv4: Initialize flowi4_multipath_hash in data path

flowi4_multipath_hash was added by the commit referenced below for
tunnels. Unfortunately, the patch did not initialize the new field
for several fast path lookups that do not initialize the entire flow
struct to 0. Fix those locations. Currently, flowi4_multipath_hash
is random garbage and affects the hash value computed by
fib_multipath_hash for multipath selection.

Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash")
Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: wenxu <wenxu@ucloud.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c       | 1 +
 net/ipv4/fib_frontend.c | 1 +
 net/ipv4/route.c        | 1 +
 3 files changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 1f647ab986b6..1b168371ba96 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4838,6 +4838,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	fl4.saddr = params->ipv4_src;
 	fl4.fl4_sport = params->sport;
 	fl4.fl4_dport = params->dport;
+	fl4.flowi4_multipath_hash = 0;
 
 	if (flags & BPF_FIB_LOOKUP_DIRECT) {
 		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 41079490a118..86a23e4a6a50 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -362,6 +362,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	fl4.flowi4_tun_key.tun_id = 0;
 	fl4.flowi4_flags = 0;
 	fl4.flowi4_uid = sock_net_uid(net, NULL);
+	fl4.flowi4_multipath_hash = 0;
 
 	no_addr = idev->ifa_list == NULL;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8ca6bcab7b03..e5f210d00851 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2147,6 +2147,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	fl4.flowi4_uid = sock_net_uid(net, NULL);
+	fl4.flowi4_multipath_hash = 0;
 
 	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
 		flkeys = &_flkeys;
-- 
cgit 


From bb3a420d47ab00d7e1e5083286cab15235a96680 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin.cs@gmail.com>
Date: Sun, 13 Sep 2020 04:06:05 -0400
Subject: tipc: Fix memory leak in tipc_group_create_member()

tipc_group_add_to_tree() returns silently if `key` matches `nkey` of an
existing node, causing tipc_group_create_member() to leak memory. Let
tipc_group_add_to_tree() return an error in such a case, so that
tipc_group_create_member() can handle it properly.

Fixes: 75da2163dbb6 ("tipc: introduce communication groups")
Reported-and-tested-by: syzbot+f95d90c454864b3b5bc9@syzkaller.appspotmail.com
Cc: Hillf Danton <hdanton@sina.com>
Link: https://syzkaller.appspot.com/bug?id=048390604fe1b60df34150265479202f10e13aff
Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/group.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/group.c b/net/tipc/group.c
index 588c2d2b0c69..b1fcd2ad5ecf 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -273,8 +273,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
 	return NULL;
 }
 
-static void tipc_group_add_to_tree(struct tipc_group *grp,
-				   struct tipc_member *m)
+static int tipc_group_add_to_tree(struct tipc_group *grp,
+				  struct tipc_member *m)
 {
 	u64 nkey, key = (u64)m->node << 32 | m->port;
 	struct rb_node **n, *parent = NULL;
@@ -291,10 +291,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp,
 		else if (key > nkey)
 			n = &(*n)->rb_right;
 		else
-			return;
+			return -EEXIST;
 	}
 	rb_link_node(&m->tree_node, parent, n);
 	rb_insert_color(&m->tree_node, &grp->members);
+	return 0;
 }
 
 static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
@@ -302,6 +303,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
 						    u32 instance, int state)
 {
 	struct tipc_member *m;
+	int ret;
 
 	m = kzalloc(sizeof(*m), GFP_ATOMIC);
 	if (!m)
@@ -314,8 +316,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
 	m->port = port;
 	m->instance = instance;
 	m->bc_acked = grp->bc_snd_nxt - 1;
+	ret = tipc_group_add_to_tree(grp, m);
+	if (ret < 0) {
+		kfree(m);
+		return NULL;
+	}
 	grp->member_cnt++;
-	tipc_group_add_to_tree(grp, m);
 	tipc_nlist_add(&grp->dests, m->node);
 	m->state = state;
 	return m;
-- 
cgit 


From ff48b6222e65ebdba5a403ef1deba6214e749193 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 13 Sep 2020 19:37:31 +0800
Subject: tipc: use skb_unshare() instead in tipc_buf_append()

In tipc_buf_append() it may change skb's frag_list, and it causes
problems when this skb is cloned. skb_unclone() doesn't really
make this skb's flag_list available to change.

Shuang Li has reported an use-after-free issue because of this
when creating quite a few macvlan dev over the same dev, where
the broadcast packets will be cloned and go up to the stack:

 [ ] BUG: KASAN: use-after-free in pskb_expand_head+0x86d/0xea0
 [ ] Call Trace:
 [ ]  dump_stack+0x7c/0xb0
 [ ]  print_address_description.constprop.7+0x1a/0x220
 [ ]  kasan_report.cold.10+0x37/0x7c
 [ ]  check_memory_region+0x183/0x1e0
 [ ]  pskb_expand_head+0x86d/0xea0
 [ ]  process_backlog+0x1df/0x660
 [ ]  net_rx_action+0x3b4/0xc90
 [ ]
 [ ] Allocated by task 1786:
 [ ]  kmem_cache_alloc+0xbf/0x220
 [ ]  skb_clone+0x10a/0x300
 [ ]  macvlan_broadcast+0x2f6/0x590 [macvlan]
 [ ]  macvlan_process_broadcast+0x37c/0x516 [macvlan]
 [ ]  process_one_work+0x66a/0x1060
 [ ]  worker_thread+0x87/0xb10
 [ ]
 [ ] Freed by task 3253:
 [ ]  kmem_cache_free+0x82/0x2a0
 [ ]  skb_release_data+0x2c3/0x6e0
 [ ]  kfree_skb+0x78/0x1d0
 [ ]  tipc_recvmsg+0x3be/0xa40 [tipc]

So fix it by using skb_unshare() instead, which would create a new
skb for the cloned frag and it'll be safe to change its frag_list.
The similar things were also done in sctp_make_reassembled_event(),
which is using skb_copy().

Reported-by: Shuang Li <shuali@redhat.com>
Fixes: 37e22164a8a3 ("tipc: rename and move message reassembly function")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 848fae674532..52e93ba4d8e2 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
 	if (fragid == FIRST_FRAGMENT) {
 		if (unlikely(head))
 			goto err;
-		if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
+		frag = skb_unshare(frag, GFP_ATOMIC);
+		if (unlikely(!frag))
 			goto err;
 		head = *headbuf = frag;
 		*buf = NULL;
-- 
cgit 


From 13e6ce98aa65ab5ce19351c020419360dfe8af29 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 13 Sep 2020 19:51:50 +0800
Subject: net: sched: only keep the available bits when setting vxlan md->gbp

As we can see from vxlan_build/parse_gbp_hdr(), when processing metadata
on vxlan rx/tx path, only dont_learn/policy_applied/policy_id fields can
be set to or parse from the packet for vxlan gbp option.

So we'd better do the mask when set it in act_tunnel_key and cls_flower.
Otherwise, when users don't know these bits, they may configure with a
value which can never be matched.

Reported-by: Shuang Li <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 1 +
 net/sched/cls_flower.c     | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 536c4bc31be6..37f1e10f35e0 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -156,6 +156,7 @@ tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len,
 		struct vxlan_metadata *md = dst;
 
 		md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]);
+		md->gbp &= VXLAN_GBP_MASK;
 	}
 
 	return sizeof(struct vxlan_metadata);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index a4f7ef1de7e7..e8fda1bd4d9d 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1175,8 +1175,10 @@ static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key,
 		return -EINVAL;
 	}
 
-	if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP])
+	if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) {
 		md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]);
+		md->gbp &= VXLAN_GBP_MASK;
+	}
 
 	return sizeof(*md);
 }
-- 
cgit 


From 681d2cfb790339a3e95b98bc140baf1f816a896a Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 13 Sep 2020 19:51:51 +0800
Subject: lwtunnel: only keep the available bits when setting vxlan md->gbp

As we can see from vxlan_build/parse_gbp_hdr(), when processing metadata
on vxlan rx/tx path, only dont_learn/policy_applied/policy_id fields can
be set to or parse from the packet for vxlan gbp option.

So do the mask when set it in lwtunnel, as it does in act_tunnel_key and
cls_flower.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel_core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 75c6013ff9a4..b2ea1a8c5fd6 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -554,6 +554,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
 
 		attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
 		md->gbp = nla_get_u32(attr);
+		md->gbp &= VXLAN_GBP_MASK;
 		info->key.tun_flags |= TUNNEL_VXLAN_OPT;
 	}
 
-- 
cgit 


From 8e1b3ac4786680c2d2b5a24e38a2d714c3bcd1ef Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 13 Sep 2020 19:43:03 +0800
Subject: net: sched: initialize with 0 before setting erspan md->u

In fl_set_erspan_opt(), all bits of erspan md was set 1, as this
function is also used to set opt MASK. However, when setting for
md->u.index for opt VALUE, the rest bits of the union md->u will
be left 1. It would cause to fail the match of the whole md when
version is 1 and only index is set.

This patch is to fix by initializing with 0 before setting erspan
md->u.

Reported-by: Shuang Li <shuali@redhat.com>
Fixes: 79b1011cb33d ("net: sched: allow flower to match erspan options")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e8fda1bd4d9d..fed18fd2c50b 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1223,6 +1223,7 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
 		}
 		if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
 			nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX];
+			memset(&md->u, 0x00, sizeof(md->u));
 			md->u.index = nla_get_be32(nla);
 		}
 	} else if (md->version == 2) {
-- 
cgit 


From 2b1667e54caf95e1e4249d9068eea7a3089a5229 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Thu, 10 Sep 2020 09:56:09 +0200
Subject: xsk: Fix number of pinned pages/umem size discrepancy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For AF_XDP sockets, there was a discrepancy between the number of of
pinned pages and the size of the umem region.

The size of the umem region is used to validate the AF_XDP descriptor
addresses. The logic that pinned the pages covered by the region only
took whole pages into consideration, creating a mismatch between the
size and pinned pages. A user could then pass AF_XDP addresses outside
the range of pinned pages, but still within the size of the region,
crashing the kernel.

This change correctly calculates the number of pages to be
pinned. Further, the size check for the aligned mode is
simplified. Now the code simply checks if the size is divisible by the
chunk size.

Fixes: bbff2f321a86 ("xsk: new descriptor addressing scheme")
Reported-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Ciara Loftus <ciara.loftus@intel.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200910075609.7904-1-bjorn.topel@gmail.com
---
 net/xdp/xdp_umem.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index e97db37354e4..b010bfde0149 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -303,10 +303,10 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
 
 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
+	u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom;
 	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
 	u64 npgs, addr = mr->addr, size = mr->len;
-	unsigned int chunks, chunks_per_page;
+	unsigned int chunks, chunks_rem;
 	int err;
 
 	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
@@ -336,19 +336,18 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if ((addr + size) < addr)
 		return -EINVAL;
 
-	npgs = size >> PAGE_SHIFT;
+	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
+	if (npgs_rem)
+		npgs++;
 	if (npgs > U32_MAX)
 		return -EINVAL;
 
-	chunks = (unsigned int)div_u64(size, chunk_size);
+	chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem);
 	if (chunks == 0)
 		return -EINVAL;
 
-	if (!unaligned_chunks) {
-		chunks_per_page = PAGE_SIZE / chunk_size;
-		if (chunks < chunks_per_page || chunks % chunks_per_page)
-			return -EINVAL;
-	}
+	if (!unaligned_chunks && chunks_rem)
+		return -EINVAL;
 
 	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
 		return -EINVAL;
-- 
cgit 


From 3236d215ad38a3f5372e65cd1e0a52cf93d3c6a2 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Tue, 15 Sep 2020 09:54:08 +0200
Subject: batman-adv: mcast: fix duplicate mcast packets in BLA backbone from
 LAN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Scenario:
* Multicast frame send from a BLA backbone (multiple nodes with
  their bat0 bridged together, with BLA enabled)

Issue:
* BLA backbone nodes receive the frame multiple times on bat0

For multicast frames received via batman-adv broadcast packets the
originator of the broadcast packet is checked before decapsulating and
forwarding the frame to bat0 (batadv_bla_is_backbone_gw()->
batadv_recv_bcast_packet()). If it came from a node which shares the
same BLA backbone with us then it is not forwarded to bat0 to avoid a
loop.

When sending a multicast frame in a non-4-address batman-adv unicast
packet we are currently missing this check - and cannot do so because
the batman-adv unicast packet has no originator address field.

However, we can simply fix this on the sender side by only sending the
multicast frame via unicasts to interested nodes which do not share the
same BLA backbone with us. This also nicely avoids some unnecessary
transmissions on mesh side.

Note that no infinite loop was observed, probably because of dropping
via batadv_interface_tx()->batadv_bla_tx(). However the duplicates still
utterly confuse switches/bridges, ICMPv6 duplicate address detection and
neighbor discovery and therefore leads to long delays before being able
to establish TCP connections, for instance. And it also leads to the Linux
bridge printing messages like:
"br-lan: received packet on eth1 with own address as source address ..."

Fixes: 2d3f6ccc4ea5 ("batman-adv: Modified forwarding behaviour for multicast packets")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/multicast.c      | 46 ++++++++++++++++++++++++++++++++---------
 net/batman-adv/multicast.h      | 15 ++++++++++++++
 net/batman-adv/soft-interface.c |  5 ++---
 3 files changed, 53 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index bdc4a1fba1c6..ca24a2e522b7 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -51,6 +51,7 @@
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
+#include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
@@ -1434,6 +1435,35 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 	return BATADV_FORW_ALL;
 }
 
+/**
+ * batadv_mcast_forw_send_orig() - send a multicast packet to an originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to send
+ * @vid: the vlan identifier
+ * @orig_node: the originator to send the packet to
+ *
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+				struct sk_buff *skb,
+				unsigned short vid,
+				struct batadv_orig_node *orig_node)
+{
+	/* Avoid sending multicast-in-unicast packets to other BLA
+	 * gateways - they already got the frame from the LAN side
+	 * we share with them.
+	 * TODO: Refactor to take BLA into account earlier, to avoid
+	 * reducing the mcast_fanout count.
+	 */
+	if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) {
+		dev_kfree_skb(skb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0,
+				       orig_node, vid);
+}
+
 /**
  * batadv_mcast_forw_tt() - forwards a packet to multicast listeners
  * @bat_priv: the bat priv with all the soft interface information
@@ -1471,8 +1501,8 @@ batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
 			break;
 		}
 
-		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
-					orig_entry->orig_node, vid);
+		batadv_mcast_forw_send_orig(bat_priv, newskb, vid,
+					    orig_entry->orig_node);
 	}
 	rcu_read_unlock();
 
@@ -1513,8 +1543,7 @@ batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv,
 			break;
 		}
 
-		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
-					orig_node, vid);
+		batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
 	}
 	rcu_read_unlock();
 	return ret;
@@ -1551,8 +1580,7 @@ batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv,
 			break;
 		}
 
-		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
-					orig_node, vid);
+		batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
 	}
 	rcu_read_unlock();
 	return ret;
@@ -1618,8 +1646,7 @@ batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv,
 			break;
 		}
 
-		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
-					orig_node, vid);
+		batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
 	}
 	rcu_read_unlock();
 	return ret;
@@ -1656,8 +1683,7 @@ batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv,
 			break;
 		}
 
-		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
-					orig_node, vid);
+		batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
 	}
 	rcu_read_unlock();
 	return ret;
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index ebf825991ecd..3e114bc5ca3b 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -46,6 +46,11 @@ enum batadv_forw_mode
 batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		       struct batadv_orig_node **mcast_single_orig);
 
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+				struct sk_buff *skb,
+				unsigned short vid,
+				struct batadv_orig_node *orig_node);
+
 int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
 			   unsigned short vid);
 
@@ -71,6 +76,16 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 	return BATADV_FORW_ALL;
 }
 
+static inline int
+batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+			    struct sk_buff *skb,
+			    unsigned short vid,
+			    struct batadv_orig_node *orig_node)
+{
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
 static inline int
 batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		       unsigned short vid)
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 23833a0ba5e6..3d037b17f3a7 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -364,9 +364,8 @@ send:
 				goto dropped;
 			ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
 		} else if (mcast_single_orig) {
-			ret = batadv_send_skb_unicast(bat_priv, skb,
-						      BATADV_UNICAST, 0,
-						      mcast_single_orig, vid);
+			ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid,
+							  mcast_single_orig);
 		} else if (forw_mode == BATADV_FORW_SOME) {
 			ret = batadv_mcast_forw_send(bat_priv, skb, vid);
 		} else {
-- 
cgit 


From 74c09b7275126da1b642b90c9cdc3ae8b729ad4b Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Tue, 15 Sep 2020 09:54:09 +0200
Subject: batman-adv: mcast: fix duplicate mcast packets in BLA backbone from
 mesh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Scenario:
* Multicast frame send from mesh to a BLA backbone (multiple nodes
  with their bat0 bridged together, with BLA enabled)

Issue:
* BLA backbone nodes receive the frame multiple times on bat0,
  once from mesh->bat0 and once from each backbone_gw from LAN

For unicast, a node will send only to the best backbone gateway
according to the TQ. However for multicast we currently cannot determine
if multiple destination nodes share the same backbone if they don't share
the same backbone with us. So we need to keep sending the unicasts to
all backbone gateways and let the backbone gateways decide which one
will forward the frame. We can use the CLAIM mechanism to make this
decision.

One catch: The batman-adv gateway feature for DHCP packets potentially
sends multicast packets in the same batman-adv unicast header as the
multicast optimizations code. And we are not allowed to drop those even
if we did not claim the source address of the sender, as for such
packets there is only this one multicast-in-unicast packet.

How can we distinguish the two cases?

The gateway feature uses a batman-adv unicast 4 address header. While
the multicast-to-unicasts feature uses a simple, 3 address batman-adv
unicast header. So let's use this to distinguish.

Fixes: fe2da6ff27c7 ("batman-adv: check incoming packet type for bla")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 34 +++++++++++++++++++++++++---------
 net/batman-adv/bridge_loop_avoidance.h |  4 ++--
 net/batman-adv/soft-interface.c        |  6 +++---
 3 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 08419a2e6b95..68715783a742 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1814,7 +1814,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the frame to be checked
  * @vid: the VLAN ID of the frame
- * @is_bcast: the packet came in a broadcast packet type.
+ * @packet_type: the batman packet type this frame came in
  *
  * batadv_bla_rx avoidance checks if:
  *  * we have to race for a claim
@@ -1826,7 +1826,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
  * further process the skb.
  */
 bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
-		   unsigned short vid, bool is_bcast)
+		   unsigned short vid, int packet_type)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
 	struct ethhdr *ethhdr;
@@ -1848,9 +1848,24 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		goto handled;
 
 	if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
-		/* don't allow broadcasts while requests are in flight */
-		if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
-			goto handled;
+		/* don't allow multicast packets while requests are in flight */
+		if (is_multicast_ether_addr(ethhdr->h_dest))
+			/* Both broadcast flooding or multicast-via-unicasts
+			 * delivery might send to multiple backbone gateways
+			 * sharing the same LAN and therefore need to coordinate
+			 * which backbone gateway forwards into the LAN,
+			 * by claiming the payload source address.
+			 *
+			 * Broadcast flooding and multicast-via-unicasts
+			 * delivery use the following two batman packet types.
+			 * Note: explicitly exclude BATADV_UNICAST_4ADDR,
+			 * as the DHCP gateway feature will send explicitly
+			 * to only one BLA gateway, so the claiming process
+			 * should be avoided there.
+			 */
+			if (packet_type == BATADV_BCAST ||
+			    packet_type == BATADV_UNICAST)
+				goto handled;
 
 	ether_addr_copy(search_claim.addr, ethhdr->h_source);
 	search_claim.vid = vid;
@@ -1885,13 +1900,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		goto allow;
 	}
 
-	/* if it is a broadcast ... */
-	if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) {
+	/* if it is a multicast ... */
+	if (is_multicast_ether_addr(ethhdr->h_dest) &&
+	    (packet_type == BATADV_BCAST || packet_type == BATADV_UNICAST)) {
 		/* ... drop it. the responsible gateway is in charge.
 		 *
-		 * We need to check is_bcast because with the gateway
+		 * We need to check packet type because with the gateway
 		 * feature, broadcasts (like DHCP requests) may be sent
-		 * using a unicast packet type.
+		 * using a unicast 4 address packet type. See comment above.
 		 */
 		goto handled;
 	} else {
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 41edb2c4a327..a81c41b636f9 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -35,7 +35,7 @@ static inline bool batadv_bla_is_loopdetect_mac(const uint8_t *mac)
 
 #ifdef CONFIG_BATMAN_ADV_BLA
 bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
-		   unsigned short vid, bool is_bcast);
+		   unsigned short vid, int packet_type);
 bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		   unsigned short vid);
 bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
@@ -66,7 +66,7 @@ bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
 
 static inline bool batadv_bla_rx(struct batadv_priv *bat_priv,
 				 struct sk_buff *skb, unsigned short vid,
-				 bool is_bcast)
+				 int packet_type)
 {
 	return false;
 }
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 3d037b17f3a7..cdde943c1b83 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -424,10 +424,10 @@ void batadv_interface_rx(struct net_device *soft_iface,
 	struct vlan_ethhdr *vhdr;
 	struct ethhdr *ethhdr;
 	unsigned short vid;
-	bool is_bcast;
+	int packet_type;
 
 	batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data;
-	is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST);
+	packet_type = batadv_bcast_packet->packet_type;
 
 	skb_pull_rcsum(skb, hdr_size);
 	skb_reset_mac_header(skb);
@@ -470,7 +470,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
 	/* Let the bridge loop avoidance check the packet. If will
 	 * not handle it, we can safely push it up.
 	 */
-	if (batadv_bla_rx(bat_priv, skb, vid, is_bcast))
+	if (batadv_bla_rx(bat_priv, skb, vid, packet_type))
 		goto out;
 
 	if (orig_node)
-- 
cgit 


From 2369e827046920ef0599e6a36b975ac5c0a359c2 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Tue, 15 Sep 2020 09:54:10 +0200
Subject: batman-adv: mcast: fix duplicate mcast packets from BLA backbone to
 mesh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Scenario:
* Multicast frame send from BLA backbone gateways (multiple nodes
  with their bat0 bridged together, with BLA enabled) sharing the same
  LAN to nodes in the mesh

Issue:
* Nodes receive the frame multiple times on bat0 from the mesh,
  once from each foreign BLA backbone gateway which shares the same LAN
  with another

For multicast frames via batman-adv broadcast packets coming from the
same BLA backbone but from different backbone gateways duplicates are
currently detected via a CRC history of previously received packets.

However this CRC so far was not performed for multicast frames received
via batman-adv unicast packets. Fixing this by appyling the same check
for such packets, too.

Room for improvements in the future: Ideally we would introduce the
possibility to not only claim a client, but a complete originator, too.
This would allow us to only send a multicast-in-unicast packet from a BLA
backbone gateway claiming the node and by that avoid potential redundant
transmissions in the first place.

Fixes: 279e89b2281a ("batman-adv: add broadcast duplicate check")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 103 ++++++++++++++++++++++++++++-----
 1 file changed, 87 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 68715783a742..c350ab63cd54 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1581,13 +1581,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup.
+ * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup.
  * @bat_priv: the bat priv with all the soft interface information
- * @skb: contains the bcast_packet to be checked
+ * @skb: contains the multicast packet to be checked
+ * @payload_ptr: pointer to position inside the head buffer of the skb
+ *  marking the start of the data to be CRC'ed
+ * @orig: originator mac address, NULL if unknown
  *
- * check if it is on our broadcast list. Another gateway might
- * have sent the same packet because it is connected to the same backbone,
- * so we have to remove this duplicate.
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
  *
  * This is performed by checking the CRC, which will tell us
  * with a good chance that it is the same packet. If it is furthermore
@@ -1596,19 +1599,17 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
  *
  * Return: true if a packet is in the duplicate list, false otherwise.
  */
-bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
-				    struct sk_buff *skb)
+static bool batadv_bla_check_duplist(struct batadv_priv *bat_priv,
+				     struct sk_buff *skb, u8 *payload_ptr,
+				     const u8 *orig)
 {
-	int i, curr;
-	__be32 crc;
-	struct batadv_bcast_packet *bcast_packet;
 	struct batadv_bcast_duplist_entry *entry;
 	bool ret = false;
-
-	bcast_packet = (struct batadv_bcast_packet *)skb->data;
+	int i, curr;
+	__be32 crc;
 
 	/* calculate the crc ... */
-	crc = batadv_skb_crc32(skb, (u8 *)(bcast_packet + 1));
+	crc = batadv_skb_crc32(skb, payload_ptr);
 
 	spin_lock_bh(&bat_priv->bla.bcast_duplist_lock);
 
@@ -1627,8 +1628,21 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
 		if (entry->crc != crc)
 			continue;
 
-		if (batadv_compare_eth(entry->orig, bcast_packet->orig))
-			continue;
+		/* are the originators both known and not anonymous? */
+		if (orig && !is_zero_ether_addr(orig) &&
+		    !is_zero_ether_addr(entry->orig)) {
+			/* If known, check if the new frame came from
+			 * the same originator:
+			 * We are safe to take identical frames from the
+			 * same orig, if known, as multiplications in
+			 * the mesh are detected via the (orig, seqno) pair.
+			 * So we can be a bit more liberal here and allow
+			 * identical frames from the same orig which the source
+			 * host might have sent multiple times on purpose.
+			 */
+			if (batadv_compare_eth(entry->orig, orig))
+				continue;
+		}
 
 		/* this entry seems to match: same crc, not too old,
 		 * and from another gw. therefore return true to forbid it.
@@ -1644,7 +1658,14 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
 	entry = &bat_priv->bla.bcast_duplist[curr];
 	entry->crc = crc;
 	entry->entrytime = jiffies;
-	ether_addr_copy(entry->orig, bcast_packet->orig);
+
+	/* known originator */
+	if (orig)
+		ether_addr_copy(entry->orig, orig);
+	/* anonymous originator */
+	else
+		eth_zero_addr(entry->orig);
+
 	bat_priv->bla.bcast_duplist_curr = curr;
 
 out:
@@ -1653,6 +1674,48 @@ out:
 	return ret;
 }
 
+/**
+ * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: contains the multicast packet to be checked, decapsulated from a
+ *  unicast_packet
+ *
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
+ *
+ * Return: true if a packet is in the duplicate list, false otherwise.
+ */
+static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv,
+					   struct sk_buff *skb)
+{
+	return batadv_bla_check_duplist(bat_priv, skb, (u8 *)skb->data, NULL);
+}
+
+/**
+ * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: contains the bcast_packet to be checked
+ *
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
+ *
+ * Return: true if a packet is in the duplicate list, false otherwise.
+ */
+bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
+				    struct sk_buff *skb)
+{
+	struct batadv_bcast_packet *bcast_packet;
+	u8 *payload_ptr;
+
+	bcast_packet = (struct batadv_bcast_packet *)skb->data;
+	payload_ptr = (u8 *)(bcast_packet + 1);
+
+	return batadv_bla_check_duplist(bat_priv, skb, payload_ptr,
+					bcast_packet->orig);
+}
+
 /**
  * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for
  *  the VLAN identified by vid.
@@ -1867,6 +1930,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 			    packet_type == BATADV_UNICAST)
 				goto handled;
 
+	/* potential duplicates from foreign BLA backbone gateways via
+	 * multicast-in-unicast packets
+	 */
+	if (is_multicast_ether_addr(ethhdr->h_dest) &&
+	    packet_type == BATADV_UNICAST &&
+	    batadv_bla_check_ucast_duplist(bat_priv, skb))
+		goto handled;
+
 	ether_addr_copy(search_claim.addr, ethhdr->h_source);
 	search_claim.vid = vid;
 	claim = batadv_claim_hash_find(bat_priv, &search_claim);
-- 
cgit 


From 2e5117ba9f582262e93a1fdf8e1a7b9affd5121c Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Tue, 15 Sep 2020 10:39:55 +0800
Subject: net: tipc: kerneldoc fixes

Fix parameter description of tipc_link_bc_create()

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: 16ad3f4022bb ("tipc: introduce variable window congestion control")
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index b7362556da95..cef38a910107 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -532,7 +532,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
  * tipc_link_bc_create - create new link to be used for broadcast
  * @net: pointer to associated network namespace
  * @mtu: mtu to be used initially if no peers
- * @window: send window to be used
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
  * @inputq: queue to put messages ready for delivery
  * @namedq: queue to put binding table update messages ready for delivery
  * @link: return value, pointer to put the created link
-- 
cgit 


From 2fbc6e89b2f1403189e624cabaf73e189c5e50c6 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Mon, 14 Sep 2020 21:03:54 -0600
Subject: ipv4: Update exception handling for multipath routes via same device

Kfir reported that pmtu exceptions are not created properly for
deployments where multipath routes use the same device.

After some digging I see 2 compounding problems:
1. ip_route_output_key_hash_rcu is updating the flowi4_oif *after*
   the route lookup. This is the second use case where this has
   been a problem (the first is related to use of vti devices with
   VRF). I can not find any reason for the oif to be changed after the
   lookup; the code goes back to the start of git. It does not seem
   logical so remove it.

2. fib_lookups for exceptions do not call fib_select_path to handle
   multipath route selection based on the hash.

The end result is that the fib_lookup used to add the exception
always creates it based using the first leg of the route.

An example topology showing the problem:

                 |  host1
             +------+
             | eth0 |  .209
             +------+
                 |
             +------+
     switch  | br0  |
             +------+
                 |
       +---------+---------+
       | host2             |  host3
   +------+             +------+
   | eth0 | .250        | eth0 | 192.168.252.252
   +------+             +------+

   +-----+             +-----+
   | vti | .2          | vti | 192.168.247.3
   +-----+             +-----+
       \                  /
 =================================
 tunnels
         192.168.247.1/24

for h in host1 host2 host3; do
        ip netns add ${h}
        ip -netns ${h} link set lo up
        ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1
done

ip netns add switch
ip -netns switch li set lo up
ip -netns switch link add br0 type bridge stp 0
ip -netns switch link set br0 up

for n in 1 2 3; do
        ip -netns switch link add eth-sw type veth peer name eth-h${n}
        ip -netns switch li set eth-h${n} master br0 up
        ip -netns switch li set eth-sw netns host${n} name eth0
done

ip -netns host1 addr add 192.168.252.209/24 dev eth0
ip -netns host1 link set dev eth0 up
ip -netns host1 route add 192.168.247.0/24 \
        nexthop via 192.168.252.250 dev eth0 nexthop via 192.168.252.252 dev eth0

ip -netns host2 addr add 192.168.252.250/24 dev eth0
ip -netns host2 link set dev eth0 up

ip -netns host2 addr add 192.168.252.252/24 dev eth0
ip -netns host3 link set dev eth0 up

ip netns add tunnel
ip -netns tunnel li set lo up
ip -netns tunnel li add br0 type bridge
ip -netns tunnel li set br0 up
for n in $(seq 11 20); do
        ip -netns tunnel addr add dev br0 192.168.247.${n}/24
done

for n in 2 3
do
        ip -netns tunnel link add vti${n} type veth peer name eth${n}
        ip -netns tunnel link set eth${n} mtu 1360 master br0 up
        ip -netns tunnel link set vti${n} netns host${n} mtu 1360 up
        ip -netns host${n} addr add dev vti${n} 192.168.247.${n}/24
done
ip -netns tunnel ro add default nexthop via 192.168.247.2 nexthop via 192.168.247.3

ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.11
ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.15
ip -netns host1 ro ls cache

Before this patch the cache always shows exceptions against the first
leg in the multipath route; 192.168.252.250 per this example. Since the
hash has an initial random seed, you may need to vary the final octet
more than what is listed. In my tests, using addresses between 11 and 19
usually found 1 that used both legs.

With this patch, the cache will have exceptions for both legs.

Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions")
Reported-by: Kfir Itzhak <mastertheknife@gmail.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e5f210d00851..58642b29a499 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -786,8 +786,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 			neigh_event_send(n, NULL);
 		} else {
 			if (fib_lookup(net, fl4, &res, 0) == 0) {
-				struct fib_nh_common *nhc = FIB_RES_NHC(res);
+				struct fib_nh_common *nhc;
 
+				fib_select_path(net, &res, fl4, skb);
+				nhc = FIB_RES_NHC(res);
 				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
 						0, false,
 						jiffies + ip_rt_gc_timeout);
@@ -1013,6 +1015,7 @@ out:	kfree_skb(skb);
 static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 {
 	struct dst_entry *dst = &rt->dst;
+	struct net *net = dev_net(dst->dev);
 	u32 old_mtu = ipv4_mtu(dst);
 	struct fib_result res;
 	bool lock = false;
@@ -1033,9 +1036,11 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 		return;
 
 	rcu_read_lock();
-	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
-		struct fib_nh_common *nhc = FIB_RES_NHC(res);
+	if (fib_lookup(net, fl4, &res, 0) == 0) {
+		struct fib_nh_common *nhc;
 
+		fib_select_path(net, &res, fl4, NULL);
+		nhc = FIB_RES_NHC(res);
 		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
 	}
@@ -2668,8 +2673,6 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 	fib_select_path(net, res, fl4, skb);
 
 	dev_out = FIB_RES_DEV(*res);
-	fl4->flowi4_oif = dev_out->ifindex;
-
 
 make_route:
 	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
-- 
cgit 


From 8c33dadc3e0eef1599811a55d748a0b95da0317d Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 15 Sep 2020 11:29:59 -0700
Subject: bpf: Bpf_skc_to_* casting helpers require a NULL check on sk

The bpf_skc_to_* type casting helpers are available to
BPF_PROG_TYPE_TRACING.  The traced PTR_TO_BTF_ID may be NULL.
For example, the skb->sk may be NULL.  Thus, these casting helpers
need to check "!sk" also and this patch fixes them.

Fixes: 0d4fad3e57df ("bpf: Add bpf_skc_to_udp6_sock() helper")
Fixes: 478cfbdf5f13 ("bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers")
Fixes: af7ec1383361 ("bpf: Add bpf_skc_to_tcp6_sock() helper")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200915182959.241101-1-kafai@fb.com
---
 net/core/filter.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 2d62c25e0395..23e8ded0ec97 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9522,7 +9522,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
 	 * trigger an explicit type generation here.
 	 */
 	BTF_TYPE_EMIT(struct tcp6_sock);
-	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
 	    sk->sk_family == AF_INET6)
 		return (unsigned long)sk;
 
@@ -9540,7 +9540,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
 
 BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
 {
-	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
 		return (unsigned long)sk;
 
 	return (unsigned long)NULL;
@@ -9558,12 +9558,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
 BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
 {
 #ifdef CONFIG_INET
-	if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
+	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
 		return (unsigned long)sk;
 #endif
 
 #if IS_BUILTIN(CONFIG_IPV6)
-	if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
+	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
 		return (unsigned long)sk;
 #endif
 
@@ -9582,12 +9582,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
 BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
 {
 #ifdef CONFIG_INET
-	if (sk->sk_prot == &tcp_prot  && sk->sk_state == TCP_NEW_SYN_RECV)
+	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
 		return (unsigned long)sk;
 #endif
 
 #if IS_BUILTIN(CONFIG_IPV6)
-	if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
 		return (unsigned long)sk;
 #endif
 
@@ -9609,7 +9609,7 @@ BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
 	 * trigger an explicit type generation here.
 	 */
 	BTF_TYPE_EMIT(struct udp6_sock);
-	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
+	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
 	    sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
 		return (unsigned long)sk;
 
-- 
cgit 


From 19a83d36f9837e8bd27435ebb31564a717a5d15a Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Thu, 17 Sep 2020 01:04:10 +0200
Subject: ethtool: add and use message type for tunnel info reply

Tunnel offload info code uses ETHTOOL_MSG_TUNNEL_INFO_GET message type (cmd
field in genetlink header) for replies to tunnel info netlink request, i.e.
the same value as the request have. This is a problem because we are using
two separate enums for userspace to kernel and kernel to userspace message
types so that this ETHTOOL_MSG_TUNNEL_INFO_GET (28) collides with
ETHTOOL_MSG_CABLE_TEST_TDR_NTF which is what message type 28 means for
kernel to userspace messages.

As the tunnel info request reached mainline in 5.9 merge window, we should
still be able to fix the reply message type without breaking backward
compatibility.

Fixes: c7d759eb7b12 ("ethtool: add tunnel info interface")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/tunnels.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c
index 84f23289475b..d93bf2da0f34 100644
--- a/net/ethtool/tunnels.c
+++ b/net/ethtool/tunnels.c
@@ -200,7 +200,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info)
 	reply_len = ret + ethnl_reply_header_size();
 
 	rskb = ethnl_reply_init(reply_len, req_info.dev,
-				ETHTOOL_MSG_TUNNEL_INFO_GET,
+				ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
 				ETHTOOL_A_TUNNEL_INFO_HEADER,
 				info, &reply_payload);
 	if (!rskb) {
@@ -273,7 +273,7 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 				goto cont;
 
 			ehdr = ethnl_dump_put(skb, cb,
-					      ETHTOOL_MSG_TUNNEL_INFO_GET);
+					      ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
 			if (!ehdr) {
 				ret = -EMSGSIZE;
 				goto out;
-- 
cgit 


From 1f38b8c564b8c24132428f6a4d04e05366fa10f4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 15 Sep 2020 10:59:44 +0200
Subject: mac80211: extend AQL aggregation estimation to HE and fix unit
 mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The unit of the return value of ieee80211_get_rate_duration is nanoseconds, not
microseconds. Adjust the duration checks to account for that.
For higher data rates, allow larger estimated aggregation sizes, and add some
values for HE as well, which can use much larger aggregates.
Since small packets with high data rates can now lead to duration values too
small for info->tx_time_est, return a minimum of 4us.

Fixes: f01cfbaf9b29 ("mac80211: improve AQL aggregation estimation for low data rates")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20200915085945.3782-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/airtime.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 314973033d03..45140e535151 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -668,20 +668,26 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
 		 * This will not be very accurate, but much better than simply
 		 * assuming un-aggregated tx in all cases.
 		 */
-		if (duration > 400) /* <= VHT20 MCS2 1S */
+		if (duration > 400 * 1024) /* <= VHT20 MCS2 1S */
 			agg_shift = 1;
-		else if (duration > 250) /* <= VHT20 MCS3 1S or MCS1 2S */
+		else if (duration > 250 * 1024) /* <= VHT20 MCS3 1S or MCS1 2S */
 			agg_shift = 2;
-		else if (duration > 150) /* <= VHT20 MCS5 1S or MCS3 2S */
+		else if (duration > 150 * 1024) /* <= VHT20 MCS5 1S or MCS2 2S */
 			agg_shift = 3;
-		else
+		else if (duration > 70 * 1024) /* <= VHT20 MCS5 2S */
 			agg_shift = 4;
+		else if (stat.encoding != RX_ENC_HE ||
+			 duration > 20 * 1024) /* <= HE40 MCS6 2S */
+			agg_shift = 5;
+		else
+			agg_shift = 6;
 
 		duration *= len;
 		duration /= AVG_PKT_SIZE;
 		duration /= 1024;
+		duration += (overhead >> agg_shift);
 
-		return duration + (overhead >> agg_shift);
+		return max_t(u32, duration, 4);
 	}
 
 	if (!conf)
-- 
cgit 


From 8e280369b9076dc08ce13c802c1687b81b4cccd4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 15 Sep 2020 10:59:45 +0200
Subject: mac80211: add AQL support for VHT160 tx rates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When converting from struct ieee80211_tx_rate to ieee80211_rx_status,
there was one check missing to fill in the bandwidth for 160 MHz

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20200915085945.3782-2-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/airtime.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 45140e535151..26d2f8ba7029 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -560,7 +560,9 @@ static int ieee80211_fill_rx_status(struct ieee80211_rx_status *stat,
 	if (rate->idx < 0 || !rate->count)
 		return -1;
 
-	if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
+	if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
+		stat->bw = RATE_INFO_BW_160;
+	else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
 		stat->bw = RATE_INFO_BW_80;
 	else if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
 		stat->bw = RATE_INFO_BW_40;
-- 
cgit 


From b959ba9f468b1c581f40e92661ad58b093abaa03 Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Date: Wed, 9 Sep 2020 12:54:53 +0300
Subject: lib80211: fix unmet direct dependendices config warning when !CRYPTO

When LIB80211_CRYPT_CCMP is enabled and CRYPTO is disabled, it results in unmet
direct dependencies config warning. The reason is that LIB80211_CRYPT_CCMP
selects CRYPTO_AES and CRYPTO_CCM, which are subordinate to CRYPTO. This is
reproducible with CRYPTO disabled and R8188EU enabled, where R8188EU selects
LIB80211_CRYPT_CCMP but does not select or depend on CRYPTO.

Honor the kconfig menu hierarchy to remove kconfig dependency warnings.

Fixes: a11e2f85481c ("lib80211: use crypto API ccm(aes) transform for CCMP processing")
Signed-off-by: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Link: https://lore.kernel.org/r/20200909095452.3080-1-fazilyildiran@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index faf74850a1b5..27026f587fa6 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -217,6 +217,7 @@ config LIB80211_CRYPT_WEP
 
 config LIB80211_CRYPT_CCMP
 	tristate
+	select CRYPTO
 	select CRYPTO_AES
 	select CRYPTO_CCM
 
-- 
cgit 


From 412a84b5714af56f3eb648bba155107b5edddfdf Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Fri, 11 Sep 2020 01:11:35 +0000
Subject: mac80211: Fix radiotap header channel flag for 6GHz band

Radiotap header field 'Channel flags' has '2 GHz spectrum' set to
'true' for 6GHz packet.
Change it to 5GHz as there isn't a separate option available for 6GHz.

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/010101747ab7b703-1d7c9851-1594-43bf-81f7-f79ce7a67cc6-000000@us-west-2.amazonses.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 836cde516a18..a959ebf56852 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -451,7 +451,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	else if (status->bw == RATE_INFO_BW_5)
 		channel_flags |= IEEE80211_CHAN_QUARTER;
 
-	if (status->band == NL80211_BAND_5GHZ)
+	if (status->band == NL80211_BAND_5GHZ ||
+	    status->band == NL80211_BAND_6GHZ)
 		channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ;
 	else if (status->encoding != RX_ENC_LEGACY)
 		channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ;
-- 
cgit 


From 780a8c9efc65f6d86acd44794495cedcd32eeb26 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Fri, 11 Sep 2020 10:29:02 +0000
Subject: mac80211: do not disable HE if HT is missing on 2.4 GHz

VHT is not supported on 2.4 GHz, but HE is; don't disable HE if HT
is missing there, do that only on 5 GHz (6 GHz is only HE).

Fixes: 57fa5e85d53ce51 ("mac80211: determine chandef from HE 6 GHz operation")
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Link: https://lore.kernel.org/r/010101747cb617f2-593c5410-1648-4a42-97a0-f3646a5a6dd1-000000@us-west-2.amazonses.com
[rewrite the commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ac870309b911..2e400b0ff696 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4861,6 +4861,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_supported_band *sband;
 	struct cfg80211_chan_def chandef;
 	bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
+	bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
 	struct ieee80211_bss *bss = (void *)cbss->priv;
 	int ret;
 	u32 i;
@@ -4879,7 +4880,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 	}
 
-	if (!sband->vht_cap.vht_supported && !is_6ghz) {
+	if (!sband->vht_cap.vht_supported && is_5ghz) {
 		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 	}
-- 
cgit 


From c0de8776af6543e10d1a5c8969679fd9f6b66fa9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Sep 2020 11:52:23 +0200
Subject: cfg80211: fix 6 GHz channel conversion

We shouldn't accept any channels bigger than 233, fix that.

Reported-by: Amar <asinghal@codeaurora.org>
Fixes: d1a1646c0de7 ("cfg80211: adapt to new channelization of the 6GHz band")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200917115222.312ba6f1d461.I3a8c8fbcc3cc019814fd9cd0aced7eb591626136@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4a9ff9ef513f..6fa99df52f86 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -95,7 +95,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band)
 		/* see 802.11ax D6.1 27.3.23.2 */
 		if (chan == 2)
 			return MHZ_TO_KHZ(5935);
-		if (chan <= 253)
+		if (chan <= 233)
 			return MHZ_TO_KHZ(5950 + chan * 5);
 		break;
 	case NL80211_BAND_60GHZ:
-- 
cgit 


From 3bd5c7a28a7c3aba07a2d300d43f8e988809e147 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 17 Sep 2020 14:50:31 +0200
Subject: mac80211: do not allow bigger VHT MPDUs than the hardware supports

Limit maximum VHT MPDU size by local capability.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20200917125031.45009-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/vht.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 9c6045f9c24d..d1b64d0751f2 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -168,10 +168,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 	/* take some capabilities as-is */
 	cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info);
 	vht_cap->cap = cap_info;
-	vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 |
-			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
-			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
-			IEEE80211_VHT_CAP_RXLDPC |
+	vht_cap->cap &= IEEE80211_VHT_CAP_RXLDPC |
 			IEEE80211_VHT_CAP_VHT_TXOP_PS |
 			IEEE80211_VHT_CAP_HTC_VHT |
 			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK |
@@ -180,6 +177,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 			IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN |
 			IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN;
 
+	vht_cap->cap |= min_t(u32, cap_info & IEEE80211_VHT_CAP_MAX_MPDU_MASK,
+			      own_cap.cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK);
+
 	/* and some based on our own capabilities */
 	switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
 	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
-- 
cgit 


From 75bcbd6913de649601f4e7d3fb6d2b5effc24e9e Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 18 Sep 2020 13:53:04 +0200
Subject: mac80211: fix 80 MHz association to 160/80+80 AP on 6 GHz

When trying to associate to an AP support 180 or 80+80 MHz on 6 GHz with a
STA that only has 80 Mhz support the cf2 field inside the chandef will get
set causing the association to fail when trying to validate the chandef.
Fix this by checking the support flags prior to setting cf2.

Fixes: 57fa5e85d53ce ("mac80211: determine chandef from HE 6 GHz operation")
Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20200918115304.1135693-1-john@phrozen.org
[reword commit message a bit]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c8504ffc71a1..8d3bfc0fe176 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3353,9 +3353,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
 		he_chandef.center_freq1 =
 			ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0,
 						       NL80211_BAND_6GHZ);
-		he_chandef.center_freq2 =
-			ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
-						       NL80211_BAND_6GHZ);
+		if (support_80_80 || support_160)
+			he_chandef.center_freq2 =
+				ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+							       NL80211_BAND_6GHZ);
 	}
 
 	if (!cfg80211_chandef_valid(&he_chandef)) {
-- 
cgit 


From 6565243c0677aa2befa5a953cf11bc7b4a6f0a47 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 18 Sep 2020 04:07:24 +0300
Subject: net: mscc: ocelot: add locking for the port TX timestamp ID

The ocelot_port->ts_id is used to:
(a) populate skb->cb[0] for matching the TX timestamp in the PTP IRQ
    with an skb.
(b) populate the REW_OP from the injection header of the ongoing skb.
Only then is ocelot_port->ts_id incremented.

This is a problem because, at least theoretically, another timestampable
skb might use the same ocelot_port->ts_id before that is incremented.
Normally all transmit calls are serialized by the netdev transmit
spinlock, but in this case, ocelot_port_add_txtstamp_skb() is also
called by DSA, which has started declaring the NETIF_F_LLTX feature
since commit 2b86cb829976 ("net: dsa: declare lockless TX feature for
slave ports").  So the logic of using and incrementing the timestamp id
should be atomic per port.

The solution is to use the global ocelot_port->ts_id only while
protected by the associated ocelot_port->ts_id_lock. That's where we
populate skb->cb[0]. Note that for ocelot, ocelot_port_add_txtstamp_skb
is called for the actual skb, but for felix, it is called for the skb's
clone. That is something which will also be changed in the future.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_ocelot.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 42f327c06dca..b4fc05cafaa6 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -160,11 +160,14 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 	packing(injection, &qos_class, 19,  17, OCELOT_TAG_LEN, PACK, 0);
 
 	if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+
 		rew_op = ocelot_port->ptp_cmd;
-		if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
-			rew_op |= (ocelot_port->ts_id  % 4) << 3;
-			ocelot_port->ts_id++;
-		}
+		/* Retrieve timestamp ID populated inside skb->cb[0] of the
+		 * clone by ocelot_port_add_txtstamp_skb
+		 */
+		if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+			rew_op |= clone->cb[0] << 3;
 
 		packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0);
 	}
-- 
cgit 


From db7cd91a4be15e1485d6b58c6afc8761c59c4efb Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Date: Thu, 17 Sep 2020 19:46:43 +0300
Subject: net: ipv6: fix kconfig dependency warning for IPV6_SEG6_HMAC

When IPV6_SEG6_HMAC is enabled and CRYPTO is disabled, it results in the
following Kbuild warning:

WARNING: unmet direct dependencies detected for CRYPTO_HMAC
  Depends on [n]: CRYPTO [=n]
  Selected by [y]:
  - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y]

WARNING: unmet direct dependencies detected for CRYPTO_SHA1
  Depends on [n]: CRYPTO [=n]
  Selected by [y]:
  - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y]

WARNING: unmet direct dependencies detected for CRYPTO_SHA256
  Depends on [n]: CRYPTO [=n]
  Selected by [y]:
  - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y]

The reason is that IPV6_SEG6_HMAC selects CRYPTO_HMAC, CRYPTO_SHA1, and
CRYPTO_SHA256 without depending on or selecting CRYPTO while those configs
are subordinate to CRYPTO.

Honor the kconfig menu hierarchy to remove kconfig dependency warnings.

Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support")
Signed-off-by: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 76bff79d6fed..747f56e0c636 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -303,6 +303,7 @@ config IPV6_SEG6_LWTUNNEL
 config IPV6_SEG6_HMAC
 	bool "IPv6: Segment Routing HMAC support"
 	depends on IPV6
+	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
-- 
cgit 


From fe81d9f6182d1160e625894eecb3d7ff0222cac5 Mon Sep 17 00:00:00 2001
From: Henry Ptasinski <hptasinski@google.com>
Date: Sat, 19 Sep 2020 00:12:11 +0000
Subject: net: sctp: Fix IPv6 ancestor_size calc in sctp_copy_descendant

When calculating ancestor_size with IPv6 enabled, simply using
sizeof(struct ipv6_pinfo) doesn't account for extra bytes needed for
alignment in the struct sctp6_sock. On x86, there aren't any extra
bytes, but on ARM the ipv6_pinfo structure is aligned on an 8-byte
boundary so there were 4 pad bytes that were omitted from the
ancestor_size calculation.  This would lead to corruption of the
pd_lobby pointers, causing an oops when trying to free the sctp
structure on socket close.

Fixes: 636d25d557d1 ("sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant")
Signed-off-by: Henry Ptasinski <hptasinski@google.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 836615f71a7d..53d0a4161df3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9220,13 +9220,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 static inline void sctp_copy_descendant(struct sock *sk_to,
 					const struct sock *sk_from)
 {
-	int ancestor_size = sizeof(struct inet_sock) +
-			    sizeof(struct sctp_sock) -
-			    offsetof(struct sctp_sock, pd_lobby);
-
-	if (sk_from->sk_family == PF_INET6)
-		ancestor_size += sizeof(struct ipv6_pinfo);
+	size_t ancestor_size = sizeof(struct inet_sock);
 
+	ancestor_size += sk_from->sk_prot->obj_size;
+	ancestor_size -= offsetof(struct sctp_sock, pd_lobby);
 	__inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
 }
 
-- 
cgit 


From 91b2c9a0fdb56e3d7eb6bf06a39ad7a50f000916 Mon Sep 17 00:00:00 2001
From: Xu Wang <vulab@iscas.ac.cn>
Date: Mon, 21 Sep 2020 06:38:56 +0000
Subject: ipv6: route: convert comma to semicolon

Replace a comma between expression statements by a semicolon.

Signed-off-by: Xu Wang <vulab@iscas.ac.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5e7e25e2523a..fb075d9545b9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4202,7 +4202,7 @@ static struct fib6_info *rt6_add_route_info(struct net *net,
 		.fc_nlinfo.nl_net = net,
 	};
 
-	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
+	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
 	cfg.fc_dst = *prefix;
 	cfg.fc_gateway = *gwaddr;
 
-- 
cgit 


From 99f62a746066fa436aa15d4606a538569540db08 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 22 Sep 2020 01:07:09 +0300
Subject: net: bridge: br_vlan_get_pvid_rcu() should dereference the VLAN group
 under RCU

When calling the RCU brother of br_vlan_get_pvid(), lockdep warns:

=============================
WARNING: suspicious RCU usage
5.9.0-rc3-01631-g13c17acb8e38-dirty #814 Not tainted
-----------------------------
net/bridge/br_private.h:1054 suspicious rcu_dereference_protected() usage!

Call trace:
 lockdep_rcu_suspicious+0xd4/0xf8
 __br_vlan_get_pvid+0xc0/0x100
 br_vlan_get_pvid_rcu+0x78/0x108

The warning is because br_vlan_get_pvid_rcu() calls nbp_vlan_group()
which calls rtnl_dereference() instead of rcu_dereference(). In turn,
rtnl_dereference() calls rcu_dereference_protected() which assumes
operation under an RCU write-side critical section, which obviously is
not the case here. So, when the incorrect primitive is used to access
the RCU-protected VLAN group pointer, READ_ONCE() is not used, which may
cause various unexpected problems.

I'm sad to say that br_vlan_get_pvid() and br_vlan_get_pvid_rcu() cannot
share the same implementation. So fix the bug by splitting the 2
functions, and making br_vlan_get_pvid_rcu() retrieve the VLAN groups
under proper locking annotations.

Fixes: 7582f5b70f9a ("bridge: add br_vlan_get_pvid_rcu()")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index f9092c71225f..61c94cefa843 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1288,11 +1288,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
 	}
 }
 
-static int __br_vlan_get_pvid(const struct net_device *dev,
-			      struct net_bridge_port *p, u16 *p_pvid)
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
 {
 	struct net_bridge_vlan_group *vg;
+	struct net_bridge_port *p;
 
+	ASSERT_RTNL();
+	p = br_port_get_check_rtnl(dev);
 	if (p)
 		vg = nbp_vlan_group(p);
 	else if (netif_is_bridge_master(dev))
@@ -1303,18 +1305,23 @@ static int __br_vlan_get_pvid(const struct net_device *dev,
 	*p_pvid = br_get_pvid(vg);
 	return 0;
 }
-
-int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
-{
-	ASSERT_RTNL();
-
-	return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid);
-}
 EXPORT_SYMBOL_GPL(br_vlan_get_pvid);
 
 int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
 {
-	return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid);
+	struct net_bridge_vlan_group *vg;
+	struct net_bridge_port *p;
+
+	p = br_port_get_check_rcu(dev);
+	if (p)
+		vg = nbp_vlan_group_rcu(p);
+	else if (netif_is_bridge_master(dev))
+		vg = br_vlan_group_rcu(netdev_priv(dev));
+	else
+		return -EINVAL;
+
+	*p_pvid = br_get_pvid(vg);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
 
-- 
cgit 


From d5e4d0a5e692a942f0c212e37dc6aeac47ecbdea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Sep 2020 07:27:20 -0700
Subject: inet_diag: validate INET_DIAG_REQ_PROTOCOL attribute

User space could send an invalid INET_DIAG_REQ_PROTOCOL attribute
as caught by syzbot.

BUG: KMSAN: uninit-value in inet_diag_lock_handler net/ipv4/inet_diag.c:55 [inline]
BUG: KMSAN: uninit-value in __inet_diag_dump+0x58c/0x720 net/ipv4/inet_diag.c:1147
CPU: 0 PID: 8505 Comm: syz-executor174 Not tainted 5.9.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x21c/0x280 lib/dump_stack.c:118
 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:122
 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:219
 inet_diag_lock_handler net/ipv4/inet_diag.c:55 [inline]
 __inet_diag_dump+0x58c/0x720 net/ipv4/inet_diag.c:1147
 inet_diag_dump_compat+0x2a5/0x380 net/ipv4/inet_diag.c:1254
 netlink_dump+0xb73/0x1cb0 net/netlink/af_netlink.c:2246
 __netlink_dump_start+0xcf2/0xea0 net/netlink/af_netlink.c:2354
 netlink_dump_start include/linux/netlink.h:246 [inline]
 inet_diag_rcv_msg_compat+0x5da/0x6c0 net/ipv4/inet_diag.c:1288
 sock_diag_rcv_msg+0x24f/0x620 net/core/sock_diag.c:256
 netlink_rcv_skb+0x6d7/0x7e0 net/netlink/af_netlink.c:2470
 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:275
 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline]
 netlink_unicast+0x11c8/0x1490 net/netlink/af_netlink.c:1330
 netlink_sendmsg+0x173a/0x1840 net/netlink/af_netlink.c:1919
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg net/socket.c:671 [inline]
 ____sys_sendmsg+0xc82/0x1240 net/socket.c:2353
 ___sys_sendmsg net/socket.c:2407 [inline]
 __sys_sendmsg+0x6d1/0x820 net/socket.c:2440
 __do_sys_sendmsg net/socket.c:2449 [inline]
 __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447
 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447
 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x441389
Code: e8 fc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 1b 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fff3b02ce98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441389
RDX: 0000000000000000 RSI: 0000000020001500 RDI: 0000000000000003
RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8
R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000402130
R13: 00000000004021c0 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:143 [inline]
 kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:126
 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:80
 slab_alloc_node mm/slub.c:2907 [inline]
 __kmalloc_node_track_caller+0x9aa/0x12f0 mm/slub.c:4511
 __kmalloc_reserve net/core/skbuff.c:142 [inline]
 __alloc_skb+0x35f/0xb30 net/core/skbuff.c:210
 alloc_skb include/linux/skbuff.h:1094 [inline]
 netlink_alloc_large_skb net/netlink/af_netlink.c:1176 [inline]
 netlink_sendmsg+0xdb9/0x1840 net/netlink/af_netlink.c:1894
 sock_sendmsg_nosec net/socket.c:651 [inline]
 sock_sendmsg net/socket.c:671 [inline]
 ____sys_sendmsg+0xc82/0x1240 net/socket.c:2353
 ___sys_sendmsg net/socket.c:2407 [inline]
 __sys_sendmsg+0x6d1/0x820 net/socket.c:2440
 __do_sys_sendmsg net/socket.c:2449 [inline]
 __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447
 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447
 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 3f935c75eb52 ("inet_diag: support for wider protocol numbers")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Christoph Paasch <cpaasch@apple.com>
Cc: Mat Martineau <mathew.j.martineau@linux.intel.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4a98dd736270..f1bd95f243b3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -186,8 +186,8 @@ errout:
 }
 EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
 
-static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
-				  struct nlattr **req_nlas)
+static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
+				 struct nlattr **req_nlas)
 {
 	struct nlattr *nla;
 	int remaining;
@@ -195,9 +195,13 @@ static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
 	nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) {
 		int type = nla_type(nla);
 
+		if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32))
+			return -EINVAL;
+
 		if (type < __INET_DIAG_REQ_MAX)
 			req_nlas[type] = nla;
 	}
+	return 0;
 }
 
 static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req,
@@ -574,7 +578,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
 	int err, protocol;
 
 	memset(&dump_data, 0, sizeof(dump_data));
-	inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas);
+	err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas);
+	if (err)
+		return err;
+
 	protocol = inet_diag_get_protocol(req, &dump_data);
 
 	handler = inet_diag_lock_handler(protocol);
@@ -1180,8 +1187,11 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
 	if (!cb_data)
 		return -ENOMEM;
 
-	inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas);
-
+	err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas);
+	if (err) {
+		kfree(cb_data);
+		return err;
+	}
 	nla = cb_data->inet_diag_nla_bc;
 	if (nla) {
 		err = inet_diag_bc_audit(nla, skb);
-- 
cgit