From 0e74aa1d79a5bbc663e03a2804399cae418a0321 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 10 Nov 2017 14:14:06 +1100
Subject: xfrm: Copy policy family in clone_policy

The syzbot found an ancient bug in the IPsec code.  When we cloned
a socket policy (for example, for a child TCP socket derived from a
listening socket), we did not copy the family field.  This results
in a live policy with a zero family field.  This triggers a BUG_ON
check in the af_key code when the cloned policy is retrieved.

This patch fixes it by copying the family field over.

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6eb228a70131..2a6093840e7e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1306,6 +1306,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 		newp->xfrm_nr = old->xfrm_nr;
 		newp->index = old->index;
 		newp->type = old->type;
+		newp->family = old->family;
 		memcpy(newp->xfrm_vec, old->xfrm_vec,
 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 		spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-- 
cgit 


From 94802151894d482e82c324edf2c658f8e6b96508 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 15 Nov 2017 06:40:57 +0100
Subject: Revert "xfrm: Fix stack-out-of-bounds read in xfrm_state_find."

This reverts commit c9f3f813d462c72dbe412cee6a5cbacf13c4ad5e.

This commit breaks transport mode when the policy template
has widlcard addresses configured, so revert it.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2a6093840e7e..6bc16bb61b55 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1362,29 +1362,36 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
 	struct net *net = xp_net(policy);
 	int nx;
 	int i, error;
+	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
+	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
 	xfrm_address_t tmp;
 
 	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
 		struct xfrm_state *x;
-		xfrm_address_t *local;
-		xfrm_address_t *remote;
+		xfrm_address_t *remote = daddr;
+		xfrm_address_t *local  = saddr;
 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
-		remote = &tmpl->id.daddr;
-		local = &tmpl->saddr;
-		if (xfrm_addr_any(local, tmpl->encap_family)) {
-			error = xfrm_get_saddr(net, fl->flowi_oif,
-					       &tmp, remote,
-					       tmpl->encap_family, 0);
-			if (error)
-				goto fail;
-			local = &tmp;
+		if (tmpl->mode == XFRM_MODE_TUNNEL ||
+		    tmpl->mode == XFRM_MODE_BEET) {
+			remote = &tmpl->id.daddr;
+			local = &tmpl->saddr;
+			if (xfrm_addr_any(local, tmpl->encap_family)) {
+				error = xfrm_get_saddr(net, fl->flowi_oif,
+						       &tmp, remote,
+						       tmpl->encap_family, 0);
+				if (error)
+					goto fail;
+				local = &tmp;
+			}
 		}
 
 		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
 
 		if (x && x->km.state == XFRM_STATE_VALID) {
 			xfrm[nx++] = x;
+			daddr = remote;
+			saddr = local;
 			continue;
 		}
 		if (x) {
-- 
cgit 


From ca3af4dd28cff4e7216e213ba3b671fbf9f84758 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 15 Nov 2017 16:55:54 +0800
Subject: sctp: do not free asoc when it is already dead in sctp_sendmsg

Now in sctp_sendmsg sctp_wait_for_sndbuf could schedule out without
holding sock sk. It means the current asoc can be freed elsewhere,
like when receiving an abort packet.

If the asoc is just created in sctp_sendmsg and sctp_wait_for_sndbuf
returns err, the asoc will be freed again due to new_asoc is not nil.
An use-after-free issue would be triggered by this.

This patch is to fix it by setting new_asoc with nil if the asoc is
already dead when cpu schedules back, so that it will not be freed
again in sctp_sendmsg.

v1->v2:
  set new_asoc as nil in sctp_sendmsg instead of sctp_wait_for_sndbuf.

Suggested-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b029757bea03..fec8de73a06f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1971,8 +1971,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	if (!sctp_wspace(asoc)) {
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
-		if (err)
+		if (err) {
+			if (err == -ESRCH) {
+				/* asoc is already dead. */
+				new_asoc = NULL;
+				err = -EPIPE;
+			}
 			goto out_free;
+		}
 	}
 
 	/* If an address is passed with the sendto/sendmsg call, it is used
@@ -8006,10 +8012,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 	for (;;) {
 		prepare_to_wait_exclusive(&asoc->wait, &wait,
 					  TASK_INTERRUPTIBLE);
+		if (asoc->base.dead)
+			goto do_dead;
 		if (!*timeo_p)
 			goto do_nonblock;
-		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
-		    asoc->base.dead)
+		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
@@ -8034,6 +8041,10 @@ out:
 
 	return err;
 
+do_dead:
+	err = -ESRCH;
+	goto out;
+
 do_error:
 	err = -EPIPE;
 	goto out;
-- 
cgit 


From cea0cc80a6777beb6eb643d4ad53690e1ad1d4ff Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 15 Nov 2017 16:57:26 +0800
Subject: sctp: use the right sk after waking up from wait_buf sleep

Commit dfcb9f4f99f1 ("sctp: deny peeloff operation on asocs with threads
sleeping on it") fixed the race between peeloff and wait sndbuf by
checking waitqueue_active(&asoc->wait) in sctp_do_peeloff().

But it actually doesn't work, as even if waitqueue_active returns false
the waiting sndbuf thread may still not yet hold sk lock. After asoc is
peeled off, sk is not asoc->base.sk any more, then to hold the old sk
lock couldn't make assoc safe to access.

This patch is to fix this by changing to hold the new sk lock if sk is
not asoc->base.sk, meanwhile, also set the sk in sctp_sendmsg with the
new sk.

With this fix, there is no more race between peeloff and waitbuf, the
check 'waitqueue_active' in sctp_do_peeloff can be removed.

Thanks Marcelo and Neil for making this clear.

v1->v2:
  fix it by changing to lock the new sock instead of adding a flag in asoc.

Suggested-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fec8de73a06f..4c0a77292792 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -84,8 +84,8 @@
 /* Forward declarations for internal helper functions. */
 static int sctp_writeable(struct sock *sk);
 static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
-				size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+				size_t msg_len, struct sock **orig_sk);
 static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
 static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
 static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1970,7 +1970,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	if (!sctp_wspace(asoc)) {
-		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+		/* sk can be changed by peel off when waiting for buf. */
+		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
 		if (err) {
 			if (err == -ESRCH) {
 				/* asoc is already dead. */
@@ -5021,12 +5022,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	if (!asoc)
 		return -EINVAL;
 
-	/* If there is a thread waiting on more sndbuf space for
-	 * sending on this asoc, it cannot be peeled.
-	 */
-	if (waitqueue_active(&asoc->wait))
-		return -EBUSY;
-
 	/* An association cannot be branched off from an already peeled-off
 	 * socket, nor is this supported for tcp style sockets.
 	 */
@@ -7995,7 +7990,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
 
 /* Helper function to wait for space in the sndbuf.  */
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
-				size_t msg_len)
+				size_t msg_len, struct sock **orig_sk)
 {
 	struct sock *sk = asoc->base.sk;
 	int err = 0;
@@ -8029,11 +8024,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
 		lock_sock(sk);
+		if (sk != asoc->base.sk) {
+			release_sock(sk);
+			sk = asoc->base.sk;
+			lock_sock(sk);
+		}
 
 		*timeo_p = current_timeo;
 	}
 
 out:
+	*orig_sk = sk;
 	finish_wait(&asoc->wait, &wait);
 
 	/* Release the association's refcnt.  */
-- 
cgit 


From 423852f89034492cc40920c00908f6de7d2dbe4f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 15 Nov 2017 17:00:11 +0800
Subject: sctp: check stream reset info len before making reconf chunk

Now when resetting stream, if both in and out flags are set, the info
len can reach:
  sizeof(struct sctp_strreset_outreq) + SCTP_MAX_STREAM(65535) +
  sizeof(struct sctp_strreset_inreq)  + SCTP_MAX_STREAM(65535)
even without duplicated stream no, this value is far greater than the
chunk's max size.

_sctp_make_chunk doesn't do any check for this, which would cause the
skb it allocs is huge, syzbot even reported a crash due to this.

This patch is to check stream reset info len before making reconf
chunk and return EINVAL if the len exceeds chunk's capacity.

Thanks Marcelo and Neil for making this clear.

v1->v2:
  - move the check into sctp_send_reset_streams instead.

Fixes: cc16f00f6529 ("sctp: add support for generating stream reconf ssn reset request chunk")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c |  2 +-
 net/sctp/stream.c        | 32 ++++++++++++++++++++++++--------
 2 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 514465b03829..9bf575f2e8ed 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3594,8 +3594,8 @@ struct sctp_chunk *sctp_make_strreset_req(
 					__u16 stream_num, __be16 *stream_list,
 					bool out, bool in)
 {
+	__u16 stream_len = stream_num * sizeof(__u16);
 	struct sctp_strreset_outreq outreq;
-	__u16 stream_len = stream_num * 2;
 	struct sctp_strreset_inreq inreq;
 	struct sctp_chunk *retval;
 	__u16 outlen, inlen;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index b8c8cabb1a58..a11db21dc8a0 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -282,15 +282,31 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
 
 	str_nums = params->srs_number_streams;
 	str_list = params->srs_stream_list;
-	if (out && str_nums)
-		for (i = 0; i < str_nums; i++)
-			if (str_list[i] >= stream->outcnt)
-				goto out;
+	if (str_nums) {
+		int param_len = 0;
 
-	if (in && str_nums)
-		for (i = 0; i < str_nums; i++)
-			if (str_list[i] >= stream->incnt)
-				goto out;
+		if (out) {
+			for (i = 0; i < str_nums; i++)
+				if (str_list[i] >= stream->outcnt)
+					goto out;
+
+			param_len = str_nums * sizeof(__u16) +
+				    sizeof(struct sctp_strreset_outreq);
+		}
+
+		if (in) {
+			for (i = 0; i < str_nums; i++)
+				if (str_list[i] >= stream->incnt)
+					goto out;
+
+			param_len += str_nums * sizeof(__u16) +
+				     sizeof(struct sctp_strreset_inreq);
+		}
+
+		if (param_len > SCTP_MAX_CHUNK_LEN -
+				sizeof(struct sctp_reconf_chunk))
+			goto out;
+	}
 
 	nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
 	if (!nstr_list) {
-- 
cgit 


From 0a833c29d89656025443cb9f0ebff7052dd95ce0 Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Wed, 15 Nov 2017 13:09:32 +0100
Subject: genetlink: fix genlmsg_nlhdr()

According to the description, first argument of genlmsg_nlhdr() points to
what genlmsg_put() returns, i.e. beginning of user header. Therefore we
should only subtract size of genetlink header and netlink message header,
not user header.

This also means we don't need to pass the pointer to genetlink family and
the same is true for genl_dump_check_consistent() which is the only caller
of genlmsg_nlhdr(). (Note that at the moment, these functions are only
used for families which do not have user header so that they are not
affected.)

Fixes: 670dc2833d14 ("netlink: advertise incomplete dumps")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/netlink.c      | 6 +++---
 net/wireless/nl80211.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f6359c277212..c0b83dc9d993 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
 	if (!hdr)
 		return -EMSGSIZE;
 
-	genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
 	    nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
@@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
 		return -EMSGSIZE;
 
 	if (cb)
-		genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+		genl_dump_check_consistent(cb, hdr);
 
 	if (nfc_genl_setup_device_added(dev, msg))
 		goto nla_put_failure;
@@ -1356,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
 			goto nla_put_failure;
 
 		if (cb)
-			genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+			genl_dump_check_consistent(cb, hdr);
 
 		if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
 		    nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index bb16f1ec766e..a0e1951227fa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6291,7 +6291,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
 	if (!hdr)
 		return -1;
 
-	genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nl80211_put_regdom(regdom, msg))
 		goto nla_put_failure;
@@ -7722,7 +7722,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 	if (!hdr)
 		return -1;
 
-	genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
 		goto nla_put_failure;
-- 
cgit 


From 8252fceac01570836f0cb9e922f56b4c0b1011df Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 15 Nov 2017 18:28:21 +0100
Subject: netfilter: add ifdef around ctnetlink_proto_size

This function is no longer marked 'inline', so we now get a warning
when it is unused:

net/netfilter/nf_conntrack_netlink.c:536:15: error: 'ctnetlink_proto_size' defined but not used [-Werror=unused-function]

We could mark it inline again, mark it __maybe_unused, or add an #ifdef
around the definition. I'm picking the third approach here since that
seems to be what the rest of the file has.

Fixes: 5caaed151a68 ("netfilter: conntrack: don't cache nlattr_tuple_size result in nla_size")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6e0adfefb9ed..59c08997bfdf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -533,6 +533,7 @@ nla_put_failure:
 	return -1;
 }
 
+#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
 static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 {
 	const struct nf_conntrack_l3proto *l3proto;
@@ -552,6 +553,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 
 	return len + len4;
 }
+#endif
 
 static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
 {
-- 
cgit 


From d618d09a68e4eed7a435beb2e355250f6f40664a Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Wed, 15 Nov 2017 21:23:56 +0100
Subject: tipc: enforce valid ratio between skb truesize and contents

The socket level flow control is based on the assumption that incoming
buffers meet the condition (skb->truesize / roundup(skb->len) <= 4),
where the latter value is rounded off upwards to the nearest 1k number.
This does empirically hold true for the device drivers we know, but we
cannot trust that it will always be so, e.g., in a system with jumbo
frames and very small packets.

We now introduce a check for this condition at packet arrival, and if
we find it to be false, we copy the packet to a new, smaller buffer,
where the condition will be true. We expect this to affect only a small
fraction of all incoming packets, if at all.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c  | 24 +++++++++++++++++-------
 net/tipc/msg.h  |  7 ++++++-
 net/tipc/node.c |  2 +-
 3 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 1649d456e22d..b0d07b35909d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
 
 	if (fragid == LAST_FRAGMENT) {
 		TIPC_SKB_CB(head)->validated = false;
-		if (unlikely(!tipc_msg_validate(head)))
+		if (unlikely(!tipc_msg_validate(&head)))
 			goto err;
 		*buf = head;
 		TIPC_SKB_CB(head)->tail = NULL;
@@ -201,11 +201,21 @@ err:
  * TIPC will ignore the excess, under the assumption that it is optional info
  * introduced by a later release of the protocol.
  */
-bool tipc_msg_validate(struct sk_buff *skb)
+bool tipc_msg_validate(struct sk_buff **_skb)
 {
-	struct tipc_msg *msg;
+	struct sk_buff *skb = *_skb;
+	struct tipc_msg *hdr;
 	int msz, hsz;
 
+	/* Ensure that flow control ratio condition is satisfied */
+	if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
+		skb = skb_copy(skb, GFP_ATOMIC);
+		if (!skb)
+			return false;
+		kfree_skb(*_skb);
+		*_skb = skb;
+	}
+
 	if (unlikely(TIPC_SKB_CB(skb)->validated))
 		return true;
 	if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
@@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, hsz)))
 		return false;
 
-	msg = buf_msg(skb);
-	if (unlikely(msg_version(msg) != TIPC_VERSION))
+	hdr = buf_msg(skb);
+	if (unlikely(msg_version(hdr) != TIPC_VERSION))
 		return false;
 
-	msz = msg_size(msg);
+	msz = msg_size(hdr);
 	if (unlikely(msz < hsz))
 		return false;
 	if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
@@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
 	skb_pull(*iskb, offset);
 	imsz = msg_size(buf_msg(*iskb));
 	skb_trim(*iskb, imsz);
-	if (unlikely(!tipc_msg_validate(*iskb)))
+	if (unlikely(!tipc_msg_validate(iskb)))
 		goto none;
 	*pos += align(imsz);
 	return true;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index bf8f57ccc70c..3e4384c222f7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -926,7 +926,7 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
 }
 
 struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
-bool tipc_msg_validate(struct sk_buff *skb);
+bool tipc_msg_validate(struct sk_buff **_skb);
 bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
 void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
 		     struct sk_buff_head *xmitq);
@@ -954,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb)
 	return msg_seqno(buf_msg(skb));
 }
 
+static inline int buf_roundup_len(struct sk_buff *skb)
+{
+	return (skb->len / 1024 + 1) * 1024;
+}
+
 /* tipc_skb_peek(): peek and reserve first buffer in list
  * @list: list to be peeked in
  * Returns pointer to first buffer in list, if any
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 009a81631280..507017fe0f1b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1539,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 	__skb_queue_head_init(&xmitq);
 
 	/* Ensure message is well-formed before touching the header */
-	if (unlikely(!tipc_msg_validate(skb)))
+	if (unlikely(!tipc_msg_validate(&skb)))
 		goto discard;
 	hdr = buf_msg(skb);
 	usr = msg_user(hdr);
-- 
cgit 


From 7c8a61d9ee1df0fb4747879fa67a99614eb62fec Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 15 Nov 2017 22:17:48 -0600
Subject: net/sctp: Always set scope_id in sctp_inet6_skb_msgname

Alexandar Potapenko while testing the kernel with KMSAN and syzkaller
discovered that in some configurations sctp would leak 4 bytes of
kernel stack.

Working with his reproducer I discovered that those 4 bytes that
are leaked is the scope id of an ipv6 address returned by recvmsg.

With a little code inspection and a shrewd guess I discovered that
sctp_inet6_skb_msgname only initializes the scope_id field for link
local ipv6 addresses to the interface index the link local address
pertains to instead of initializing the scope_id field for all ipv6
addresses.

That is almost reasonable as scope_id's are meaniningful only for link
local addresses.  Set the scope_id in all other cases to 0 which is
not a valid interface index to make it clear there is nothing useful
in the scope_id field.

There should be no danger of breaking userspace as the stack leak
guaranteed that previously meaningless random data was being returned.

Fixes: 372f525b495c ("SCTP:  Resync with LKSCTP tree.")
History-tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Reported-by: Alexander Potapenko <glider@google.com>
Tested-by: Alexander Potapenko <glider@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a6dfa86c0201..3b18085e3b10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -807,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 		addr->v6.sin6_flowinfo = 0;
 		addr->v6.sin6_port = sh->source;
 		addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
-		if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+		if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
 			addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
-		}
+		else
+			addr->v6.sin6_scope_id = 0;
 	}
 
 	*addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
-- 
cgit 


From 17e48577752a61197a7e5f4922db45b3a5af5068 Mon Sep 17 00:00:00 2001
From: Tim Hansen <devtimhansen@gmail.com>
Date: Thu, 16 Nov 2017 12:03:34 -0500
Subject: net/netlabel: Add list_next_rcu() in rcu_dereference().

Add list_next_rcu() for fetching next list in rcu_deference safely.

Found with sparse in linux-next tree on tag next-20171116.

Signed-off-by: Tim Hansen <devtimhansen@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_addrlist.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index d0f38bc9af6d..ac709f0f197b 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
 	struct list_head *i = s;
 	struct netlbl_af4list *n = __af4list_entry(s);
 	while (i != h && !n->valid) {
-		i = rcu_dereference(i->next);
+		i = rcu_dereference(list_next_rcu(i));
 		n = __af4list_entry(i);
 	}
 	return n;
@@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
 	struct list_head *i = s;
 	struct netlbl_af6list *n = __af6list_entry(s);
 	while (i != h && !n->valid) {
-		i = rcu_dereference(i->next);
+		i = rcu_dereference(list_next_rcu(i));
 		n = __af6list_entry(i);
 	}
 	return n;
-- 
cgit 


From ecca8f88da5c4260cc2bccfefd2a24976704c366 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 17 Nov 2017 14:11:11 +0800
Subject: sctp: set frag_point in sctp_setsockopt_maxseg correctly

Now in sctp_setsockopt_maxseg user_frag or frag_point can be set with
val >= 8 and val <= SCTP_MAX_CHUNK_LEN. But both checks are incorrect.

val >= 8 means frag_point can even be less than SCTP_DEFAULT_MINSEGMENT.
Then in sctp_datamsg_from_user(), when it's value is greater than cookie
echo len and trying to bundle with cookie echo chunk, the first_len will
overflow.

The worse case is when it's value is equal as cookie echo len, first_len
becomes 0, it will go into a dead loop for fragment later on. In Hangbin
syzkaller testing env, oom was even triggered due to consecutive memory
allocation in that loop.

Besides, SCTP_MAX_CHUNK_LEN is the max size of the whole chunk, it should
deduct the data header for frag_point or user_frag check.

This patch does a proper check with SCTP_DEFAULT_MINSEGMENT subtracting
the sctphdr and datahdr, SCTP_MAX_CHUNK_LEN subtracting datahdr when
setting frag_point via sockopt. It also improves sctp_setsockopt_maxseg
codes.

Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reported-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 4c0a77292792..3204a9b29407 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3140,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
  */
 static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
 {
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_assoc_value params;
 	struct sctp_association *asoc;
-	struct sctp_sock *sp = sctp_sk(sk);
 	int val;
 
 	if (optlen == sizeof(int)) {
@@ -3158,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 		if (copy_from_user(&params, optval, optlen))
 			return -EFAULT;
 		val = params.assoc_value;
-	} else
+	} else {
 		return -EINVAL;
+	}
 
-	if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)))
-		return -EINVAL;
+	if (val) {
+		int min_len, max_len;
 
-	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc && params.assoc_id && sctp_style(sk, UDP))
-		return -EINVAL;
+		min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
+		min_len -= sizeof(struct sctphdr) +
+			   sizeof(struct sctp_data_chunk);
+
+		max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
 
+		if (val < min_len || val > max_len)
+			return -EINVAL;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
 	if (asoc) {
 		if (val == 0) {
-			val = asoc->pathmtu;
-			val -= sp->pf->af->net_header_len;
+			val = asoc->pathmtu - sp->pf->af->net_header_len;
 			val -= sizeof(struct sctphdr) +
-					sizeof(struct sctp_data_chunk);
+			       sizeof(struct sctp_data_chunk);
 		}
 		asoc->user_frag = val;
 		asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
 	} else {
+		if (params.assoc_id && sctp_style(sk, UDP))
+			return -EINVAL;
 		sp->user_frag = val;
 	}
 
-- 
cgit 


From e39d5246111399dbc6e11cd39fd8580191b86c47 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 17 Nov 2017 14:27:06 +0800
Subject: route: update fnhe_expires for redirect when the fnhe exists

Now when creating fnhe for redirect, it sets fnhe_expires for this
new route cache. But when updating the exist one, it doesn't do it.
It will cause this fnhe never to be expired.

Paolo already noticed it before, in Jianlin's test case, it became
even worse:

When ip route flush cache, the old fnhe is not to be removed, but
only clean it's members. When redirect comes again, this fnhe will
be found and updated, but never be expired due to fnhe_expires not
being set.

So fix it by simply updating fnhe_expires even it's for redirect.

Fixes: aee06da6726d ("ipv4: use seqlock for nh_exceptions")
Reported-by: Jianlin Shi <jishi@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3b427757b1f8..11cf2fe43308 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -678,10 +678,9 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	if (fnhe) {
 		if (gw)
 			fnhe->fnhe_gw = gw;
-		if (pmtu) {
+		if (pmtu)
 			fnhe->fnhe_pmtu = pmtu;
-			fnhe->fnhe_expires = max(1UL, expires);
-		}
+		fnhe->fnhe_expires = max(1UL, expires);
 		/* Update all cached dsts too */
 		rt = rcu_dereference(fnhe->fnhe_rth_input);
 		if (rt)
-- 
cgit 


From cebe84c6190d741045a322f5343f717139993c08 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 17 Nov 2017 14:27:18 +0800
Subject: route: also update fnhe_genid when updating a route cache

Now when ip route flush cache and it turn out all fnhe_genid != genid.
If a redirect/pmtu icmp packet comes and the old fnhe is found and all
it's members but fnhe_genid will be updated.

Then next time when it looks up route and tries to rebind this fnhe to
the new dst, the fnhe will be flushed due to fnhe_genid != genid. It
causes this redirect/pmtu icmp packet acutally not to be applied.

This patch is to also reset fnhe_genid when updating a route cache.

Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions")
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11cf2fe43308..43b69af242e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rt;
+	u32 genid, hval;
 	unsigned int i;
 	int depth;
-	u32 hval = fnhe_hashfun(daddr);
+
+	genid = fnhe_genid(dev_net(nh->nh_dev));
+	hval = fnhe_hashfun(daddr);
 
 	spin_lock_bh(&fnhe_lock);
 
@@ -676,6 +679,8 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	}
 
 	if (fnhe) {
+		if (fnhe->fnhe_genid != genid)
+			fnhe->fnhe_genid = genid;
 		if (gw)
 			fnhe->fnhe_gw = gw;
 		if (pmtu)
@@ -699,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 			fnhe->fnhe_next = hash->chain;
 			rcu_assign_pointer(hash->chain, fnhe);
 		}
-		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+		fnhe->fnhe_genid = genid;
 		fnhe->fnhe_daddr = daddr;
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
-- 
cgit