From c2d0f48a13e53b4747704c9e692f5e765e52041a Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 30 Nov 2016 21:47:09 +0100
Subject: batman-adv: Check for alloc errors when preparing TT local data

batadv_tt_prepare_tvlv_local_data can fail to allocate the memory for the
new TVLV block. The caller is informed about this problem with the returned
length of 0. Not checking this value results in an invalid memory access
when either tt_data or tt_change is accessed.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/translation-table.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7f663092f6de..0dc85eb1cb7a 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -3282,7 +3282,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
 							     &tvlv_tt_data,
 							     &tt_change,
 							     &tt_len);
-		if (!tt_len)
+		if (!tt_len || !tvlv_len)
 			goto unlock;
 
 		/* Copy the last orig_node's OGM buffer */
@@ -3300,7 +3300,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
 							     &tvlv_tt_data,
 							     &tt_change,
 							     &tt_len);
-		if (!tt_len)
+		if (!tt_len || !tvlv_len)
 			goto out;
 
 		/* fill the rest of the tvlv with the real TT entries */
-- 
cgit 


From c66ebf2db555c6ed705044eabd2b37dcd546f68b Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sat, 3 Dec 2016 21:49:08 +0800
Subject: net: dcb: set error code on failures

In function dcbnl_cee_fill(), returns the value of variable err on
errors. However, on some error paths (e.g. nla put fails), its value may
be 0. It may be better to explicitly set a negative errno to variable
err before returning.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188881

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 4f6c1862dfd2..3202d75329b5 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1353,6 +1353,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev)
 dcb_unlock:
 	spin_unlock_bh(&dcb_lock);
 nla_put_failure:
+	err = -EMSGSIZE;
 	return err;
 }
 
-- 
cgit 


From 1a239173cccff726b60ac6a9c79ae4a1e26cfa49 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 1 Dec 2016 07:27:52 -0500
Subject: ipv4: Drop leaf from suffix pull/push functions

It wasn't necessary to pass a leaf in when doing the suffix updates so just
drop it.  Instead just pass the suffix and work with that.

Since we dropped the leaf there is no need to include that in the name so
the names are updated to node_push_suffix and node_pull_suffix.

Finally I noticed that the logic for pulling the suffix length back
actually had some issues.  Specifically it would stop prematurely if there
was a longer suffix, but it was not as long as the original suffix.  I
updated the code to address that in node_pull_suffix.

Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length")
Suggested-by: Robert Shearman <rshearma@brocade.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Reviewed-by: Robert Shearman <rshearma@brocade.com>
Tested-by: Robert Shearman <rshearma@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 026f309c51e9..eec90d72dd52 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -930,22 +930,24 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn)
 	return tp;
 }
 
-static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l)
+static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
 {
-	while ((tp->slen > tp->pos) && (tp->slen > l->slen)) {
-		if (update_suffix(tp) > l->slen)
+	unsigned char node_slen = tn->slen;
+
+	while ((node_slen > tn->pos) && (node_slen > slen)) {
+		slen = update_suffix(tn);
+		if (node_slen == slen)
 			break;
-		tp = node_parent(tp);
+
+		tn = node_parent(tn);
+		node_slen = tn->slen;
 	}
 }
 
-static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l)
+static void node_push_suffix(struct key_vector *tn, unsigned char slen)
 {
-	/* if this is a new leaf then tn will be NULL and we can sort
-	 * out parent suffix lengths as a part of trie_rebalance
-	 */
-	while (tn->slen < l->slen) {
-		tn->slen = l->slen;
+	while (tn->slen < slen) {
+		tn->slen = slen;
 		tn = node_parent(tn);
 	}
 }
@@ -1107,7 +1109,7 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
 	/* if we added to the tail node then we need to update slen */
 	if (l->slen < new->fa_slen) {
 		l->slen = new->fa_slen;
-		leaf_push_suffix(tp, l);
+		node_push_suffix(tp, new->fa_slen);
 	}
 
 	return 0;
@@ -1511,7 +1513,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp,
 
 	/* update the trie with the latest suffix length */
 	l->slen = fa->fa_slen;
-	leaf_pull_suffix(tp, l);
+	node_pull_suffix(tp, fa->fa_slen);
 }
 
 /* Caller must hold RTNL. */
-- 
cgit 


From a52ca62c4a6771028da9c1de934cdbcd93d54bb4 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 1 Dec 2016 07:27:57 -0500
Subject: ipv4: Drop suffix update from resize code

It has been reported that update_suffix can be expensive when it is called
on a large node in which most of the suffix lengths are the same.  The time
required to add 200K entries had increased from around 3 seconds to almost
49 seconds.

In order to address this we need to move the code for updating the suffix
out of resize and instead just have it handled in the cases where we are
pushing a node that increases the suffix length, or will decrease the
suffix length.

Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length")
Reported-by: Robert Shearman <rshearma@brocade.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Reviewed-by: Robert Shearman <rshearma@brocade.com>
Tested-by: Robert Shearman <rshearma@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index eec90d72dd52..e3665bf7a7f3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -719,6 +719,13 @@ static unsigned char update_suffix(struct key_vector *tn)
 {
 	unsigned char slen = tn->pos;
 	unsigned long stride, i;
+	unsigned char slen_max;
+
+	/* only vector 0 can have a suffix length greater than or equal to
+	 * tn->pos + tn->bits, the second highest node will have a suffix
+	 * length at most of tn->pos + tn->bits - 1
+	 */
+	slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
 
 	/* search though the list of children looking for nodes that might
 	 * have a suffix greater than the one we currently have.  This is
@@ -736,12 +743,8 @@ static unsigned char update_suffix(struct key_vector *tn)
 		slen = n->slen;
 		i &= ~(stride - 1);
 
-		/* if slen covers all but the last bit we can stop here
-		 * there will be nothing longer than that since only node
-		 * 0 and 1 << (bits - 1) could have that as their suffix
-		 * length.
-		 */
-		if ((slen + 1) >= (tn->pos + tn->bits))
+		/* stop searching if we have hit the maximum possible value */
+		if (slen >= slen_max)
 			break;
 	}
 
@@ -913,21 +916,7 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn)
 		return collapse(t, tn);
 
 	/* update parent in case halve failed */
-	tp = node_parent(tn);
-
-	/* Return if at least one deflate was run */
-	if (max_work != MAX_WORK)
-		return tp;
-
-	/* push the suffix length to the parent node */
-	if (tn->slen > tn->pos) {
-		unsigned char slen = update_suffix(tn);
-
-		if (slen > tp->slen)
-			tp->slen = slen;
-	}
-
-	return tp;
+	return node_parent(tn);
 }
 
 static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
@@ -1068,6 +1057,7 @@ static int fib_insert_node(struct trie *t, struct key_vector *tp,
 	}
 
 	/* Case 3: n is NULL, and will just insert a new leaf */
+	node_push_suffix(tp, new->fa_slen);
 	NODE_INIT_PARENT(l, tp);
 	put_child_root(tp, key, l);
 	trie_rebalance(t, tp);
@@ -1501,6 +1491,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp,
 	 * out parent suffix lengths as a part of trie_rebalance
 	 */
 	if (hlist_empty(&l->leaf)) {
+		if (tp->slen == l->slen)
+			node_pull_suffix(tp, tp->pos);
 		put_child_root(tp, l->key, NULL);
 		node_free(l);
 		trie_rebalance(t, tp);
@@ -1785,6 +1777,10 @@ void fib_table_flush_external(struct fib_table *tb)
 			if (IS_TRIE(pn))
 				break;
 
+			/* update the suffix to address pulled leaves */
+			if (pn->slen > pn->pos)
+				update_suffix(pn);
+
 			/* resize completed node */
 			pn = resize(t, pn);
 			cindex = get_index(pkey, pn);
@@ -1851,6 +1847,10 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
 			if (IS_TRIE(pn))
 				break;
 
+			/* update the suffix to address pulled leaves */
+			if (pn->slen > pn->pos)
+				update_suffix(pn);
+
 			/* resize completed node */
 			pn = resize(t, pn);
 			cindex = get_index(pkey, pn);
-- 
cgit 


From 14dd3e1b970feb125e4f453bc3b0569db5b2069b Mon Sep 17 00:00:00 2001
From: Suraj Deshmukh <surajssd009005@gmail.com>
Date: Sat, 3 Dec 2016 07:59:26 +0000
Subject: net: af_mpls.c add space before open parenthesis

Adding space after switch keyword before open
parenthesis for readability purpose.

This patch fixes the checkpatch.pl warning:
space required before the open parenthesis '('

Signed-off-by: Suraj Deshmukh <surajssd009005@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 0e4334cbde17..15fe97644ffe 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1252,7 +1252,7 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 		if (!nla)
 			continue;
 
-		switch(index) {
+		switch (index) {
 		case RTA_OIF:
 			cfg->rc_ifindex = nla_get_u32(nla);
 			break;
-- 
cgit 


From b59589635ff01cc25270360709eeeb5c45c6abb9 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sat, 3 Dec 2016 19:33:23 +0800
Subject: net: bridge: set error code on failure

Function br_sysfs_addbr() does not set error code when the call
kobject_create_and_add() returns a NULL pointer. It may be better to
return "-ENOMEM" when kobject_create_and_add() fails.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188781

Signed-off-by: Pan Bian <bianpan2016@163.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_sysfs_br.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index e120307c6e36..f88c4df3f91e 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -898,6 +898,7 @@ int br_sysfs_addbr(struct net_device *dev)
 	if (!br->ifobj) {
 		pr_info("%s: can't add kobject (directory) %s/%s\n",
 			__func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR);
+		err = -ENOMEM;
 		goto out3;
 	}
 	return 0;
-- 
cgit 


From 0eab121ef8750a5c8637d51534d5e9143fb0633f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 5 Dec 2016 10:34:38 -0800
Subject: net: ping: check minimum size on ICMP header length

Prior to commit c0371da6047a ("put iov_iter into msghdr") in v3.19, there
was no check that the iovec contained enough bytes for an ICMP header,
and the read loop would walk across neighboring stack contents. Since the
iov_iter conversion, bad arguments are noticed, but the returned error is
EFAULT. Returning EINVAL is a clearer error and also solves the problem
prior to v3.19.

This was found using trinity with KASAN on v3.18:

BUG: KASAN: stack-out-of-bounds in memcpy_fromiovec+0x60/0x114 at addr ffffffc071077da0
Read of size 8 by task trinity-c2/9623
page:ffffffbe034b9a08 count:0 mapcount:0 mapping:          (null) index:0x0
flags: 0x0()
page dumped because: kasan: bad access detected
CPU: 0 PID: 9623 Comm: trinity-c2 Tainted: G    BU         3.18.0-dirty #15
Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT)
Call trace:
[<ffffffc000209c98>] dump_backtrace+0x0/0x1ac arch/arm64/kernel/traps.c:90
[<ffffffc000209e54>] show_stack+0x10/0x1c arch/arm64/kernel/traps.c:171
[<     inline     >] __dump_stack lib/dump_stack.c:15
[<ffffffc000f18dc4>] dump_stack+0x7c/0xd0 lib/dump_stack.c:50
[<     inline     >] print_address_description mm/kasan/report.c:147
[<     inline     >] kasan_report_error mm/kasan/report.c:236
[<ffffffc000373dcc>] kasan_report+0x380/0x4b8 mm/kasan/report.c:259
[<     inline     >] check_memory_region mm/kasan/kasan.c:264
[<ffffffc00037352c>] __asan_load8+0x20/0x70 mm/kasan/kasan.c:507
[<ffffffc0005b9624>] memcpy_fromiovec+0x5c/0x114 lib/iovec.c:15
[<     inline     >] memcpy_from_msg include/linux/skbuff.h:2667
[<ffffffc000ddeba0>] ping_common_sendmsg+0x50/0x108 net/ipv4/ping.c:674
[<ffffffc000dded30>] ping_v4_sendmsg+0xd8/0x698 net/ipv4/ping.c:714
[<ffffffc000dc91dc>] inet_sendmsg+0xe0/0x12c net/ipv4/af_inet.c:749
[<     inline     >] __sock_sendmsg_nosec net/socket.c:624
[<     inline     >] __sock_sendmsg net/socket.c:632
[<ffffffc000cab61c>] sock_sendmsg+0x124/0x164 net/socket.c:643
[<     inline     >] SYSC_sendto net/socket.c:1797
[<ffffffc000cad270>] SyS_sendto+0x178/0x1d8 net/socket.c:1761

CVE-2016-8399

Reported-by: Qidan He <i@flanker017.me>
Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 205e2000d395..96b8e2b95731 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -657,6 +657,10 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
 	if (len > 0xFFFF)
 		return -EMSGSIZE;
 
+	/* Must have at least a full ICMP header. */
+	if (len < icmph_len)
+		return -EINVAL;
+
 	/*
 	 *	Check the flags.
 	 */
-- 
cgit 


From c79e167c3cba066892542f3dfb5e73b7207e01df Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 4 Dec 2016 12:15:44 +0800
Subject: net: caif: remove ineffective check

The check of the return value of sock_register() is ineffective.
"if(!err)" seems to be a typo. It is better to propagate the error code
to the callers of caif_sktinit_module(). This patch removes the check
statment and directly returns the result of sock_register().

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188751
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index aa209b1066c9..92cbbd2afddb 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1107,10 +1107,7 @@ static struct net_proto_family caif_family_ops = {
 
 static int __init caif_sktinit_module(void)
 {
-	int err = sock_register(&caif_family_ops);
-	if (!err)
-		return err;
-	return 0;
+	return sock_register(&caif_family_ops);
 }
 
 static void __exit caif_sktexit_module(void)
-- 
cgit 


From ed5d7788a934a4b6d6d025e948ed4da496b4f12e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 5 Dec 2016 15:28:21 +0800
Subject: netlink: Do not schedule work from sk_destruct

It is wrong to schedule a work from sk_destruct using the socket
as the memory reserve because the socket will be freed immediately
after the return from sk_destruct.

Instead we should do the deferral prior to sk_free.

This patch does just that.

Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 602e5ebe9db3..246f29d365c0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -322,11 +322,13 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 	sk_mem_charge(sk, skb->truesize);
 }
 
-static void __netlink_sock_destruct(struct sock *sk)
+static void netlink_sock_destruct(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 
 	if (nlk->cb_running) {
+		if (nlk->cb.done)
+			nlk->cb.done(&nlk->cb);
 		module_put(nlk->cb.module);
 		kfree_skb(nlk->cb.skb);
 	}
@@ -348,21 +350,7 @@ static void netlink_sock_destruct_work(struct work_struct *work)
 	struct netlink_sock *nlk = container_of(work, struct netlink_sock,
 						work);
 
-	nlk->cb.done(&nlk->cb);
-	__netlink_sock_destruct(&nlk->sk);
-}
-
-static void netlink_sock_destruct(struct sock *sk)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-
-	if (nlk->cb_running && nlk->cb.done) {
-		INIT_WORK(&nlk->work, netlink_sock_destruct_work);
-		schedule_work(&nlk->work);
-		return;
-	}
-
-	__netlink_sock_destruct(sk);
+	sk_free(&nlk->sk);
 }
 
 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
@@ -667,8 +655,18 @@ out_module:
 static void deferred_put_nlk_sk(struct rcu_head *head)
 {
 	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
+	struct sock *sk = &nlk->sk;
+
+	if (!atomic_dec_and_test(&sk->sk_refcnt))
+		return;
+
+	if (nlk->cb_running && nlk->cb.done) {
+		INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+		schedule_work(&nlk->work);
+		return;
+	}
 
-	sock_put(&nlk->sk);
+	sk_free(sk);
 }
 
 static int netlink_release(struct socket *sock)
-- 
cgit 


From dcb17d22e1c2cd72e72190c736349a675362b3bc Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 5 Dec 2016 18:37:13 -0200
Subject: tcp: warn on bogus MSS and try to amend it

There have been some reports lately about TCP connection stalls caused
by NIC drivers that aren't setting gso_size on aggregated packets on rx
path. This causes TCP to assume that the MSS is actually the size of the
aggregated packet, which is invalid.

Although the proper fix is to be done at each driver, it's often hard
and cumbersome for one to debug, come to such root cause and report/fix
it.

This patch amends this situation in two ways. First, it adds a warning
on when this situation occurs, so it gives a hint to those trying to
debug this. It also limit the maximum probed MSS to the adverised MSS,
as it should never be any higher than that.

The result is that the connection may not have the best performance ever
but it shouldn't stall, and the admin will have a hint on what to look
for.

Tested with virtio by forcing gso_size to 0.

v2: updated msg per David's suggestion
v3: use skb_iif to find the interface and also log its name, per Eric
    Dumazet's suggestion. As the skb may be backlogged and the interface
    gone by then, we need to check if the number still has a meaning.
v4: use helper tcp_gro_dev_warn() and avoid pr_warn_once inside __once, per
    David's suggestion

Cc: Jonathan Maxwell <jmaxwell37@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a27b9c0e27c0..c71d49ce0c93 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -128,6 +128,23 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define REXMIT_LOST	1 /* retransmit packets marked lost */
 #define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */
 
+static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
+{
+	static bool __once __read_mostly;
+
+	if (!__once) {
+		struct net_device *dev;
+
+		__once = true;
+
+		rcu_read_lock();
+		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
+		pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+			dev ? dev->name : "Unknown driver");
+		rcu_read_unlock();
+	}
+}
+
 /* Adapt the MSS value used to make delayed ack decision to the
  * real world.
  */
@@ -144,7 +161,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 	 */
 	len = skb_shinfo(skb)->gso_size ? : skb->len;
 	if (len >= icsk->icsk_ack.rcv_mss) {
-		icsk->icsk_ack.rcv_mss = len;
+		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
+					       tcp_sk(sk)->advmss);
+		if (unlikely(icsk->icsk_ack.rcv_mss != len))
+			tcp_gro_dev_warn(sk, skb);
 	} else {
 		/* Otherwise, we make more careful check taking into account,
 		 * that SACKs block is variable.
-- 
cgit