From f0d1f04f0a2f662b6b617e24d115fddcf6ef8723 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 7 Oct 2014 19:02:11 +0200
Subject: netfilter: fix wrong arithmetics regarding NFT_REJECT_ICMPX_MAX

NFT_REJECT_ICMPX_MAX should be __NFT_REJECT_ICMPX_MAX - 1.

nft_reject_icmp_code() and nft_reject_icmpv6_code() are called from the
packet path, so BUG_ON in case we try to access an unknown abstracted
ICMP code. This should not happen since we already validate this from
nft_reject_{inet,bridge}_init().

Fixes: 51b0a5d ("netfilter: nft_reject: introduce icmp code abstraction for inet and bridge")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_reject.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ec8a456092a7..57d3e1af5630 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -72,7 +72,7 @@ nla_put_failure:
 }
 EXPORT_SYMBOL_GPL(nft_reject_dump);
 
-static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
 	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMP_NET_UNREACH,
 	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMP_PORT_UNREACH,
 	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMP_HOST_UNREACH,
@@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmp_code(u8 code)
 {
-	if (code > NFT_REJECT_ICMPX_MAX)
-		return -EINVAL;
+	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
 	return icmp_code_v4[code];
 }
@@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code)
 EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
 
 
-static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
 	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMPV6_NOROUTE,
 	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMPV6_PORT_UNREACH,
 	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMPV6_ADDR_UNREACH,
@@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmpv6_code(u8 code)
 {
-	if (code > NFT_REJECT_ICMPX_MAX)
-		return -EINVAL;
+	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
 	return icmp_code_v6[code];
 }
-- 
cgit 


From 2f29fed3f814f652a24b10c975b9d415a154fc9c Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Tue, 7 Oct 2014 22:20:23 +0200
Subject: net: rfkill: kernel-doc warning fixes

s/state/blocked

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b3b16c070a7f..fa7cd792791c 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
 /**
  * __rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * This function sets the state of all switches of given type,
  * unless a specific switch is claimed by userspace (in which case,
@@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
 /**
  * rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
  * Please refer to __rfkill_switch_all() for details.
-- 
cgit 


From 59f35b810e3bb17efef2aa5feadffb66450190d9 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Tue, 7 Oct 2014 22:31:32 +0200
Subject: netlabel: kernel-doc warning fix

no secid argument in netlbl_cfg_unlbl_static_del

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_kapi.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 0b4692dd1c5e..a845cd4cf21e 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -246,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
  * @addr: IP address in network byte order (struct in[6]_addr)
  * @mask: address mask in network byte order (struct in[6]_addr)
  * @family: address family
- * @secid: LSM secid value for the entry
  * @audit_info: NetLabel audit information
  *
  * Description:
-- 
cgit 


From de3f0d0effecc2ccfbd679705519ed5b35f9cb33 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Thu, 9 Oct 2014 12:58:08 +0900
Subject: net: Missing @ before descriptions cause make xmldocs warning

This patch fix following warning.
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'header_len'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'data_len'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'max_page_order'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'errcode'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'gfp_mask'

Acutually the descriptions exist, but missing "@" in front.

This problem start to happen when following commit was merged
into Linus's tree during 3.18-rc1 merge period.
commit 2e4e44107176d552f8bb1bb76053e850e3809841
net: add alloc_skb_with_frags() helper

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b3df0d518ab..a30d750647e7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4126,11 +4126,11 @@ EXPORT_SYMBOL(skb_vlan_untag);
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *
- * header_len: size of linear part
- * data_len: needed length in frags
- * max_page_order: max page order desired.
- * errcode: pointer to error code if any
- * gfp_mask: allocation mask
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
  *
  * This can be used to allocate a paged skb, given a maximal order for frags.
  */
-- 
cgit 


From b8358d70ce1066dd4cc658cfdaf7862d459e2d78 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 9 Oct 2014 12:18:10 +0200
Subject: net_sched: restore qdisc quota fairness limits after bulk dequeue

Restore the quota fairness between qdisc's, that we broke with commit
5772e9a346 ("qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE").

Before that commit, the quota in __qdisc_run() were in packets as
dequeue_skb() would only dequeue a single packet, that assumption
broke with bulk dequeue.

We choose not to account for the number of packets inside the TSO/GSO
packets (accessable via "skb_gso_segs").  As the previous fairness
also had this "defect". Thus, GSO/TSO packets counts as a single
packet.

Further more, we choose to slack on accuracy, by allowing a bulk
dequeue try_bulk_dequeue_skb() to exceed the "packets" limit, only
limited by the BQL bytelimit.  This is done because BQL prefers to get
its full budget for appropriate feedback from TX completion.

In future, we might consider reworking this further and, if it allows,
switch to a time-based model, as suggested by Eric. Right now, we only
restore old semantics.

Joint work with Eric, Hannes, Daniel and Jesper.  Hannes wrote the
first patch in cooperation with Daniel and Jesper.  Eric rewrote the
patch.

Fixes: 5772e9a346 ("qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 38d58e6cef07..6efca30894aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -57,7 +57,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
 				 struct sk_buff *skb,
-				 const struct netdev_queue *txq)
+				 const struct netdev_queue *txq,
+				 int *packets)
 {
 	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
 
@@ -70,6 +71,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 		bytelimit -= nskb->len; /* covers GSO len */
 		skb->next = nskb;
 		skb = nskb;
+		(*packets)++; /* GSO counts as one pkt */
 	}
 	skb->next = NULL;
 }
@@ -77,11 +79,13 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
-static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+				   int *packets)
 {
 	struct sk_buff *skb = q->gso_skb;
 	const struct netdev_queue *txq = q->dev_queue;
 
+	*packets = 1;
 	*validate = true;
 	if (unlikely(skb)) {
 		/* check the reason of requeuing without tx lock first */
@@ -98,7 +102,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
 		    !netif_xmit_frozen_or_stopped(txq)) {
 			skb = q->dequeue(q);
 			if (skb && qdisc_may_bulk(q))
-				try_bulk_dequeue_skb(q, skb, txq);
+				try_bulk_dequeue_skb(q, skb, txq, packets);
 		}
 	}
 	return skb;
@@ -204,7 +208,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q, int *packets)
 {
 	struct netdev_queue *txq;
 	struct net_device *dev;
@@ -213,7 +217,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	bool validate;
 
 	/* Dequeue packet */
-	skb = dequeue_skb(q, &validate);
+	skb = dequeue_skb(q, &validate, packets);
 	if (unlikely(!skb))
 		return 0;
 
@@ -227,14 +231,16 @@ static inline int qdisc_restart(struct Qdisc *q)
 void __qdisc_run(struct Qdisc *q)
 {
 	int quota = weight_p;
+	int packets;
 
-	while (qdisc_restart(q)) {
+	while (qdisc_restart(q, &packets)) {
 		/*
 		 * Ordered by possible occurrence: Postpone processing if
 		 * 1. we've exceeded packet quota
 		 * 2. another process needs the CPU;
 		 */
-		if (--quota <= 0 || need_resched()) {
+		quota -= packets;
+		if (quota <= 0 || need_resched()) {
 			__netif_schedule(q);
 			break;
 		}
-- 
cgit 


From 38b3629adb8c048dda8b98e888505c79ed33ae92 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 9 Oct 2014 15:16:41 -0700
Subject: net: bpf: fix bpf syscall dependence on anon_inodes

minimal configurations where EPOLL, PERF_EVENTS, etc are disabled,
but NET is enabled, are failing to build with link error:
kernel/built-in.o: In function `bpf_prog_load':
syscall.c:(.text+0x3b728): undefined reference to `anon_inode_getfd'

fix it by selecting ANON_INODES when NET is enabled

Reported-by: Michal Sojka <sojkam1@fel.cvut.cz>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index d6b138e2c263..6272420a721b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -6,6 +6,7 @@ menuconfig NET
 	bool "Networking support"
 	select NLATTR
 	select GENERIC_NET_UTILS
+	select ANON_INODES
 	---help---
 	  Unless you really know what you are doing, you should say Y here.
 	  The reason is that some programs need kernel networking support even
-- 
cgit 


From 8ea6e345a6123fa831e42cd8747f55892a58abff Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Fri, 10 Oct 2014 13:56:51 +0800
Subject: net: filter: fix the comments

1. sk_run_filter has been renamed, sk_filter() is using SK_RUN_FILTER.
2. Remove wrong comments about storing intermediate value.
3. replace sk_run_filter with __bpf_prog_run for check_load_and_stores's
comments

Cc: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f6742a6a..647b12265e18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
  *	@skb: buffer to filter
  *
  * Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -565,12 +565,9 @@ err:
 }
 
 /* Security:
- *
- * A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
  *
  * As we dont want to clear mem[] array for each packet going through
- * sk_run_filter(), we check that filter loaded by user never try to read
+ * __bpf_prog_run(), we check that filter loaded by user never try to read
  * a cell if not previously written, and we check all branches to be sure
  * a malicious user doesn't try to abuse us.
  */
-- 
cgit 


From 5af7fb6e3e92c2797ee30d66138cf6aa6b29240d Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Fri, 10 Oct 2014 12:09:12 -0700
Subject: flow-dissector: Fix alignment issue in __skb_flow_get_ports

This patch addresses a kernel unaligned access bug seen on a sparc64 system
with an igb adapter.  Specifically the __skb_flow_get_ports was returning a
be32 pointer which was then having the value directly returned.

In order to prevent this it is actually easier to simply not populate the
ports or address values when an skb is not present.  In this case the
assumption is that the data isn't needed and rather than slow down the
faster aligned accesses by making them have to assume the unaligned path on
architectures that don't support efficent unaligned access it makes more
sense to simply switch off the bits that were copying the source and
destination address/port for the case where we only care about the protocol
types and lengths which are normally 16 bit fields anyway.

Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8560dea58803..45084938c403 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -100,6 +100,13 @@ ip:
 		if (ip_is_fragment(iph))
 			ip_proto = 0;
 
+		/* skip the address processing if skb is NULL.  The assumption
+		 * here is that if there is no skb we are not looking for flow
+		 * info but lengths and protocols.
+		 */
+		if (!skb)
+			break;
+
 		iph_to_flow_copy_addrs(flow, iph);
 		break;
 	}
@@ -114,17 +121,15 @@ ipv6:
 			return false;
 
 		ip_proto = iph->nexthdr;
-		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
 		nhoff += sizeof(struct ipv6hdr);
 
-		/* skip the flow label processing if skb is NULL.  The
-		 * assumption here is that if there is no skb we are not
-		 * looking for flow info as much as we are length.
-		 */
+		/* see comment above in IPv4 section */
 		if (!skb)
 			break;
 
+		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+
 		flow_label = ip6_flowlabel(iph);
 		if (flow_label) {
 			/* Awesome, IPv6 packet has a flow label so we can
@@ -231,9 +236,13 @@ ipv6:
 
 	flow->n_proto = proto;
 	flow->ip_proto = ip_proto;
-	flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
 	flow->thoff = (u16) nhoff;
 
+	/* unless skb is set we don't need to record port info */
+	if (skb)
+		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+						   data, hlen);
+
 	return true;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
@@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 
 	switch (keys->ip_proto) {
 	case IPPROTO_TCP: {
-		const struct tcphdr *tcph;
-		struct tcphdr _tcph;
+		/* access doff as u8 to avoid unaligned access */
+		const u8 *doff;
+		u8 _doff;
 
-		tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
-					    data, hlen, &_tcph);
-		if (!tcph)
+		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+					    data, hlen, &_doff);
+		if (!doff)
 			return poff;
 
-		poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
 		break;
 	}
 	case IPPROTO_UDP:
-- 
cgit 


From 4c450583d9d0a8241f0f62b80038ac47b43ff843 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 10 Oct 2014 04:48:18 -0700
Subject: net: fix races in page->_count manipulation

This is illegal to use atomic_set(&page->_count, ...) even if we 'own'
the page. Other entities in the kernel need to use get_page_unless_zero()
to get a reference to the page before testing page properties, so we could
loose a refcount increment.

The only case it is valid is when page->_count is 0

Fixes: 540eb7bf0bbed ("net: Update alloc frag to reduce get/put page usage and recycle pages")
Signed-off-by: Eric Dumaze <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a30d750647e7..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
 				goto end;
 		}
 		nc->frag.size = PAGE_SIZE << order;
-recycle:
-		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		/* Even if we own the page, we do not use atomic_set().
+		 * This would break get_page_unless_zero() users.
+		 */
+		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+			   &nc->frag.page->_count);
 		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
 		nc->frag.offset = 0;
 	}
 
 	if (nc->frag.offset + fragsz > nc->frag.size) {
-		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
-			goto recycle;
-		goto refill;
+		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+			if (!atomic_sub_and_test(nc->pagecnt_bias,
+						 &nc->frag.page->_count))
+				goto refill;
+			/* OK, page count is 0, we can safely set it */
+			atomic_set(&nc->frag.page->_count,
+				   NETDEV_PAGECNT_MAX_BIAS);
+		} else {
+			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+				   &nc->frag.page->_count);
+		}
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
 	data = page_address(nc->frag.page) + nc->frag.offset;
-- 
cgit