From 948dc8c99a22d6bdcb34c194cde392e1a125928a Mon Sep 17 00:00:00 2001
From: Gary Lin
Date: Mon, 13 May 2019 17:45:48 +0800
Subject: bpf: btf: fix the brackets of BTF_INT_OFFSET()

'VAL' should be protected by the brackets.

v2:
* Squash the fix for Documentation/bpf/btf.rst

Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Gary Lin
Signed-off-by: Daniel Borkmann
---
 include/uapi/linux/btf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 9310652ca4f9..63ae4a39e58b 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -83,7 +83,7 @@ struct btf_type {
  * is the 32 bits arrangement:
  */
 #define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
-#define BTF_INT_OFFSET(VAL)	(((VAL & 0x00ff0000)) >> 16)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
 #define BTF_INT_BITS(VAL)	((VAL) & 0x000000ff)
 
 /* Attributes stored in the BTF_INT_ENCODING */
-- cgit
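The fix is a single pair of parentheses, but it matters because macro arguments can be arbitrary expressions. A minimal user-space illustration (not part of the patch) of how the old definition mis-expands when the argument is an expression:

#include <stdio.h>

/* Old (buggy) and new (fixed) forms of the macro, side by side. */
#define BTF_INT_OFFSET_OLD(VAL)	(((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_OFFSET_NEW(VAL)	(((VAL) & 0x00ff0000) >> 16)

int main(void)
{
	unsigned int a = 0x01000000, b = 0x00020000;

	/* '&' binds tighter than '|', so the old macro expands to
	 * ((a | (b & 0x00ff0000)) >> 16) instead of masking (a | b).
	 */
	printf("old: 0x%x\n", BTF_INT_OFFSET_OLD(a | b));	/* 0x102 - wrong */
	printf("new: 0x%x\n", BTF_INT_OFFSET_NEW(a | b));	/* 0x2   - right */
	return 0;
}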
From c6110222c6f49ea68169f353565eb865488a8619 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Tue, 14 May 2019 01:18:55 +0200
Subject: bpf: add map_lookup_elem_sys_only for lookups from syscall side

Add a callback map_lookup_elem_sys_only() that map implementations
could use over map_lookup_elem() from the system call side in case the
map implementation needs to handle the latter differently than from
the BPF data path. If map_lookup_elem_sys_only() is set, this will be
the preferred pick for map lookups out of user space.

This hook is used in a follow-up fix for the LRU map, but once the
development window opens, we can convert other map types from
map_lookup_elem() (here, the one called upon BPF_MAP_LOOKUP_ELEM cmd
is meant) over to use the callback to simplify and clean up the latter.

Signed-off-by: Daniel Borkmann
Acked-by: Martin KaFai Lau
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 59631dd0777c..4fb3aa2dc975 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,7 @@ struct bpf_map_ops {
 	void (*map_free)(struct bpf_map *map);
 	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
 	void (*map_release_uref)(struct bpf_map *map);
+	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
 
 	/* funcs callable from userspace and from eBPF programs */
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
-- cgit
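The header only adds the new ops member; the syscall path is then expected to prefer it whenever a map implementation provides one. A rough sketch of that dispatch, using a hypothetical helper name (the real logic lives in kernel/bpf/syscall.c and may differ in structure and locking):

/* Sketch only: prefer the syscall-side lookup callback when present.
 * bpf_map_lookup_value() is an illustrative name, not the kernel's.
 */
static void *bpf_map_lookup_value(struct bpf_map *map, void *key)
{
	/* For BPF_MAP_LOOKUP_ELEM from user space, a map type such as the
	 * LRU map can override the data-path behaviour, e.g. to avoid
	 * promoting an element just because user space peeked at it.
	 */
	if (map->ops->map_lookup_elem_sys_only)
		return map->ops->map_lookup_elem_sys_only(map, key);

	return map->ops->map_lookup_elem(map, key);
}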
From 858f5017446764e8bca0b29589a3b164186ae471 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 15 May 2019 09:10:15 -0700
Subject: tcp: do not recycle cloned skbs

It is illegal to change arbitrary fields in skb_shared_info if the
skb is cloned.

Before calling skb_zcopy_clear() we need to ensure this rule holds,
therefore we need to move the test from sk_stream_alloc_skb() to
sk_wmem_free_skb().

Fixes: 4f661542a402 ("tcp: fix zerocopy and notsent_lowat issues")
Signed-off-by: Eric Dumazet
Diagnosed-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 4d208c0f9c14..0680fa988497 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1473,7 +1473,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 	sk->sk_wmem_queued -= skb->truesize;
 	sk_mem_uncharge(sk, skb->truesize);
-	if (!sk->sk_tx_skb_cache) {
+	if (!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
 		skb_zcopy_clear(skb, true);
 		sk->sk_tx_skb_cache = skb;
 		return;
-- cgit
From ba6306e3f648a857ae52ddcabc2859542fd2f94c Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 16 May 2019 15:19:46 +0800
Subject: rhashtable: Remove RCU marking from rhash_lock_head

The opaque type rhash_lock_head should not be marked with __rcu
because it can never be dereferenced. We should apply the RCU marking
when we turn it into a pointer which can be dereferenced.

This patch does exactly that. This fixes a number of sparse warnings
as well as getting rid of some unnecessary RCU checking.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 58 +++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f7714d3b46bd..9f8bc06d4136 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -84,7 +84,7 @@ struct bucket_table {
 	struct lockdep_map	dep_map;
 
-	struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp;
 };
 
 /*
@@ -261,13 +261,13 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
-struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
-						 unsigned int hash);
-struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl,
-						   unsigned int hash);
-struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
-							 struct bucket_table *tbl,
-							 unsigned int hash);
+struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
+					   unsigned int hash);
+struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
+					     unsigned int hash);
+struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
+						  struct bucket_table *tbl,
+						  unsigned int hash);
 
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
@@ -284,21 +284,21 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 #define rht_entry(tpos, pos, member) \
 ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
-static inline struct rhash_lock_head __rcu *const *rht_bucket(
+static inline struct rhash_lock_head *const *rht_bucket(
 	const struct bucket_table *tbl, unsigned int hash)
 {
 	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
 				     &tbl->buckets[hash];
 }
 
-static inline struct rhash_lock_head __rcu **rht_bucket_var(
+static inline struct rhash_lock_head **rht_bucket_var(
 	struct bucket_table *tbl, unsigned int hash)
 {
 	return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
 				     &tbl->buckets[hash];
 }
 
-static inline struct rhash_lock_head __rcu **rht_bucket_insert(
+static inline struct rhash_lock_head **rht_bucket_insert(
 	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
 {
 	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
@@ -349,6 +349,12 @@ static inline void rht_unlock(struct bucket_table *tbl,
 	local_bh_enable();
 }
 
+static inline struct rhash_head __rcu *__rht_ptr(
+	struct rhash_lock_head *const *bkt)
+{
+	return (struct rhash_head __rcu *)((unsigned long)*bkt & ~BIT(0));
+}
+
 /*
  * Where 'bkt' is a bucket and might be locked:
  *   rht_ptr() dereferences that pointer and clears the lock bit.
@@ -356,30 +362,30 @@ static inline void rht_unlock(struct bucket_table *tbl,
  *   access is guaranteed, such as when destroying the table.
  */
 static inline struct rhash_head *rht_ptr(
-	struct rhash_lock_head __rcu * const *bkt,
+	struct rhash_lock_head *const *bkt,
 	struct bucket_table *tbl,
 	unsigned int hash)
 {
-	const struct rhash_lock_head *p =
-		rht_dereference_bucket_rcu(*bkt, tbl, hash);
+	struct rhash_head __rcu *p = __rht_ptr(bkt);
 
-	if ((((unsigned long)p) & ~BIT(0)) == 0)
+	if (!p)
 		return RHT_NULLS_MARKER(bkt);
-	return (void *)(((unsigned long)p) & ~BIT(0));
+
+	return rht_dereference_bucket_rcu(p, tbl, hash);
 }
 
 static inline struct rhash_head *rht_ptr_exclusive(
-	struct rhash_lock_head __rcu * const *bkt)
+	struct rhash_lock_head *const *bkt)
 {
-	const struct rhash_lock_head *p =
-		rcu_dereference_protected(*bkt, 1);
+	struct rhash_head __rcu *p = __rht_ptr(bkt);
 
 	if (!p)
 		return RHT_NULLS_MARKER(bkt);
-	return (void *)(((unsigned long)p) & ~BIT(0));
+
+	return rcu_dereference_protected(p, 1);
 }
 
-static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+static inline void rht_assign_locked(struct rhash_lock_head **bkt,
 				     struct rhash_head *obj)
 {
 	struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
@@ -390,7 +396,7 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
 }
 
 static inline void rht_assign_unlock(struct bucket_table *tbl,
-				     struct rhash_lock_head __rcu **bkt,
+				     struct rhash_lock_head **bkt,
 				     struct rhash_head *obj)
 {
 	struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
@@ -587,7 +593,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	struct rhash_lock_head __rcu * const *bkt;
+	struct rhash_lock_head *const *bkt;
 	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
@@ -703,7 +709,7 @@ static inline void *__rhashtable_insert_fast(
 		.ht = ht,
 		.key = key,
 	};
-	struct rhash_lock_head __rcu **bkt;
+	struct rhash_lock_head **bkt;
 	struct rhash_head __rcu **pprev;
 	struct bucket_table *tbl;
 	struct rhash_head *head;
@@ -989,7 +995,7 @@ static inline int __rhashtable_remove_fast_one(
 	struct rhash_head *obj, const struct rhashtable_params params,
 	bool rhlist)
 {
-	struct rhash_lock_head __rcu **bkt;
+	struct rhash_lock_head **bkt;
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	unsigned int hash;
@@ -1141,7 +1147,7 @@ static inline int __rhashtable_replace_fast(
 	struct rhash_head *obj_old, struct rhash_head *obj_new,
 	const struct rhashtable_params params)
 {
-	struct rhash_lock_head __rcu **bkt;
+	struct rhash_lock_head **bkt;
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	unsigned int hash;
-- cgit
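To see why the __rcu marking moves in the patch above, it helps to look at the shape of the accessor it introduces: the bucket head is an opaque word whose bit 0 doubles as a spinlock, so it is never dereferenced as-is; only after the lock bit is masked off is there a real RCU-protected rhash_head pointer. A simplified stand-alone sketch of that pattern (names mirror rhashtable.h, but this is not the kernel code itself):

struct entry;			/* hash chain node, like rhash_head */
struct lock_head;		/* opaque bucket word, never dereferenced */

/* Strip the bit-0 lock, leaving an __rcu-annotated entry pointer (or NULL).
 * This is where the RCU marking belongs - on the value that will actually
 * be dereferenced, not on the opaque lock_head type.
 */
static inline struct entry __rcu *bucket_entry(struct lock_head *const *bkt)
{
	return (struct entry __rcu *)((unsigned long)*bkt & ~1UL);
}

static inline struct entry *bucket_deref(struct lock_head *const *bkt)
{
	struct entry __rcu *p = bucket_entry(bkt);

	return p ? rcu_dereference(p) : NULL;
}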
From bae9ed69029c7d499c57485593b2faae475fd704 Mon Sep 17 00:00:00 2001
From: Edward Cree
Date: Tue, 14 May 2019 21:18:12 +0100
Subject: flow_offload: support CVLAN match

Plumb it through from the flow_dissector.

Signed-off-by: Edward Cree
Signed-off-by: David S. Miller
---
 include/net/flow_offload.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 6200900434e1..a2df99f9b196 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -71,6 +71,8 @@ void flow_rule_match_eth_addrs(const struct flow_rule *rule,
 			       struct flow_match_eth_addrs *out);
 void flow_rule_match_vlan(const struct flow_rule *rule,
 			  struct flow_match_vlan *out);
+void flow_rule_match_cvlan(const struct flow_rule *rule,
+			   struct flow_match_vlan *out);
 void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
 				struct flow_match_ipv4_addrs *out);
 void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
 				struct flow_match_ipv6_addrs *out);
-- cgit
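The header only declares the accessor; the definition would be expected to mirror flow_rule_match_vlan() but keyed on the inner (customer) VLAN dissector key. A hedged sketch of the definition and of a driver-side use (the macro and field names follow the existing flow_offload.c conventions and may differ in the actual patch series):

/* Sketch of the likely definition in net/core/flow_offload.c. */
void flow_rule_match_cvlan(const struct flow_rule *rule,
			   struct flow_match_vlan *out)
{
	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CVLAN, out);
}
EXPORT_SYMBOL(flow_rule_match_cvlan);

/* Sketch of a driver consuming the new match in its TC offload path. */
static void example_parse_cvlan(const struct flow_rule *rule)
{
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan match;

		flow_rule_match_cvlan(rule, &match);
		/* match.key->vlan_id, match.key->vlan_priority, and the
		 * corresponding match.mask fields describe the inner tag.
		 */
	}
}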
From 185ce5c38ea76f29b6bd9c7c8c7a5e5408834920 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Wed, 15 May 2019 13:29:16 -0400
Subject: net: test nouarg before dereferencing zerocopy pointers

Zerocopy skbs without completion notification were added for packet
sockets with PACKET_TX_RING user buffers. Those signal completion
through the TP_STATUS_USER bit in the ring. Zerocopy annotation was
added only to avoid premature notification after clone or orphan, by
triggering a copy on these paths for these packets.

The mechanism had to define a special "no-uarg" mode because packet
sockets already use skb_uarg(skb) == skb_shinfo(skb)->destructor_arg
for a different pointer.

Before dereferencing skb_uarg(skb), verify that it is a real pointer.

Fixes: 5cd8d46ea1562 ("packet: copy user buffers before orphan or clone")
Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6d58fa8a65fd..2ee5e63195c0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1434,10 +1434,12 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 	struct ubuf_info *uarg = skb_zcopy(skb);
 
 	if (uarg) {
-		if (uarg->callback == sock_zerocopy_callback) {
+		if (skb_zcopy_is_nouarg(skb)) {
+			/* no notification callback */
+		} else if (uarg->callback == sock_zerocopy_callback) {
 			uarg->zerocopy = uarg->zerocopy && zerocopy;
 			sock_zerocopy_put(uarg);
-		} else if (!skb_zcopy_is_nouarg(skb)) {
+		} else {
 			uarg->callback(uarg, zerocopy);
 		}
 
@@ -2691,7 +2693,8 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
 {
 	if (likely(!skb_zcopy(skb)))
 		return 0;
-	if (skb_uarg(skb)->callback == sock_zerocopy_callback)
+	if (!skb_zcopy_is_nouarg(skb) &&
+	    skb_uarg(skb)->callback == sock_zerocopy_callback)
 		return 0;
 	return skb_copy_ubufs(skb, gfp_mask);
 }
-- cgit
From 61fb0d01680771f72cc9d39783fb2c122aaad51e Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 15 May 2019 19:39:52 -0700
Subject: ipv6: prevent possible fib6 leaks

At ipv6 route dismantle, fib6_drop_pcpu_from() is responsible for
finding all percpu routes and setting their ->from pointer to NULL,
so that fib6_ref can reach its expected value (1).

The problem right now is that other cpus can still catch the route
being deleted, since there is no rcu grace period between the route
deletion and the call to fib6_drop_pcpu_from().

This can leak the fib6 and associated resources, since no notifier
will take care of removing the last reference(s).

I decided to add another boolean (fib6_destroying) instead of
reusing/renaming exception_bucket_flushed to ease stable backports,
and properly document the memory barriers used to implement this fix.

This patch has been co-developed with Wei Wang.

Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
Signed-off-by: Eric Dumazet
Reported-by: syzbot
Cc: Wei Wang
Cc: David Ahern
Cc: Martin Lau
Acked-by: Wei Wang
Acked-by: Martin KaFai Lau
Reviewed-by: David Ahern
Signed-off-by: David S. Miller
---
 include/net/ip6_fib.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 40105738e2f6..525f701653ca 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -167,7 +167,8 @@ struct fib6_info {
 					dst_nocount:1,
 					dst_nopolicy:1,
 					dst_host:1,
-					unused:3;
+					fib6_destroying:1,
+					unused:2;
 
 	struct fib6_nh			fib6_nh;
 	struct rcu_head			rcu;
-- cgit
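The header hunk above only carves a flag bit out of 'unused'; the fix itself lies in how the flag is used in net/ipv6. A heavily simplified sketch of the intended writer/reader pairing (function names and the exact barrier choice here are illustrative, not the actual ip6_fib.c/route.c code):

/* Writer (route dismantle): mark the fib6_info as dying before walking
 * the percpu routes, so a concurrent reader either sees the flag or its
 * newly installed percpu copy is still caught by the walk below.
 */
static void fib6_destroy_writer_sketch(struct fib6_info *f6i)
{
	f6i->fib6_destroying = 1;
	smp_mb();	/* order the flag store before the percpu walk */
	/* ... walk and detach the percpu routes here ... */
}

/* Reader (percpu route caching): refuse to install a new percpu copy for
 * a fib6_info that is already being torn down.  The real code pairs this
 * read with the writer's barrier via the atomic publish of the percpu
 * route; it is shown here as a plain flag test only for brevity.
 */
static bool fib6_may_cache_sketch(const struct fib6_info *f6i)
{
	return !READ_ONCE(f6i->fib6_destroying);
}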
From 02f3afd97556017872a2d01d03d4ce66f8421a65 Mon Sep 17 00:00:00 2001
From: Parav Pandit
Date: Fri, 5 Apr 2019 01:07:19 -0500
Subject: net/mlx5: E-Switch, Correct type to u16 for vport_num and int for vport_index

To avoid any ambiguity between vport index and vport number, rename
functions that had 'vport' to 'vport_num' or 'vport_index' as
appropriate.

vport_num is u16, hence change the mlx5_eswitch_index_to_vport_num()
return type to u16.

vport_index is an int in the vport array, hence change the input type
of the vport index in mlx5_eswitch_index_to_vport_num() to int.

Correct multiple eswitch representor interfaces that were using the
u16 rep->vport as an int vport_index.

Send vport FW commands with the correct eswitch u16 vport_num instead
of the host int vport_index.

Fixes: 5ae5162066d8 ("net/mlx5: E-Switch, Assign a different position for uplink rep and vport")
Signed-off-by: Parav Pandit
Signed-off-by: Vu Pham
Reviewed-by: Bodong Wang
Signed-off-by: Saeed Mahameed
---
 include/linux/mlx5/eswitch.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 0ca77dd1429c..cf226c190329 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -51,13 +51,13 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
 				      u8 rep_type);
 void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type);
 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
-				 int vport,
+				 u16 vport_num,
 				 u8 rep_type);
 struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
-						int vport);
+						u16 vport_num);
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
-				    int vport, u32 sqn);
+				    u16 vport_num, u32 sqn);
 #endif
-- cgit
From 6a0a923dfa1480df41fb486323b8375e387d516f Mon Sep 17 00:00:00 2001
From: Petr Štetiar
Date: Sun, 19 May 2019 14:18:44 +0200
Subject: of_net: fix of_get_mac_address retval if compiled without CONFIG_OF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

of_get_mac_address prior to commit d01f449c008a ("of_net: add NVMEM
support to of_get_mac_address") could return only a valid pointer or
NULL; after this change it can return only a valid pointer or an
ERR_PTR-encoded error value. I forgot to change the return value of
of_get_mac_address in the case where the kernel is compiled without
CONFIG_OF, so I'm doing so now.

Cc: Mirko Lindner
Cc: Stephen Hemminger
Fixes: d01f449c008a ("of_net: add NVMEM support to of_get_mac_address")
Reported-by: Octavio Alvarez
Signed-off-by: Petr Štetiar
Signed-off-by: David S. Miller
---
 include/linux/of_net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 9cd72aab76fe..0f0346e6829c 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -22,7 +22,7 @@ static inline int of_get_phy_mode(struct device_node *np)
 
 static inline const void *of_get_mac_address(struct device_node *np)
 {
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static inline struct net_device *of_find_net_device_by_node(struct device_node *np)
-- cgit
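With both the CONFIG_OF implementation and the !CONFIG_OF stub now returning an ERR_PTR() on failure, callers are expected to test with IS_ERR() rather than comparing against NULL. A small hedged sketch of a typical driver probe-path caller (the function and the fallback policy are illustrative, not taken from any particular driver):

#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/of_net.h>

static void example_set_mac(struct net_device *ndev, struct device_node *np)
{
	const void *mac = of_get_mac_address(np);

	if (!IS_ERR(mac))
		ether_addr_copy(ndev->dev_addr, mac);
	else
		eth_hw_addr_random(ndev);	/* no usable MAC in DT/NVMEM */
}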