From 948dc8c99a22d6bdcb34c194cde392e1a125928a Mon Sep 17 00:00:00 2001
From: Gary Lin
Date: Mon, 13 May 2019 17:45:48 +0800
Subject: bpf: btf: fix the brackets of BTF_INT_OFFSET()

'VAL' should be protected by the brackets.

v2:
* Squash the fix for Documentation/bpf/btf.rst

Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Gary Lin
Signed-off-by: Daniel Borkmann
---
 include/uapi/linux/btf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 9310652ca4f9..63ae4a39e58b 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -83,7 +83,7 @@ struct btf_type {
  * is the 32 bits arrangement:
  */
 #define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
-#define BTF_INT_OFFSET(VAL)	(((VAL & 0x00ff0000)) >> 16)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
 #define BTF_INT_BITS(VAL)	((VAL) & 0x000000ff)
 
 /* Attributes stored in the BTF_INT_ENCODING */
-- cgit
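The fix is a single pair of parentheses, but it matters because macro arguments can be arbitrary expressions. A minimal user-space illustration (not part of the patch) of how the old definition mis-expands when the argument is an expression:

#include <stdio.h>

/* Old (buggy) and new (fixed) forms of the macro, side by side. */
#define BTF_INT_OFFSET_OLD(VAL)	(((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_OFFSET_NEW(VAL)	(((VAL) & 0x00ff0000) >> 16)

int main(void)
{
	unsigned int a = 0x01000000, b = 0x00020000;

	/* '&' binds tighter than '|', so the old macro expands to
	 * ((a | (b & 0x00ff0000)) >> 16) instead of masking (a | b).
	 */
	printf("old: 0x%x\n", BTF_INT_OFFSET_OLD(a | b));	/* 0x102 - wrong */
	printf("new: 0x%x\n", BTF_INT_OFFSET_NEW(a | b));	/* 0x2   - right */
	return 0;
}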
From c6110222c6f49ea68169f353565eb865488a8619 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Tue, 14 May 2019 01:18:55 +0200
Subject: bpf: add map_lookup_elem_sys_only for lookups from syscall side

Add a callback map_lookup_elem_sys_only() that map implementations
could use over map_lookup_elem() from the system call side in case the
map implementation needs to handle the latter differently than from
the BPF data path. If map_lookup_elem_sys_only() is set, this will be
the preferred pick for map lookups out of user space.

This hook is used in a follow-up fix for the LRU map, but once the
development window opens, we can convert other map types from
map_lookup_elem() (here, the one called upon BPF_MAP_LOOKUP_ELEM cmd
is meant) over to use the callback to simplify and clean up the latter.

Signed-off-by: Daniel Borkmann
Acked-by: Martin KaFai Lau
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 59631dd0777c..4fb3aa2dc975 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,7 @@ struct bpf_map_ops {
 	void (*map_free)(struct bpf_map *map);
 	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
 	void (*map_release_uref)(struct bpf_map *map);
+	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
 
 	/* funcs callable from userspace and from eBPF programs */
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
-- cgit
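The header only adds the new ops member; the syscall path is then expected to prefer it whenever a map implementation provides one. A rough sketch of that dispatch, using a hypothetical helper name (the real logic lives in kernel/bpf/syscall.c and may differ in structure and locking):

/* Sketch only: prefer the syscall-side lookup callback when present.
 * bpf_map_lookup_value() is an illustrative name, not the kernel's.
 */
static void *bpf_map_lookup_value(struct bpf_map *map, void *key)
{
	/* For BPF_MAP_LOOKUP_ELEM from user space, a map type such as the
	 * LRU map can override the data-path behaviour, e.g. to avoid
	 * promoting an element just because user space peeked at it.
	 */
	if (map->ops->map_lookup_elem_sys_only)
		return map->ops->map_lookup_elem_sys_only(map, key);

	return map->ops->map_lookup_elem(map, key);
}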
From 858f5017446764e8bca0b29589a3b164186ae471 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 15 May 2019 09:10:15 -0700
Subject: tcp: do not recycle cloned skbs

It is illegal to change arbitrary fields in skb_shared_info if the
skb is cloned.

Before calling skb_zcopy_clear() we need to ensure this rule holds,
therefore we need to move the test from sk_stream_alloc_skb() to
sk_wmem_free_skb().

Fixes: 4f661542a402 ("tcp: fix zerocopy and notsent_lowat issues")
Signed-off-by: Eric Dumazet
Diagnosed-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 4d208c0f9c14..0680fa988497 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1473,7 +1473,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 	sk->sk_wmem_queued -= skb->truesize;
 	sk_mem_uncharge(sk, skb->truesize);
-	if (!sk->sk_tx_skb_cache) {
+	if (!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
 		skb_zcopy_clear(skb, true);
 		sk->sk_tx_skb_cache = skb;
 		return;
-- cgit
From ba6306e3f648a857ae52ddcabc2859542fd2f94c Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 16 May 2019 15:19:46 +0800
Subject: rhashtable: Remove RCU marking from rhash_lock_head

The opaque type rhash_lock_head should not be marked with __rcu
because it can never be dereferenced. We should apply the RCU marking
when we turn it into a pointer which can be dereferenced.

This patch does exactly that. This fixes a number of sparse warnings
as well as getting rid of some unnecessary RCU checking.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 58 +++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f7714d3b46bd..9f8bc06d4136 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -84,7 +84,7 @@ struct bucket_table {
 	struct lockdep_map	dep_map;
 
-	struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp;
 };
 
 /*
@@ -261,13 +261,13 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
-struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
-						 unsigned int hash);
-struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl,
-						   unsigned int hash);
-struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
-							 struct bucket_table *tbl,
-							 unsigned int hash);
+struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
+					   unsigned int hash);
+struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
+					     unsigned int hash);
+struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
+						  struct bucket_table *tbl,
+						  unsigned int hash);
 
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
@@ -284,21 +284,21 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 #define rht_entry(tpos, pos, member) \
 ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
-static inline struct rhash_lock_head __rcu *const *rht_bucket(
+static inline struct rhash_lock_head *const *rht_bucket(
 	const struct bucket_table *tbl, unsigned int hash)
 {
 	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
 				     &tbl->buckets[hash];
 }
 
-static inline struct rhash_lock_head __rcu **rht_bucket_var(
+static inline struct rhash_lock_head **rht_bucket_var(
 	struct bucket_table *tbl, unsigned int hash)
 {
 	return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
 				     &tbl->buckets[hash];
 }
 
-static inline struct rhash_lock_head __rcu **rht_bucket_insert(
+static inline struct rhash_lock_head **rht_bucket_insert(
 	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
 {
 	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
@@ -349,6 +349,12 @@ static inline void rht_unlock(struct bucket_table *tbl,
 	local_bh_enable();
 }
 
+static inline struct rhash_head __rcu *__rht_ptr(
+	struct rhash_lock_head *const *bkt)
+{
+	return (struct rhash_head __rcu *)((unsigned long)*bkt & ~BIT(0));
+}
+
 /*
  * Where 'bkt' is a bucket and might be locked:
  *   rht_ptr() dereferences that pointer and clears the lock bit.
@@ -356,30 +362,30 @@ static inline void rht_unlock(struct bucket_table *tbl,
  *   access is guaranteed, such as when destroying the table.
  */
 static inline struct rhash_head *rht_ptr(
-	struct rhash_lock_head __rcu * const *bkt,
+	struct rhash_lock_head *const *bkt,
 	struct bucket_table *tbl,
 	unsigned int hash)
 {
-	const struct rhash_lock_head *p =
-		rht_dereference_bucket_rcu(*bkt, tbl, hash);
+	struct rhash_head __rcu *p = __rht_ptr(bkt);
 
-	if ((((unsigned long)p) & ~BIT(0)) == 0)
+	if (!p)
 		return RHT_NULLS_MARKER(bkt);
-	return (void *)(((unsigned long)p) & ~BIT(0));
+
+	return rht_dereference_bucket_rcu(p, tbl, hash);
 }
 
 static inline struct rhash_head *rht_ptr_exclusive(
-	struct rhash_lock_head __rcu * const *bkt)
+	struct rhash_lock_head *const *bkt)
 {
-	const struct rhash_lock_head *p =
-		rcu_dereference_protected(*bkt, 1);
+	struct rhash_head __rcu *p = __rht_ptr(bkt);
 
 	if (!p)
 		return RHT_NULLS_MARKER(bkt);
-	return (void *)(((unsigned long)p) & ~BIT(0));
+
+	return rcu_dereference_protected(p, 1);
 }
 
-static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+static inline void rht_assign_locked(struct rhash_lock_head **bkt,
 				     struct rhash_head *obj)
 {
 	struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
@@ -390,7 +396,7 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
 }
 
 static inline void rht_assign_unlock(struct bucket_table *tbl,
-				     struct rhash_lock_head __rcu **bkt,
+				     struct rhash_lock_head **bkt,
 				     struct rhash_head *obj)
 {
 	struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
@@ -587,7 +593,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	struct rhash_lock_head __rcu * const *bkt;
+	struct rhash_lock_head *const *bkt;
 	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
@@ -703,7 +709,7 @@ static inline void *__rhashtable_insert_fast(
 		.ht = ht,
 		.key = key,
 	};
-	struct rhash_lock_head __rcu **bkt;
+	struct rhash_lock_head **bkt;
 	struct rhash_head __rcu **pprev;
 	struct bucket_table *tbl;
 	struct rhash_head *head;
@@ -989,7 +995,7 @@ static inline int __rhashtable_remove_fast_one(
 	struct rhash_head *obj, const struct rhashtable_params params,
 	bool rhlist)
 {
-	struct rhash_lock_head __rcu **bkt;
+	struct rhash_lock_head **bkt;
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	unsigned int hash;
@@ -1141,7 +1147,7 @@ static inline int __rhashtable_replace_fast(
 	struct rhash_head *obj_old, struct rhash_head *obj_new,
 	const struct rhashtable_params params)
 {
-	struct rhash_lock_head __rcu **bkt;
+	struct rhash_lock_head **bkt;
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	unsigned int hash;
-- cgit
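To see why the __rcu marking moves in the patch above, it helps to look at the shape of the accessor it introduces: the bucket head is an opaque word whose bit 0 doubles as a spinlock, so it is never dereferenced as-is; only after the lock bit is masked off is there a real RCU-protected rhash_head pointer. A simplified stand-alone sketch of that pattern (names mirror rhashtable.h, but this is not the kernel code itself):

struct entry;			/* hash chain node, like rhash_head */
struct lock_head;		/* opaque bucket word, never dereferenced */

/* Strip the bit-0 lock, leaving an __rcu-annotated entry pointer (or NULL).
 * This is where the RCU marking belongs - on the value that will actually
 * be dereferenced, not on the opaque lock_head type.
 */
static inline struct entry __rcu *bucket_entry(struct lock_head *const *bkt)
{
	return (struct entry __rcu *)((unsigned long)*bkt & ~1UL);
}

static inline struct entry *bucket_deref(struct lock_head *const *bkt)
{
	struct entry __rcu *p = bucket_entry(bkt);

	return p ? rcu_dereference(p) : NULL;
}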
From bae9ed69029c7d499c57485593b2faae475fd704 Mon Sep 17 00:00:00 2001
From: Edward Cree
Date: Tue, 14 May 2019 21:18:12 +0100
Subject: flow_offload: support CVLAN match

Plumb it through from the flow_dissector.

Signed-off-by: Edward Cree
Signed-off-by: David S. Miller
---
 include/net/flow_offload.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 6200900434e1..a2df99f9b196 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -71,6 +71,8 @@ void flow_rule_match_eth_addrs(const struct flow_rule *rule,
 			       struct flow_match_eth_addrs *out);
 void flow_rule_match_vlan(const struct flow_rule *rule,
 			  struct flow_match_vlan *out);
+void flow_rule_match_cvlan(const struct flow_rule *rule,
+			   struct flow_match_vlan *out);
 void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
 				struct flow_match_ipv4_addrs *out);
 void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
 				struct flow_match_ipv6_addrs *out);
-- cgit
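The header only declares the accessor; the definition would be expected to mirror flow_rule_match_vlan() but keyed on the inner (customer) VLAN dissector key. A hedged sketch of the definition and of a driver-side use (the macro and field names follow the existing flow_offload.c conventions and may differ in the actual patch series):

/* Sketch of the likely definition in net/core/flow_offload.c. */
void flow_rule_match_cvlan(const struct flow_rule *rule,
			   struct flow_match_vlan *out)
{
	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CVLAN, out);
}
EXPORT_SYMBOL(flow_rule_match_cvlan);

/* Sketch of a driver consuming the new match in its TC offload path. */
static void example_parse_cvlan(const struct flow_rule *rule)
{
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan match;

		flow_rule_match_cvlan(rule, &match);
		/* match.key->vlan_id, match.key->vlan_priority, and the
		 * corresponding match.mask fields describe the inner tag.
		 */
	}
}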
From 185ce5c38ea76f29b6bd9c7c8c7a5e5408834920 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Wed, 15 May 2019 13:29:16 -0400
Subject: net: test nouarg before dereferencing zerocopy pointers

Zerocopy skbs without completion notification were added for packet
sockets with PACKET_TX_RING user buffers. Those signal completion
through the TP_STATUS_USER bit in the ring. Zerocopy annotation was
added only to avoid premature notification after clone or orphan, by
triggering a copy on these paths for these packets.

The mechanism had to define a special "no-uarg" mode because packet
sockets already use skb_uarg(skb) == skb_shinfo(skb)->destructor_arg
for a different pointer.

Before dereferencing skb_uarg(skb), verify that it is a real pointer.

Fixes: 5cd8d46ea1562 ("packet: copy user buffers before orphan or clone")
Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6d58fa8a65fd..2ee5e63195c0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1434,10 +1434,12 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 	struct ubuf_info *uarg = skb_zcopy(skb);
 
 	if (uarg) {
-		if (uarg->callback == sock_zerocopy_callback) {
+		if (skb_zcopy_is_nouarg(skb)) {
+			/* no notification callback */
+		} else if (uarg->callback == sock_zerocopy_callback) {
 			uarg->zerocopy = uarg->zerocopy && zerocopy;
 			sock_zerocopy_put(uarg);
-		} else if (!skb_zcopy_is_nouarg(skb)) {
+		} else {
 			uarg->callback(uarg, zerocopy);
 		}
 
@@ -2691,7 +2693,8 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
 {
 	if (likely(!skb_zcopy(skb)))
 		return 0;
-	if (skb_uarg(skb)->callback == sock_zerocopy_callback)
+	if (!skb_zcopy_is_nouarg(skb) &&
+	    skb_uarg(skb)->callback == sock_zerocopy_callback)
 		return 0;
 	return skb_copy_ubufs(skb, gfp_mask);
 }
-- cgit
From 61fb0d01680771f72cc9d39783fb2c122aaad51e Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 15 May 2019 19:39:52 -0700
Subject: ipv6: prevent possible fib6 leaks

At ipv6 route dismantle, fib6_drop_pcpu_from() is responsible for
finding all percpu routes and setting their ->from pointer to NULL,
so that fib6_ref can reach its expected value (1).

The problem right now is that other cpus can still catch the route
being deleted, since there is no rcu grace period between the route
deletion and the call to fib6_drop_pcpu_from().

This can leak the fib6 and associated resources, since no notifier
will take care of removing the last reference(s).

I decided to add another boolean (fib6_destroying) instead of
reusing/renaming exception_bucket_flushed to ease stable backports,
and properly document the memory barriers used to implement this fix.

This patch has been co-developed with Wei Wang.

Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
Signed-off-by: Eric Dumazet
Reported-by: syzbot
Cc: Wei Wang
Cc: David Ahern
Cc: Martin Lau
Acked-by: Wei Wang
Acked-by: Martin KaFai Lau
Reviewed-by: David Ahern
Signed-off-by: David S. Miller
---
 include/net/ip6_fib.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 40105738e2f6..525f701653ca 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -167,7 +167,8 @@ struct fib6_info {
 					dst_nocount:1,
 					dst_nopolicy:1,
 					dst_host:1,
-					unused:3;
+					fib6_destroying:1,
+					unused:2;
 
 	struct fib6_nh			fib6_nh;
 	struct rcu_head			rcu;
-- cgit
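The header hunk above only carves a flag bit out of 'unused'; the fix itself lies in how the flag is used in net/ipv6. A heavily simplified sketch of the intended writer/reader pairing (function names and the exact barrier choice here are illustrative, not the actual ip6_fib.c/route.c code):

/* Writer (route dismantle): mark the fib6_info as dying before walking
 * the percpu routes, so a concurrent reader either sees the flag or its
 * newly installed percpu copy is still caught by the walk below.
 */
static void fib6_destroy_writer_sketch(struct fib6_info *f6i)
{
	f6i->fib6_destroying = 1;
	smp_mb();	/* order the flag store before the percpu walk */
	/* ... walk and detach the percpu routes here ... */
}

/* Reader (percpu route caching): refuse to install a new percpu copy for
 * a fib6_info that is already being torn down.  The real code pairs this
 * read with the writer's barrier via the atomic publish of the percpu
 * route; it is shown here as a plain flag test only for brevity.
 */
static bool fib6_may_cache_sketch(const struct fib6_info *f6i)
{
	return !READ_ONCE(f6i->fib6_destroying);
}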
From 02f3afd97556017872a2d01d03d4ce66f8421a65 Mon Sep 17 00:00:00 2001
From: Parav Pandit
Date: Fri, 5 Apr 2019 01:07:19 -0500
Subject: net/mlx5: E-Switch, Correct type to u16 for vport_num and int for vport_index

To avoid any ambiguity between vport index and vport number, rename
functions that had 'vport' to 'vport_num' or 'vport_index' as
appropriate.

vport_num is u16, hence change the mlx5_eswitch_index_to_vport_num()
return type to u16.

vport_index is an int in the vport array, hence change the input type
of the vport index in mlx5_eswitch_index_to_vport_num() to int.

Correct multiple eswitch representor interfaces that were using the
u16 rep->vport as an int vport_index.

Send vport FW commands with the correct eswitch u16 vport_num instead
of the host int vport_index.

Fixes: 5ae5162066d8 ("net/mlx5: E-Switch, Assign a different position for uplink rep and vport")
Signed-off-by: Parav Pandit
Signed-off-by: Vu Pham
Reviewed-by: Bodong Wang
Signed-off-by: Saeed Mahameed
---
 include/linux/mlx5/eswitch.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 0ca77dd1429c..cf226c190329 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -51,13 +51,13 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
 				      u8 rep_type);
 void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type);
 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
-				 int vport,
+				 u16 vport_num,
 				 u8 rep_type);
 struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
-						int vport);
+						u16 vport_num);
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
-				    int vport, u32 sqn);
+				    u16 vport_num, u32 sqn);
 #endif
-- cgit
From 6a0a923dfa1480df41fb486323b8375e387d516f Mon Sep 17 00:00:00 2001
From: Petr Štetiar
Date: Sun, 19 May 2019 14:18:44 +0200
Subject: of_net: fix of_get_mac_address retval if compiled without CONFIG_OF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

of_get_mac_address prior to commit d01f449c008a ("of_net: add NVMEM
support to of_get_mac_address") could return only a valid pointer or
NULL; after this change it can return only a valid pointer or an
ERR_PTR-encoded error value. I forgot to change the return value of
of_get_mac_address in the case where the kernel is compiled without
CONFIG_OF, so I'm doing so now.

Cc: Mirko Lindner
Cc: Stephen Hemminger
Fixes: d01f449c008a ("of_net: add NVMEM support to of_get_mac_address")
Reported-by: Octavio Alvarez
Signed-off-by: Petr Štetiar
Signed-off-by: David S. Miller
---
 include/linux/of_net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 9cd72aab76fe..0f0346e6829c 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -22,7 +22,7 @@ static inline int of_get_phy_mode(struct device_node *np)
 
 static inline const void *of_get_mac_address(struct device_node *np)
 {
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static inline struct net_device *of_find_net_device_by_node(struct device_node *np)
-- cgit
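With both the CONFIG_OF implementation and the !CONFIG_OF stub now returning an ERR_PTR() on failure, callers are expected to test with IS_ERR() rather than comparing against NULL. A small hedged sketch of a typical driver probe-path caller (the function and the fallback policy are illustrative, not taken from any particular driver):

#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/of_net.h>

static void example_set_mac(struct net_device *ndev, struct device_node *np)
{
	const void *mac = of_get_mac_address(np);

	if (!IS_ERR(mac))
		ether_addr_copy(ndev->dev_addr, mac);
	else
		eth_hw_addr_random(ndev);	/* no usable MAC in DT/NVMEM */
}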