| author | Daniel Borkmann <daniel@iogearbox.net> | 2018-10-01 16:18:34 +0200 | 
|---|---|---|
| committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-10-01 16:18:35 +0200 | 
| commit | cb86d0f878be6d699dfd26c63f8ff03dfff1f9ba (patch) | |
| tree | 1b48dce888a0bcd3b8b4a747dc661b6e079e3f26 | |
| parent | 5bf7a60b8e70969f65c961d7e2c4eb40eb2c664d (diff) | |
| parent | 371e4fcc9d96ab1c8d72d59ca4ee3537402d1584 (diff) | |
Merge branch 'bpf-per-cpu-cgroup-storage'
Roman Gushchin says:
====================
This patchset implements per-cpu cgroup local storage and provides
an example of how per-cpu and shared cgroup local storage can be
used for efficient accounting of network traffic.
v4->v3:
  1) incorporated Alexei's feedback
v3->v2:
  1) incorporated Song's feedback
  2) rebased on top of current bpf-next
v2->v1:
  1) added a selftest implementing network counters
  2) added a missing free() in cgroup local storage selftest
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
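For orientation, the BPF-program side of the new map type can be summarized with a short sketch. It loosely follows the netcnt_prog.c selftest added by this series: per-packet counters go into the per-cpu storage on the fast path and are periodically flushed into the shared storage with atomic adds. The map names, struct names, flush threshold, and the bpf_helpers.h include are illustrative, not taken verbatim from the patch.

```c
// SPDX-License-Identifier: GPL-2.0
/* Sketch: batch per-packet updates in per-cpu cgroup storage and
 * periodically flush them into the shared cgroup storage, in the
 * spirit of the netcnt_prog.c selftest from this series.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"	/* selftests helper header of this era */

struct pcpu_cnt {
	__u64 packets;
	__u64 bytes;
};

struct shared_cnt {
	__u64 packets;
	__u64 bytes;
};

/* one per-cpu slot per (cgroup, attach_type) pair */
struct bpf_map_def SEC("maps") pcpu_store = {
	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
	.key_size = sizeof(struct bpf_cgroup_storage_key),
	.value_size = sizeof(struct pcpu_cnt),
};

/* one shared slot per (cgroup, attach_type) pair */
struct bpf_map_def SEC("maps") shared_store = {
	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
	.key_size = sizeof(struct bpf_cgroup_storage_key),
	.value_size = sizeof(struct shared_cnt),
};

SEC("cgroup/skb")
int count_egress(struct __sk_buff *skb)
{
	struct pcpu_cnt *pcpu = bpf_get_local_storage(&pcpu_store, 0);
	struct shared_cnt *shared = bpf_get_local_storage(&shared_store, 0);

	/* cheap, lock-free per-cpu accounting on the fast path */
	pcpu->packets++;
	pcpu->bytes += skb->len;

	/* flush into the shared copy now and then (threshold is arbitrary) */
	if (pcpu->packets >= 32) {
		__sync_fetch_and_add(&shared->packets, pcpu->packets);
		__sync_fetch_and_add(&shared->bytes, pcpu->bytes);
		pcpu->packets = 0;
		pcpu->bytes = 0;
	}

	return 1;	/* allow the packet */
}

char _license[] SEC("license") = "GPL";
```

The point of the split is visible in the hot path: the per-cpu slot is written without atomics, while the shared slot only sees an occasional atomic add.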
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | include/linux/bpf-cgroup.h | 55 |
| -rw-r--r-- | include/linux/bpf.h | 12 |
| -rw-r--r-- | include/linux/bpf_types.h | 1 |
| -rw-r--r-- | include/uapi/linux/bpf.h | 1 |
| -rw-r--r-- | kernel/bpf/cgroup.c | 74 |
| -rw-r--r-- | kernel/bpf/helpers.c | 25 |
| -rw-r--r-- | kernel/bpf/local_storage.c | 169 |
| -rw-r--r-- | kernel/bpf/map_in_map.c | 3 |
| -rw-r--r-- | kernel/bpf/syscall.c | 20 |
| -rw-r--r-- | kernel/bpf/verifier.c | 23 |
| -rw-r--r-- | net/bpf/test_run.c | 20 |
| -rw-r--r-- | samples/bpf/test_cgrp2_attach2.c | 19 |
| -rw-r--r-- | tools/bpf/bpftool/map.c | 4 |
| -rw-r--r-- | tools/include/uapi/linux/bpf.h | 1 |
| -rw-r--r-- | tools/testing/selftests/bpf/Makefile | 6 |
| -rw-r--r-- | tools/testing/selftests/bpf/netcnt_common.h | 24 |
| -rw-r--r-- | tools/testing/selftests/bpf/netcnt_prog.c | 71 |
| -rw-r--r-- | tools/testing/selftests/bpf/test_cgroup_storage.c | 60 |
| -rw-r--r-- | tools/testing/selftests/bpf/test_netcnt.c | 158 |
| -rw-r--r-- | tools/testing/selftests/bpf/test_verifier.c | 139 |
20 files changed, 786 insertions, 99 deletions
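On the user-space side (see the syscall.c and test_cgroup_storage.c changes in the diff below), the per-cpu variant behaves like other per-cpu map types: a single lookup returns one value slot per CPU, each slot rounded up to 8 bytes. Here is a minimal sketch, assuming libbpf's bpf_create_map()/bpf_map_lookup_elem() of this era and sizing the buffer by get_nprocs_conf() as the selftest does (strictly, the kernel fills one slot per possible CPU); the program load/attach step is elided.

```c
// SPDX-License-Identifier: GPL-2.0
/* Sketch: create a per-cpu cgroup storage map and sum its per-CPU
 * counters from user space, along the lines of test_cgroup_storage.c.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/sysinfo.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

int main(void)
{
	struct bpf_cgroup_storage_key key;
	unsigned long long *values, total = 0;
	int nproc = get_nprocs_conf();
	int map_fd, cpu;

	/* one 8-byte counter per (cgroup, attach_type), per CPU;
	 * max_entries must be 0 for cgroup storage maps
	 */
	map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
				sizeof(key), sizeof(unsigned long long), 0, 0);
	if (map_fd < 0)
		return 1;

	/* ... load a program that references map_fd and attach it to a
	 * cgroup; the storage element is created at attach time ...
	 */

	/* keys cannot be created from user space; iterate to find one */
	if (bpf_map_get_next_key(map_fd, NULL, &key))
		return 1;

	/* a lookup fills one 8-byte-aligned slot per CPU */
	values = calloc(nproc, sizeof(*values));
	if (!values || bpf_map_lookup_elem(map_fd, &key, values))
		return 1;

	for (cpu = 0; cpu < nproc; cpu++)
		total += values[cpu];

	printf("total: %llu\n", total);
	free(values);
	return 0;
}
```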
| diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index f91b0f8ff3a9..588dd5f0bd85 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -2,6 +2,7 @@  #ifndef _BPF_CGROUP_H  #define _BPF_CGROUP_H +#include <linux/bpf.h>  #include <linux/errno.h>  #include <linux/jump_label.h>  #include <linux/percpu.h> @@ -22,7 +23,11 @@ struct bpf_cgroup_storage;  extern struct static_key_false cgroup_bpf_enabled_key;  #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) -DECLARE_PER_CPU(void*, bpf_cgroup_storage); +DECLARE_PER_CPU(struct bpf_cgroup_storage*, +		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); + +#define for_each_cgroup_storage_type(stype) \ +	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)  struct bpf_cgroup_storage_map; @@ -32,7 +37,10 @@ struct bpf_storage_buffer {  };  struct bpf_cgroup_storage { -	struct bpf_storage_buffer *buf; +	union { +		struct bpf_storage_buffer *buf; +		void __percpu *percpu_buf; +	};  	struct bpf_cgroup_storage_map *map;  	struct bpf_cgroup_storage_key key;  	struct list_head list; @@ -43,7 +51,7 @@ struct bpf_cgroup_storage {  struct bpf_prog_list {  	struct list_head node;  	struct bpf_prog *prog; -	struct bpf_cgroup_storage *storage; +	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];  };  struct bpf_prog_array; @@ -101,18 +109,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,  int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,  				      short access, enum bpf_attach_type type); -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) +static inline enum bpf_cgroup_storage_type cgroup_storage_type( +	struct bpf_map *map)  { -	struct bpf_storage_buffer *buf; +	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) +		return BPF_CGROUP_STORAGE_PERCPU; + +	return BPF_CGROUP_STORAGE_SHARED; +} -	if (!storage) -		return; +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage +					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) +{ +	enum bpf_cgroup_storage_type stype; -	buf = READ_ONCE(storage->buf); -	this_cpu_write(bpf_cgroup_storage, &buf->data[0]); +	for_each_cgroup_storage_type(stype) +		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);  } -struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog); +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, +					enum bpf_cgroup_storage_type stype);  void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);  void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,  			     struct cgroup *cgroup, @@ -121,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);  int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);  void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); +int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, +				     void *value, u64 flags); +  /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. 
*/  #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \  ({									      \ @@ -265,15 +285,24 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,  	return -EINVAL;  } -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {} +static inline void bpf_cgroup_storage_set( +	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}  static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,  					    struct bpf_map *map) { return 0; }  static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,  					      struct bpf_map *map) {}  static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( -	struct bpf_prog *prog) { return 0; } +	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }  static inline void bpf_cgroup_storage_free(  	struct bpf_cgroup_storage *storage) {} +static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, +						 void *value) { +	return 0; +} +static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, +					void *key, void *value, u64 flags) { +	return 0; +}  #define cgroup_bpf_enabled (0)  #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) @@ -293,6 +322,8 @@ static inline void bpf_cgroup_storage_free(  #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })  #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) +#define for_each_cgroup_storage_type(stype) for (; false; ) +  #endif /* CONFIG_CGROUP_BPF */  #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 988a00797bcd..018299a595c8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -272,6 +272,14 @@ struct bpf_prog_offload {  	u32			jited_len;  }; +enum bpf_cgroup_storage_type { +	BPF_CGROUP_STORAGE_SHARED, +	BPF_CGROUP_STORAGE_PERCPU, +	__BPF_CGROUP_STORAGE_MAX +}; + +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +  struct bpf_prog_aux {  	atomic_t refcnt;  	u32 used_map_cnt; @@ -289,7 +297,7 @@ struct bpf_prog_aux {  	struct bpf_prog *prog;  	struct user_struct *user;  	u64 load_time; /* ns since boottime */ -	struct bpf_map *cgroup_storage; +	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];  	char name[BPF_OBJ_NAME_LEN];  #ifdef CONFIG_SECURITY  	void *security; @@ -358,7 +366,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,   */  struct bpf_prog_array_item {  	struct bpf_prog *prog; -	struct bpf_cgroup_storage *cgroup_storage; +	struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];  };  struct bpf_prog_array { diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c9bd6fb765b0..5432f4c9f50e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)  #endif  #ifdef CONFIG_CGROUP_BPF  BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)  #endif  BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)  BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa5ccd2385ed..e2070d819e04 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -127,6 +127,7 @@ enum bpf_map_type {  	BPF_MAP_TYPE_SOCKHASH,  	BPF_MAP_TYPE_CGROUP_STORAGE,  	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, +	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,  };  enum bpf_prog_type { diff --git a/kernel/bpf/cgroup.c 
b/kernel/bpf/cgroup.c index 549f6fbcc461..00f6ed2e4f9a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -25,6 +25,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);   */  void cgroup_bpf_put(struct cgroup *cgrp)  { +	enum bpf_cgroup_storage_type stype;  	unsigned int type;  	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { @@ -34,8 +35,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)  		list_for_each_entry_safe(pl, tmp, progs, node) {  			list_del(&pl->node);  			bpf_prog_put(pl->prog); -			bpf_cgroup_storage_unlink(pl->storage); -			bpf_cgroup_storage_free(pl->storage); +			for_each_cgroup_storage_type(stype) { +				bpf_cgroup_storage_unlink(pl->storage[stype]); +				bpf_cgroup_storage_free(pl->storage[stype]); +			}  			kfree(pl);  			static_branch_dec(&cgroup_bpf_enabled_key);  		} @@ -97,6 +100,7 @@ static int compute_effective_progs(struct cgroup *cgrp,  				   enum bpf_attach_type type,  				   struct bpf_prog_array __rcu **array)  { +	enum bpf_cgroup_storage_type stype;  	struct bpf_prog_array *progs;  	struct bpf_prog_list *pl;  	struct cgroup *p = cgrp; @@ -125,7 +129,9 @@ static int compute_effective_progs(struct cgroup *cgrp,  				continue;  			progs->items[cnt].prog = pl->prog; -			progs->items[cnt].cgroup_storage = pl->storage; +			for_each_cgroup_storage_type(stype) +				progs->items[cnt].cgroup_storage[stype] = +					pl->storage[stype];  			cnt++;  		}  	} while ((p = cgroup_parent(p))); @@ -232,7 +238,9 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,  {  	struct list_head *progs = &cgrp->bpf.progs[type];  	struct bpf_prog *old_prog = NULL; -	struct bpf_cgroup_storage *storage, *old_storage = NULL; +	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE], +		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL}; +	enum bpf_cgroup_storage_type stype;  	struct bpf_prog_list *pl;  	bool pl_was_allocated;  	int err; @@ -254,34 +262,44 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,  	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)  		return -E2BIG; -	storage = bpf_cgroup_storage_alloc(prog); -	if (IS_ERR(storage)) -		return -ENOMEM; +	for_each_cgroup_storage_type(stype) { +		storage[stype] = bpf_cgroup_storage_alloc(prog, stype); +		if (IS_ERR(storage[stype])) { +			storage[stype] = NULL; +			for_each_cgroup_storage_type(stype) +				bpf_cgroup_storage_free(storage[stype]); +			return -ENOMEM; +		} +	}  	if (flags & BPF_F_ALLOW_MULTI) {  		list_for_each_entry(pl, progs, node) {  			if (pl->prog == prog) {  				/* disallow attaching the same prog twice */ -				bpf_cgroup_storage_free(storage); +				for_each_cgroup_storage_type(stype) +					bpf_cgroup_storage_free(storage[stype]);  				return -EINVAL;  			}  		}  		pl = kmalloc(sizeof(*pl), GFP_KERNEL);  		if (!pl) { -			bpf_cgroup_storage_free(storage); +			for_each_cgroup_storage_type(stype) +				bpf_cgroup_storage_free(storage[stype]);  			return -ENOMEM;  		}  		pl_was_allocated = true;  		pl->prog = prog; -		pl->storage = storage; +		for_each_cgroup_storage_type(stype) +			pl->storage[stype] = storage[stype];  		list_add_tail(&pl->node, progs);  	} else {  		if (list_empty(progs)) {  			pl = kmalloc(sizeof(*pl), GFP_KERNEL);  			if (!pl) { -				bpf_cgroup_storage_free(storage); +				for_each_cgroup_storage_type(stype) +					bpf_cgroup_storage_free(storage[stype]);  				return -ENOMEM;  			}  			pl_was_allocated = true; @@ -289,12 +307,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,  		} else {  			pl = list_first_entry(progs, 
typeof(*pl), node);  			old_prog = pl->prog; -			old_storage = pl->storage; -			bpf_cgroup_storage_unlink(old_storage); +			for_each_cgroup_storage_type(stype) { +				old_storage[stype] = pl->storage[stype]; +				bpf_cgroup_storage_unlink(old_storage[stype]); +			}  			pl_was_allocated = false;  		}  		pl->prog = prog; -		pl->storage = storage; +		for_each_cgroup_storage_type(stype) +			pl->storage[stype] = storage[stype];  	}  	cgrp->bpf.flags[type] = flags; @@ -304,21 +325,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,  		goto cleanup;  	static_branch_inc(&cgroup_bpf_enabled_key); -	if (old_storage) -		bpf_cgroup_storage_free(old_storage); +	for_each_cgroup_storage_type(stype) { +		if (!old_storage[stype]) +			continue; +		bpf_cgroup_storage_free(old_storage[stype]); +	}  	if (old_prog) {  		bpf_prog_put(old_prog);  		static_branch_dec(&cgroup_bpf_enabled_key);  	} -	bpf_cgroup_storage_link(storage, cgrp, type); +	for_each_cgroup_storage_type(stype) +		bpf_cgroup_storage_link(storage[stype], cgrp, type);  	return 0;  cleanup:  	/* and cleanup the prog list */  	pl->prog = old_prog; -	bpf_cgroup_storage_free(pl->storage); -	pl->storage = old_storage; -	bpf_cgroup_storage_link(old_storage, cgrp, type); +	for_each_cgroup_storage_type(stype) { +		bpf_cgroup_storage_free(pl->storage[stype]); +		pl->storage[stype] = old_storage[stype]; +		bpf_cgroup_storage_link(old_storage[stype], cgrp, type); +	}  	if (pl_was_allocated) {  		list_del(&pl->node);  		kfree(pl); @@ -339,6 +366,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,  			enum bpf_attach_type type, u32 unused_flags)  {  	struct list_head *progs = &cgrp->bpf.progs[type]; +	enum bpf_cgroup_storage_type stype;  	u32 flags = cgrp->bpf.flags[type];  	struct bpf_prog *old_prog = NULL;  	struct bpf_prog_list *pl; @@ -385,8 +413,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,  	/* now can actually delete it from this cgroup list */  	list_del(&pl->node); -	bpf_cgroup_storage_unlink(pl->storage); -	bpf_cgroup_storage_free(pl->storage); +	for_each_cgroup_storage_type(stype) { +		bpf_cgroup_storage_unlink(pl->storage[stype]); +		bpf_cgroup_storage_free(pl->storage[stype]); +	}  	kfree(pl);  	if (list_empty(progs))  		/* last program was detached, reset flags to zero */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1991466b8327..6502115e8f55 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -194,16 +194,28 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {  	.ret_type	= RET_INTEGER,  }; -DECLARE_PER_CPU(void*, bpf_cgroup_storage); +#ifdef CONFIG_CGROUP_BPF +DECLARE_PER_CPU(struct bpf_cgroup_storage*, +		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);  BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)  { -	/* map and flags arguments are not used now, -	 * but provide an ability to extend the API -	 * for other types of local storages. -	 * verifier checks that their values are correct. +	/* flags argument is not used now, +	 * but provides an ability to extend the API. +	 * verifier checks that its value is correct.  	 
*/ -	return (unsigned long) this_cpu_read(bpf_cgroup_storage); +	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); +	struct bpf_cgroup_storage *storage; +	void *ptr; + +	storage = this_cpu_read(bpf_cgroup_storage[stype]); + +	if (stype == BPF_CGROUP_STORAGE_SHARED) +		ptr = &READ_ONCE(storage->buf)->data[0]; +	else +		ptr = this_cpu_ptr(storage->percpu_buf); + +	return (unsigned long)ptr;  }  const struct bpf_func_proto bpf_get_local_storage_proto = { @@ -214,3 +226,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {  	.arg2_type	= ARG_ANYTHING,  };  #endif +#endif diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 22ad967d1e5f..944eb297465f 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -7,7 +7,8 @@  #include <linux/rbtree.h>  #include <linux/slab.h> -DEFINE_PER_CPU(void*, bpf_cgroup_storage); +DEFINE_PER_CPU(struct bpf_cgroup_storage*, +	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);  #ifdef CONFIG_CGROUP_BPF @@ -151,6 +152,71 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,  	return 0;  } +int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key, +				   void *value) +{ +	struct bpf_cgroup_storage_map *map = map_to_storage(_map); +	struct bpf_cgroup_storage_key *key = _key; +	struct bpf_cgroup_storage *storage; +	int cpu, off = 0; +	u32 size; + +	rcu_read_lock(); +	storage = cgroup_storage_lookup(map, key, false); +	if (!storage) { +		rcu_read_unlock(); +		return -ENOENT; +	} + +	/* per_cpu areas are zero-filled and bpf programs can only +	 * access 'value_size' of them, so copying rounded areas +	 * will not leak any kernel data +	 */ +	size = round_up(_map->value_size, 8); +	for_each_possible_cpu(cpu) { +		bpf_long_memcpy(value + off, +				per_cpu_ptr(storage->percpu_buf, cpu), size); +		off += size; +	} +	rcu_read_unlock(); +	return 0; +} + +int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key, +				     void *value, u64 map_flags) +{ +	struct bpf_cgroup_storage_map *map = map_to_storage(_map); +	struct bpf_cgroup_storage_key *key = _key; +	struct bpf_cgroup_storage *storage; +	int cpu, off = 0; +	u32 size; + +	if (map_flags != BPF_ANY && map_flags != BPF_EXIST) +		return -EINVAL; + +	rcu_read_lock(); +	storage = cgroup_storage_lookup(map, key, false); +	if (!storage) { +		rcu_read_unlock(); +		return -ENOENT; +	} + +	/* the user space will provide round_up(value_size, 8) bytes that +	 * will be copied into per-cpu area. bpf programs can only access +	 * value_size of it. 
During lookup the same extra bytes will be +	 * returned or zeros which were zero-filled by percpu_alloc, +	 * so no kernel data leaks possible +	 */ +	size = round_up(_map->value_size, 8); +	for_each_possible_cpu(cpu) { +		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu), +				value + off, size); +		off += size; +	} +	rcu_read_unlock(); +	return 0; +} +  static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,  				       void *_next_key)  { @@ -251,6 +317,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {  int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)  { +	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);  	struct bpf_cgroup_storage_map *map = map_to_storage(_map);  	int ret = -EBUSY; @@ -258,11 +325,12 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)  	if (map->prog && map->prog != prog)  		goto unlock; -	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map) +	if (prog->aux->cgroup_storage[stype] && +	    prog->aux->cgroup_storage[stype] != _map)  		goto unlock;  	map->prog = prog; -	prog->aux->cgroup_storage = _map; +	prog->aux->cgroup_storage[stype] = _map;  	ret = 0;  unlock:  	spin_unlock_bh(&map->lock); @@ -272,70 +340,117 @@ unlock:  void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)  { +	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);  	struct bpf_cgroup_storage_map *map = map_to_storage(_map);  	spin_lock_bh(&map->lock);  	if (map->prog == prog) { -		WARN_ON(prog->aux->cgroup_storage != _map); +		WARN_ON(prog->aux->cgroup_storage[stype] != _map);  		map->prog = NULL; -		prog->aux->cgroup_storage = NULL; +		prog->aux->cgroup_storage[stype] = NULL;  	}  	spin_unlock_bh(&map->lock);  } -struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog) +static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages) +{ +	size_t size; + +	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) { +		size = sizeof(struct bpf_storage_buffer) + map->value_size; +		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size, +				  PAGE_SIZE) >> PAGE_SHIFT; +	} else { +		size = map->value_size; +		*pages = round_up(round_up(size, 8) * num_possible_cpus(), +				  PAGE_SIZE) >> PAGE_SHIFT; +	} + +	return size; +} + +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, +					enum bpf_cgroup_storage_type stype)  {  	struct bpf_cgroup_storage *storage;  	struct bpf_map *map; +	gfp_t flags; +	size_t size;  	u32 pages; -	map = prog->aux->cgroup_storage; +	map = prog->aux->cgroup_storage[stype];  	if (!map)  		return NULL; -	pages = round_up(sizeof(struct bpf_cgroup_storage) + -			 sizeof(struct bpf_storage_buffer) + -			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT; +	size = bpf_cgroup_storage_calculate_size(map, &pages); +  	if (bpf_map_charge_memlock(map, pages))  		return ERR_PTR(-EPERM);  	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),  			       __GFP_ZERO | GFP_USER, map->numa_node); -	if (!storage) { -		bpf_map_uncharge_memlock(map, pages); -		return ERR_PTR(-ENOMEM); -	} +	if (!storage) +		goto enomem; -	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) + -				    map->value_size, __GFP_ZERO | GFP_USER, -				    map->numa_node); -	if (!storage->buf) { -		bpf_map_uncharge_memlock(map, pages); -		kfree(storage); -		return ERR_PTR(-ENOMEM); +	flags = __GFP_ZERO | GFP_USER; + +	if (stype == BPF_CGROUP_STORAGE_SHARED) { +		storage->buf = kmalloc_node(size, flags, 
map->numa_node); +		if (!storage->buf) +			goto enomem; +	} else { +		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags); +		if (!storage->percpu_buf) +			goto enomem;  	}  	storage->map = (struct bpf_cgroup_storage_map *)map;  	return storage; + +enomem: +	bpf_map_uncharge_memlock(map, pages); +	kfree(storage); +	return ERR_PTR(-ENOMEM); +} + +static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu) +{ +	struct bpf_cgroup_storage *storage = +		container_of(rcu, struct bpf_cgroup_storage, rcu); + +	kfree(storage->buf); +	kfree(storage); +} + +static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu) +{ +	struct bpf_cgroup_storage *storage = +		container_of(rcu, struct bpf_cgroup_storage, rcu); + +	free_percpu(storage->percpu_buf); +	kfree(storage);  }  void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)  { -	u32 pages; +	enum bpf_cgroup_storage_type stype;  	struct bpf_map *map; +	u32 pages;  	if (!storage)  		return;  	map = &storage->map->map; -	pages = round_up(sizeof(struct bpf_cgroup_storage) + -			 sizeof(struct bpf_storage_buffer) + -			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT; + +	bpf_cgroup_storage_calculate_size(map, &pages);  	bpf_map_uncharge_memlock(map, pages); -	kfree_rcu(storage->buf, rcu); -	kfree_rcu(storage, rcu); +	stype = cgroup_storage_type(map); +	if (stype == BPF_CGROUP_STORAGE_SHARED) +		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu); +	else +		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);  }  void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 3bfbf4464416..99d243e1ad6e 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -24,7 +24,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)  	 * in the verifier is not enough.  	 
*/  	if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || -	    inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE) { +	    inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || +	    inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {  		fdput(f);  		return ERR_PTR(-ENOTSUPP);  	} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b3c2d09bcf7a..5742df21598c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -686,7 +686,8 @@ static int map_lookup_elem(union bpf_attr *attr)  	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||  	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || -	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) +	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || +	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)  		value_size = round_up(map->value_size, 8) * num_possible_cpus();  	else if (IS_FD_MAP(map))  		value_size = sizeof(u32); @@ -705,6 +706,8 @@ static int map_lookup_elem(union bpf_attr *attr)  		err = bpf_percpu_hash_copy(map, key, value);  	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {  		err = bpf_percpu_array_copy(map, key, value); +	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { +		err = bpf_percpu_cgroup_storage_copy(map, key, value);  	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {  		err = bpf_stackmap_copy(map, key, value);  	} else if (IS_FD_ARRAY(map)) { @@ -774,7 +777,8 @@ static int map_update_elem(union bpf_attr *attr)  	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||  	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || -	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) +	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || +	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)  		value_size = round_up(map->value_size, 8) * num_possible_cpus();  	else  		value_size = map->value_size; @@ -809,6 +813,9 @@ static int map_update_elem(union bpf_attr *attr)  		err = bpf_percpu_hash_update(map, key, value, attr->flags);  	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {  		err = bpf_percpu_array_update(map, key, value, attr->flags); +	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { +		err = bpf_percpu_cgroup_storage_update(map, key, value, +						       attr->flags);  	} else if (IS_FD_ARRAY(map)) {  		rcu_read_lock();  		err = bpf_fd_array_map_update_elem(map, f.file, key, value, @@ -988,10 +995,15 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)  /* drop refcnt on maps used by eBPF program and free auxilary data */  static void free_used_maps(struct bpf_prog_aux *aux)  { +	enum bpf_cgroup_storage_type stype;  	int i; -	if (aux->cgroup_storage) -		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage); +	for_each_cgroup_storage_type(stype) { +		if (!aux->cgroup_storage[stype]) +			continue; +		bpf_cgroup_storage_release(aux->prog, +					   aux->cgroup_storage[stype]); +	}  	for (i = 0; i < aux->used_map_cnt; i++)  		bpf_map_put(aux->used_maps[i]); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e986518d7bc3..a8cc83a970d1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2074,6 +2074,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,  			goto error;  		break;  	case BPF_MAP_TYPE_CGROUP_STORAGE: +	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:  		if (func_id != BPF_FUNC_get_local_storage)  			goto error;  		break; @@ -2164,7 +2165,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,  			goto error;  		break;  	case BPF_FUNC_get_local_storage: -		if 
(map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) +		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && +		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)  			goto error;  		break;  	case BPF_FUNC_sk_select_reuseport: @@ -5049,6 +5051,12 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,  	return 0;  } +static bool bpf_map_is_cgroup_storage(struct bpf_map *map) +{ +	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || +		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); +} +  /* look for pseudo eBPF instructions that access map FDs and   * replace them with actual map pointers   */ @@ -5139,10 +5147,9 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)  			}  			env->used_maps[env->used_map_cnt++] = map; -			if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE && +			if (bpf_map_is_cgroup_storage(map) &&  			    bpf_cgroup_storage_assign(env->prog, map)) { -				verbose(env, -					"only one cgroup storage is allowed\n"); +				verbose(env, "only one cgroup storage of each type is allowed\n");  				fdput(f);  				return -EBUSY;  			} @@ -5171,11 +5178,15 @@ next_insn:  /* drop refcnt of maps used by the rejected program */  static void release_maps(struct bpf_verifier_env *env)  { +	enum bpf_cgroup_storage_type stype;  	int i; -	if (env->prog->aux->cgroup_storage) +	for_each_cgroup_storage_type(stype) { +		if (!env->prog->aux->cgroup_storage[stype]) +			continue;  		bpf_cgroup_storage_release(env->prog, -					   env->prog->aux->cgroup_storage); +			env->prog->aux->cgroup_storage[stype]); +	}  	for (i = 0; i < env->used_map_cnt; i++)  		bpf_map_put(env->used_maps[i]); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index f4078830ea50..0c423b8cd75c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -12,7 +12,7 @@  #include <linux/sched/signal.h>  static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, -					    struct bpf_cgroup_storage *storage) +		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])  {  	u32 ret; @@ -28,13 +28,20 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,  static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)  { -	struct bpf_cgroup_storage *storage = NULL; +	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 }; +	enum bpf_cgroup_storage_type stype;  	u64 time_start, time_spent = 0;  	u32 ret = 0, i; -	storage = bpf_cgroup_storage_alloc(prog); -	if (IS_ERR(storage)) -		return PTR_ERR(storage); +	for_each_cgroup_storage_type(stype) { +		storage[stype] = bpf_cgroup_storage_alloc(prog, stype); +		if (IS_ERR(storage[stype])) { +			storage[stype] = NULL; +			for_each_cgroup_storage_type(stype) +				bpf_cgroup_storage_free(storage[stype]); +			return -ENOMEM; +		} +	}  	if (!repeat)  		repeat = 1; @@ -53,7 +60,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)  	do_div(time_spent, repeat);  	*time = time_spent > U32_MAX ? 
U32_MAX : (u32)time_spent; -	bpf_cgroup_storage_free(storage); +	for_each_cgroup_storage_type(stype) +		bpf_cgroup_storage_free(storage[stype]);  	return ret;  } diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 180f9d813bca..d7b68ef5ba79 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -209,7 +209,7 @@ static int map_fd = -1;  static int prog_load_cnt(int verdict, int val)  { -	int cgroup_storage_fd; +	int cgroup_storage_fd, percpu_cgroup_storage_fd;  	if (map_fd < 0)  		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); @@ -225,6 +225,14 @@ static int prog_load_cnt(int verdict, int val)  		return -1;  	} +	percpu_cgroup_storage_fd = bpf_create_map( +		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, +		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); +	if (percpu_cgroup_storage_fd < 0) { +		printf("failed to create map '%s'\n", strerror(errno)); +		return -1; +	} +  	struct bpf_insn prog[] = {  		BPF_MOV32_IMM(BPF_REG_0, 0),  		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ @@ -235,11 +243,20 @@ static int prog_load_cnt(int verdict, int val)  		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),  		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */  		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ +  		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),  		BPF_MOV64_IMM(BPF_REG_2, 0),  		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),  		BPF_MOV64_IMM(BPF_REG_1, val),  		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), + +		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd), +		BPF_MOV64_IMM(BPF_REG_2, 0), +		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), +		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), +		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), +		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), +  		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */  		BPF_EXIT_INSN(),  	}; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e22fbe8b975f..6003e9598973 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -72,13 +72,15 @@ static const char * const map_type_name[] = {  	[BPF_MAP_TYPE_SOCKHASH]		= "sockhash",  	[BPF_MAP_TYPE_CGROUP_STORAGE]	= "cgroup_storage",  	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", +	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",  };  static bool map_is_per_cpu(__u32 type)  {  	return type == BPF_MAP_TYPE_PERCPU_HASH ||  	       type == BPF_MAP_TYPE_PERCPU_ARRAY || -	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH || +	       type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;  }  static bool map_is_map_of_maps(__u32 type) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index aa5ccd2385ed..e2070d819e04 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -127,6 +127,7 @@ enum bpf_map_type {  	BPF_MAP_TYPE_SOCKHASH,  	BPF_MAP_TYPE_CGROUP_STORAGE,  	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, +	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,  };  enum bpf_prog_type { diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 059d64a0f897..f802de526f57 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,7 +23,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c  TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \  	test_align test_verifier_log 
test_dev_cgroup test_tcpbpf_user \  	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ -	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names +	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ +	test_netcnt  TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \  	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \ @@ -35,7 +36,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test  	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \  	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \  	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ -	test_skb_cgroup_id_kern.o bpf_flow.o +	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o  # Order correspond to 'make run_tests' order  TEST_PROGS := test_kmod.sh \ @@ -72,6 +73,7 @@ $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c  $(OUTPUT)/test_progs: trace_helpers.c  $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c  $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c +$(OUTPUT)/test_netcnt: cgroup_helpers.c  .PHONY: force diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h new file mode 100644 index 000000000000..81084c1c2c23 --- /dev/null +++ b/tools/testing/selftests/bpf/netcnt_common.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __NETCNT_COMMON_H +#define __NETCNT_COMMON_H + +#include <linux/types.h> + +#define MAX_PERCPU_PACKETS 32 + +struct percpu_net_cnt { +	__u64 packets; +	__u64 bytes; + +	__u64 prev_ts; + +	__u64 prev_packets; +	__u64 prev_bytes; +}; + +struct net_cnt { +	__u64 packets; +	__u64 bytes; +}; + +#endif diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c new file mode 100644 index 000000000000..1198abca1360 --- /dev/null +++ b/tools/testing/selftests/bpf/netcnt_prog.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/version.h> + +#include "bpf_helpers.h" +#include "netcnt_common.h" + +#define MAX_BPS	(3 * 1024 * 1024) + +#define REFRESH_TIME_NS	100000000 +#define NS_PER_SEC	1000000000 + +struct bpf_map_def SEC("maps") percpu_netcnt = { +	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, +	.key_size = sizeof(struct bpf_cgroup_storage_key), +	.value_size = sizeof(struct percpu_net_cnt), +}; + +struct bpf_map_def SEC("maps") netcnt = { +	.type = BPF_MAP_TYPE_CGROUP_STORAGE, +	.key_size = sizeof(struct bpf_cgroup_storage_key), +	.value_size = sizeof(struct net_cnt), +}; + +SEC("cgroup/skb") +int bpf_nextcnt(struct __sk_buff *skb) +{ +	struct percpu_net_cnt *percpu_cnt; +	char fmt[] = "%d %llu %llu\n"; +	struct net_cnt *cnt; +	__u64 ts, dt; +	int ret; + +	cnt = bpf_get_local_storage(&netcnt, 0); +	percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0); + +	percpu_cnt->packets++; +	percpu_cnt->bytes += skb->len; + +	if (percpu_cnt->packets > MAX_PERCPU_PACKETS) { +		__sync_fetch_and_add(&cnt->packets, +				     percpu_cnt->packets); +		percpu_cnt->packets = 0; + +		__sync_fetch_and_add(&cnt->bytes, +				     percpu_cnt->bytes); +		percpu_cnt->bytes = 0; +	} + +	ts = bpf_ktime_get_ns(); +	dt = ts - percpu_cnt->prev_ts; + +	dt *= MAX_BPS; +	dt /= NS_PER_SEC; + +	if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt) +		ret = 1; +	else +		ret = 0; + +	if (dt > REFRESH_TIME_NS) { +		percpu_cnt->prev_ts = ts; +		percpu_cnt->prev_packets = 
cnt->packets; +		percpu_cnt->prev_bytes = cnt->bytes; +	} + +	return !!ret; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 4e196e3bfecf..f44834155f25 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -4,6 +4,7 @@  #include <linux/filter.h>  #include <stdio.h>  #include <stdlib.h> +#include <sys/sysinfo.h>  #include "bpf_rlimit.h"  #include "cgroup_helpers.h" @@ -15,6 +16,14 @@ char bpf_log_buf[BPF_LOG_BUF_SIZE];  int main(int argc, char **argv)  {  	struct bpf_insn prog[] = { +		BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */ +		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ +		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +			     BPF_FUNC_get_local_storage), +		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), +		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), +		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), +  		BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */  		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */  		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, @@ -28,9 +37,18 @@ int main(int argc, char **argv)  	};  	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);  	int error = EXIT_FAILURE; -	int map_fd, prog_fd, cgroup_fd; +	int map_fd, percpu_map_fd, prog_fd, cgroup_fd;  	struct bpf_cgroup_storage_key key;  	unsigned long long value; +	unsigned long long *percpu_value; +	int cpu, nproc; + +	nproc = get_nprocs_conf(); +	percpu_value = malloc(sizeof(*percpu_value) * nproc); +	if (!percpu_value) { +		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); +		goto err; +	}  	map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),  				sizeof(value), 0, 0); @@ -39,7 +57,15 @@ int main(int argc, char **argv)  		goto out;  	} -	prog[0].imm = map_fd; +	percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, +				       sizeof(key), sizeof(value), 0, 0); +	if (percpu_map_fd < 0) { +		printf("Failed to create map: %s\n", strerror(errno)); +		goto out; +	} + +	prog[0].imm = percpu_map_fd; +	prog[7].imm = map_fd;  	prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,  				   prog, insns_cnt, "GPL", 0,  				   bpf_log_buf, BPF_LOG_BUF_SIZE); @@ -77,7 +103,15 @@ int main(int argc, char **argv)  	}  	if (bpf_map_lookup_elem(map_fd, &key, &value)) { -		printf("Failed to lookup cgroup storage\n"); +		printf("Failed to lookup cgroup storage 0\n"); +		goto err; +	} + +	for (cpu = 0; cpu < nproc; cpu++) +		percpu_value[cpu] = 1000; + +	if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) { +		printf("Failed to update the data in the cgroup storage\n");  		goto err;  	} @@ -120,11 +154,31 @@ int main(int argc, char **argv)  		goto err;  	} +	/* Check the final value of the counter in the percpu local storage */ + +	for (cpu = 0; cpu < nproc; cpu++) +		percpu_value[cpu] = 0; + +	if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) { +		printf("Failed to lookup the per-cpu cgroup storage\n"); +		goto err; +	} + +	value = 0; +	for (cpu = 0; cpu < nproc; cpu++) +		value += percpu_value[cpu]; + +	if (value != nproc * 1000 + 6) { +		printf("Unexpected data in the per-cpu cgroup storage\n"); +		goto err; +	} +  	error = 0;  	printf("test_cgroup_storage:PASS\n");  err:  	cleanup_cgroup_environment(); +	free(percpu_value);  out:  	return error; diff --git a/tools/testing/selftests/bpf/test_netcnt.c 
b/tools/testing/selftests/bpf/test_netcnt.c new file mode 100644 index 000000000000..7887df693399 --- /dev/null +++ b/tools/testing/selftests/bpf/test_netcnt.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <sys/sysinfo.h> +#include <sys/time.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "cgroup_helpers.h" +#include "bpf_rlimit.h" +#include "netcnt_common.h" + +#define BPF_PROG "./netcnt_prog.o" +#define TEST_CGROUP "/test-network-counters/" + +static int bpf_find_map(const char *test, struct bpf_object *obj, +			const char *name) +{ +	struct bpf_map *map; + +	map = bpf_object__find_map_by_name(obj, name); +	if (!map) { +		printf("%s:FAIL:map '%s' not found\n", test, name); +		return -1; +	} +	return bpf_map__fd(map); +} + +int main(int argc, char **argv) +{ +	struct percpu_net_cnt *percpu_netcnt; +	struct bpf_cgroup_storage_key key; +	int map_fd, percpu_map_fd; +	int error = EXIT_FAILURE; +	struct net_cnt netcnt; +	struct bpf_object *obj; +	int prog_fd, cgroup_fd; +	unsigned long packets; +	unsigned long bytes; +	int cpu, nproc; +	__u32 prog_cnt; + +	nproc = get_nprocs_conf(); +	percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); +	if (!percpu_netcnt) { +		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); +		goto err; +	} + +	if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB, +			  &obj, &prog_fd)) { +		printf("Failed to load bpf program\n"); +		goto out; +	} + +	if (setup_cgroup_environment()) { +		printf("Failed to load bpf program\n"); +		goto err; +	} + +	/* Create a cgroup, get fd, and join it */ +	cgroup_fd = create_and_get_cgroup(TEST_CGROUP); +	if (!cgroup_fd) { +		printf("Failed to create test cgroup\n"); +		goto err; +	} + +	if (join_cgroup(TEST_CGROUP)) { +		printf("Failed to join cgroup\n"); +		goto err; +	} + +	/* Attach bpf program */ +	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { +		printf("Failed to attach bpf program"); +		goto err; +	} + +	assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0); + +	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, +			   &prog_cnt)) { +		printf("Failed to query attached programs"); +		goto err; +	} + +	map_fd = bpf_find_map(__func__, obj, "netcnt"); +	if (map_fd < 0) { +		printf("Failed to find bpf map with net counters"); +		goto err; +	} + +	percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt"); +	if (percpu_map_fd < 0) { +		printf("Failed to find bpf map with percpu net counters"); +		goto err; +	} + +	if (bpf_map_get_next_key(map_fd, NULL, &key)) { +		printf("Failed to get key in cgroup storage\n"); +		goto err; +	} + +	if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) { +		printf("Failed to lookup cgroup storage\n"); +		goto err; +	} + +	if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) { +		printf("Failed to lookup percpu cgroup storage\n"); +		goto err; +	} + +	/* Some packets can be still in per-cpu cache, but not more than +	 * MAX_PERCPU_PACKETS. 
+	 */ +	packets = netcnt.packets; +	bytes = netcnt.bytes; +	for (cpu = 0; cpu < nproc; cpu++) { +		if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) { +			printf("Unexpected percpu value: %llu\n", +			       percpu_netcnt[cpu].packets); +			goto err; +		} + +		packets += percpu_netcnt[cpu].packets; +		bytes += percpu_netcnt[cpu].bytes; +	} + +	/* No packets should be lost */ +	if (packets != 10000) { +		printf("Unexpected packet count: %lu\n", packets); +		goto err; +	} + +	/* Let's check that bytes counter matches the number of packets +	 * multiplied by the size of ipv6 ICMP packet. +	 */ +	if (bytes != packets * 104) { +		printf("Unexpected bytes count: %lu\n", bytes); +		goto err; +	} + +	error = 0; +	printf("test_netcnt:PASS\n"); + +err: +	cleanup_cgroup_environment(); +	free(percpu_netcnt); + +out: +	return error; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 67c412d19c09..c7d25f23baf9 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -68,6 +68,7 @@ struct bpf_test {  	int fixup_prog2[MAX_FIXUPS];  	int fixup_map_in_map[MAX_FIXUPS];  	int fixup_cgroup_storage[MAX_FIXUPS]; +	int fixup_percpu_cgroup_storage[MAX_FIXUPS];  	const char *errstr;  	const char *errstr_unpriv;  	uint32_t retval; @@ -4676,7 +4677,7 @@ static struct bpf_test tests[] = {  		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,  	},  	{ -		"invalid per-cgroup storage access 3", +		"invalid cgroup storage access 3",  		.insns = {  			BPF_MOV64_IMM(BPF_REG_2, 0),  			BPF_LD_MAP_FD(BPF_REG_1, 0), @@ -4744,6 +4745,121 @@ static struct bpf_test tests[] = {  		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,  	},  	{ +		"valid per-cpu cgroup storage access", +		.insns = { +			BPF_MOV64_IMM(BPF_REG_2, 0), +			BPF_LD_MAP_FD(BPF_REG_1, 0), +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +				     BPF_FUNC_get_local_storage), +			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), +			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), +			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +			BPF_EXIT_INSN(), +		}, +		.fixup_percpu_cgroup_storage = { 1 }, +		.result = ACCEPT, +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB, +	}, +	{ +		"invalid per-cpu cgroup storage access 1", +		.insns = { +			BPF_MOV64_IMM(BPF_REG_2, 0), +			BPF_LD_MAP_FD(BPF_REG_1, 0), +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +				     BPF_FUNC_get_local_storage), +			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), +			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), +			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +			BPF_EXIT_INSN(), +		}, +		.fixup_map1 = { 1 }, +		.result = REJECT, +		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage", +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB, +	}, +	{ +		"invalid per-cpu cgroup storage access 2", +		.insns = { +			BPF_MOV64_IMM(BPF_REG_2, 0), +			BPF_LD_MAP_FD(BPF_REG_1, 1), +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +				     BPF_FUNC_get_local_storage), +			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +			BPF_EXIT_INSN(), +		}, +		.result = REJECT, +		.errstr = "fd 1 is not pointing to valid bpf_map", +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB, +	}, +	{ +		"invalid per-cpu cgroup storage access 3", +		.insns = { +			BPF_MOV64_IMM(BPF_REG_2, 0), +			BPF_LD_MAP_FD(BPF_REG_1, 0), +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +				     BPF_FUNC_get_local_storage), +			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), +			BPF_MOV64_IMM(BPF_REG_0, 0), +			BPF_EXIT_INSN(), +		}, +		.fixup_percpu_cgroup_storage = { 1 }, +		
.result = REJECT, +		.errstr = "invalid access to map value, value_size=64 off=256 size=4", +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB, +	}, +	{ +		"invalid per-cpu cgroup storage access 4", +		.insns = { +			BPF_MOV64_IMM(BPF_REG_2, 0), +			BPF_LD_MAP_FD(BPF_REG_1, 0), +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +				     BPF_FUNC_get_local_storage), +			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), +			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), +			BPF_EXIT_INSN(), +		}, +		.fixup_cgroup_storage = { 1 }, +		.result = REJECT, +		.errstr = "invalid access to map value, value_size=64 off=-2 size=4", +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB, +	}, +	{ +		"invalid per-cpu cgroup storage access 5", +		.insns = { +			BPF_MOV64_IMM(BPF_REG_2, 7), +			BPF_LD_MAP_FD(BPF_REG_1, 0), +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +				     BPF_FUNC_get_local_storage), +			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), +			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), +			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +			BPF_EXIT_INSN(), +		}, +		.fixup_percpu_cgroup_storage = { 1 }, +		.result = REJECT, +		.errstr = "get_local_storage() doesn't support non-zero flags", +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB, +	}, +	{ +		"invalid per-cpu cgroup storage access 6", +		.insns = { +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), +			BPF_LD_MAP_FD(BPF_REG_1, 0), +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +				     BPF_FUNC_get_local_storage), +			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), +			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), +			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +			BPF_EXIT_INSN(), +		}, +		.fixup_percpu_cgroup_storage = { 1 }, +		.result = REJECT, +		.errstr = "get_local_storage() doesn't support non-zero flags", +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB, +	}, +	{  		"multiple registers share map_lookup_elem result",  		.insns = {  			BPF_MOV64_IMM(BPF_REG_1, 10), @@ -12615,15 +12731,17 @@ static int create_map_in_map(void)  	return outer_map_fd;  } -static int create_cgroup_storage(void) +static int create_cgroup_storage(bool percpu)  { +	enum bpf_map_type type = percpu ? 
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE : +		BPF_MAP_TYPE_CGROUP_STORAGE;  	int fd; -	fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, -			    sizeof(struct bpf_cgroup_storage_key), +	fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),  			    TEST_DATA_LEN, 0, 0);  	if (fd < 0) -		printf("Failed to create array '%s'!\n", strerror(errno)); +		printf("Failed to create cgroup storage '%s'!\n", +		       strerror(errno));  	return fd;  } @@ -12641,6 +12759,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,  	int *fixup_prog2 = test->fixup_prog2;  	int *fixup_map_in_map = test->fixup_map_in_map;  	int *fixup_cgroup_storage = test->fixup_cgroup_storage; +	int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;  	if (test->fill_helper)  		test->fill_helper(test); @@ -12710,12 +12829,20 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,  	}  	if (*fixup_cgroup_storage) { -		map_fds[7] = create_cgroup_storage(); +		map_fds[7] = create_cgroup_storage(false);  		do {  			prog[*fixup_cgroup_storage].imm = map_fds[7];  			fixup_cgroup_storage++;  		} while (*fixup_cgroup_storage);  	} + +	if (*fixup_percpu_cgroup_storage) { +		map_fds[8] = create_cgroup_storage(true); +		do { +			prog[*fixup_percpu_cgroup_storage].imm = map_fds[8]; +			fixup_percpu_cgroup_storage++; +		} while (*fixup_percpu_cgroup_storage); +	}  }  static void do_test_single(struct bpf_test *test, bool unpriv, | 
