diff options
author | David S. Miller <davem@davemloft.net> | 2018-08-07 11:02:05 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-08-07 11:02:05 -0700 |
commit | 1ba982806ce58baac44c52e6c7812245afad27c8 (patch) | |
tree | b0a8061a496c3d01eed36eadf22cea845a2a995b /kernel/bpf/local_storage.c | |
parent | c5d99d2b35dadebab2408bb10dcd50364eaaf9f4 (diff) | |
parent | 85fc4b16aaf05fc8978d242c556a1711dce15cf8 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2018-08-07
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Add cgroup local storage for BPF programs, which provides a fast
accessible memory for storing various per-cgroup data like number
of transmitted packets, etc, from Roman.
2) Support bpf_get_socket_cookie() BPF helper in several more program
types that have a full socket available, from Andrey.
3) Significantly improve the performance of perf events which are
reported from BPF offload. Also convert a couple of BPF AF_XDP
samples overto use libbpf, both from Jakub.
4) seg6local LWT provides the End.DT6 action, which allows to
decapsulate an outer IPv6 header containing a Segment Routing Header.
Adds this action now to the seg6local BPF interface, from Mathieu.
5) Do not mark dst register as unbounded in MOV64 instruction when
both src and dst register are the same, from Arthur.
6) Define u_smp_rmb() and u_smp_wmb() to their respective barrier
instructions on arm64 for the AF_XDP sample code, from Brian.
7) Convert the tcp_client.py and tcp_server.py BPF selftest scripts
over from Python 2 to Python 3, from Jeremy.
8) Enable BTF build flags to the BPF sample code Makefile, from Taeung.
9) Remove an unnecessary rcu_read_lock() in run_lwt_bpf(), from Taehee.
10) Several improvements to the README.rst from the BPF documentation
to make it more consistent with RST format, from Tobin.
11) Replace all occurrences of strerror() by calls to strerror_r()
in libbpf and fix a FORTIFY_SOURCE build error along with it,
from Thomas.
12) Fix a bug in bpftool's get_btf() function to correctly propagate
an error via PTR_ERR(), from Yue.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/local_storage.c')
-rw-r--r-- | kernel/bpf/local_storage.c | 378 |
1 files changed, 378 insertions, 0 deletions
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c new file mode 100644 index 000000000000..fc4e37f68f2a --- /dev/null +++ b/kernel/bpf/local_storage.c @@ -0,0 +1,378 @@ +//SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf-cgroup.h> +#include <linux/bpf.h> +#include <linux/bug.h> +#include <linux/filter.h> +#include <linux/mm.h> +#include <linux/rbtree.h> +#include <linux/slab.h> + +DEFINE_PER_CPU(void*, bpf_cgroup_storage); + +#ifdef CONFIG_CGROUP_BPF + +#define LOCAL_STORAGE_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + +struct bpf_cgroup_storage_map { + struct bpf_map map; + + spinlock_t lock; + struct bpf_prog *prog; + struct rb_root root; + struct list_head list; +}; + +static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map) +{ + return container_of(map, struct bpf_cgroup_storage_map, map); +} + +static int bpf_cgroup_storage_key_cmp( + const struct bpf_cgroup_storage_key *key1, + const struct bpf_cgroup_storage_key *key2) +{ + if (key1->cgroup_inode_id < key2->cgroup_inode_id) + return -1; + else if (key1->cgroup_inode_id > key2->cgroup_inode_id) + return 1; + else if (key1->attach_type < key2->attach_type) + return -1; + else if (key1->attach_type > key2->attach_type) + return 1; + return 0; +} + +static struct bpf_cgroup_storage *cgroup_storage_lookup( + struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key, + bool locked) +{ + struct rb_root *root = &map->root; + struct rb_node *node; + + if (!locked) + spin_lock_bh(&map->lock); + + node = root->rb_node; + while (node) { + struct bpf_cgroup_storage *storage; + + storage = container_of(node, struct bpf_cgroup_storage, node); + + switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) { + case -1: + node = node->rb_left; + break; + case 1: + node = node->rb_right; + break; + default: + if (!locked) + spin_unlock_bh(&map->lock); + return storage; + } + } + + if (!locked) + spin_unlock_bh(&map->lock); + + return NULL; +} + +static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map, + struct bpf_cgroup_storage *storage) +{ + struct rb_root *root = &map->root; + struct rb_node **new = &(root->rb_node), *parent = NULL; + + while (*new) { + struct bpf_cgroup_storage *this; + + this = container_of(*new, struct bpf_cgroup_storage, node); + + parent = *new; + switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) { + case -1: + new = &((*new)->rb_left); + break; + case 1: + new = &((*new)->rb_right); + break; + default: + return -EEXIST; + } + } + + rb_link_node(&storage->node, parent, new); + rb_insert_color(&storage->node, root); + + return 0; +} + +static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage *storage; + + storage = cgroup_storage_lookup(map, key, false); + if (!storage) + return NULL; + + return &READ_ONCE(storage->buf)->data[0]; +} + +static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, + void *value, u64 flags) +{ + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage *storage; + struct bpf_storage_buffer *new; + + if (flags & BPF_NOEXIST) + return -EINVAL; + + storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, + key, false); + if (!storage) + return -ENOENT; + + new = kmalloc_node(sizeof(struct bpf_storage_buffer) + + map->value_size, __GFP_ZERO | GFP_USER, + map->numa_node); + if (!new) + return -ENOMEM; + + memcpy(&new->data[0], value, map->value_size); + + new = xchg(&storage->buf, new); + kfree_rcu(new, rcu); + + return 0; +} + +static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key, + void *_next_key) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage_key *next = _next_key; + struct bpf_cgroup_storage *storage; + + spin_lock_bh(&map->lock); + + if (list_empty(&map->list)) + goto enoent; + + if (key) { + storage = cgroup_storage_lookup(map, key, true); + if (!storage) + goto enoent; + + storage = list_next_entry(storage, list); + if (!storage) + goto enoent; + } else { + storage = list_first_entry(&map->list, + struct bpf_cgroup_storage, list); + } + + spin_unlock_bh(&map->lock); + next->attach_type = storage->key.attach_type; + next->cgroup_inode_id = storage->key.cgroup_inode_id; + return 0; + +enoent: + spin_unlock_bh(&map->lock); + return -ENOENT; +} + +static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) +{ + int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_cgroup_storage_map *map; + + if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) + return ERR_PTR(-EINVAL); + + if (attr->value_size > PAGE_SIZE) + return ERR_PTR(-E2BIG); + + if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK) + /* reserved bits should not be used */ + return ERR_PTR(-EINVAL); + + if (attr->max_entries) + /* max_entries is not used and enforced to be 0 */ + return ERR_PTR(-EINVAL); + + map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), + __GFP_ZERO | GFP_USER, numa_node); + if (!map) + return ERR_PTR(-ENOMEM); + + map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map), + PAGE_SIZE) >> PAGE_SHIFT; + + /* copy mandatory map attributes */ + bpf_map_init_from_attr(&map->map, attr); + + spin_lock_init(&map->lock); + map->root = RB_ROOT; + INIT_LIST_HEAD(&map->list); + + return &map->map; +} + +static void cgroup_storage_map_free(struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + + WARN_ON(!RB_EMPTY_ROOT(&map->root)); + WARN_ON(!list_empty(&map->list)); + + kfree(map); +} + +static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) +{ + return -EINVAL; +} + +const struct bpf_map_ops cgroup_storage_map_ops = { + .map_alloc = cgroup_storage_map_alloc, + .map_free = cgroup_storage_map_free, + .map_get_next_key = cgroup_storage_get_next_key, + .map_lookup_elem = cgroup_storage_lookup_elem, + .map_update_elem = cgroup_storage_update_elem, + .map_delete_elem = cgroup_storage_delete_elem, +}; + +int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + int ret = -EBUSY; + + spin_lock_bh(&map->lock); + + if (map->prog && map->prog != prog) + goto unlock; + if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map) + goto unlock; + + map->prog = prog; + prog->aux->cgroup_storage = _map; + ret = 0; +unlock: + spin_unlock_bh(&map->lock); + + return ret; +} + +void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + + spin_lock_bh(&map->lock); + if (map->prog == prog) { + WARN_ON(prog->aux->cgroup_storage != _map); + map->prog = NULL; + prog->aux->cgroup_storage = NULL; + } + spin_unlock_bh(&map->lock); +} + +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog) +{ + struct bpf_cgroup_storage *storage; + struct bpf_map *map; + u32 pages; + + map = prog->aux->cgroup_storage; + if (!map) + return NULL; + + pages = round_up(sizeof(struct bpf_cgroup_storage) + + sizeof(struct bpf_storage_buffer) + + map->value_size, PAGE_SIZE) >> PAGE_SHIFT; + if (bpf_map_charge_memlock(map, pages)) + return ERR_PTR(-EPERM); + + storage = kmalloc_node(sizeof(struct bpf_cgroup_storage), + __GFP_ZERO | GFP_USER, map->numa_node); + if (!storage) { + bpf_map_uncharge_memlock(map, pages); + return ERR_PTR(-ENOMEM); + } + + storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) + + map->value_size, __GFP_ZERO | GFP_USER, + map->numa_node); + if (!storage->buf) { + bpf_map_uncharge_memlock(map, pages); + kfree(storage); + return ERR_PTR(-ENOMEM); + } + + storage->map = (struct bpf_cgroup_storage_map *)map; + + return storage; +} + +void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage) +{ + u32 pages; + struct bpf_map *map; + + if (!storage) + return; + + map = &storage->map->map; + pages = round_up(sizeof(struct bpf_cgroup_storage) + + sizeof(struct bpf_storage_buffer) + + map->value_size, PAGE_SIZE) >> PAGE_SHIFT; + bpf_map_uncharge_memlock(map, pages); + + kfree_rcu(storage->buf, rcu); + kfree_rcu(storage, rcu); +} + +void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, + struct cgroup *cgroup, + enum bpf_attach_type type) +{ + struct bpf_cgroup_storage_map *map; + + if (!storage) + return; + + storage->key.attach_type = type; + storage->key.cgroup_inode_id = cgroup->kn->id.id; + + map = storage->map; + + spin_lock_bh(&map->lock); + WARN_ON(cgroup_storage_insert(map, storage)); + list_add(&storage->list, &map->list); + spin_unlock_bh(&map->lock); +} + +void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage) +{ + struct bpf_cgroup_storage_map *map; + struct rb_root *root; + + if (!storage) + return; + + map = storage->map; + + spin_lock_bh(&map->lock); + root = &map->root; + rb_erase(&storage->node, root); + + list_del(&storage->list); + spin_unlock_bh(&map->lock); +} + +#endif |