diff options
| author | David S. Miller <davem@davemloft.net> | 2015-08-09 22:50:06 -0700 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2015-08-09 22:50:06 -0700 |
| commit | d74a790d5237e7f56677030d932bc4f37ec36c92 (patch) | |
| tree | 1393486ec4906557cdb1d9173e6e2e675bab3354 /kernel/bpf/arraymap.c | |
| parent | f1d5ca434413b20cd3f8c18ff2b634b7782149a5 (diff) | |
| parent | 47efb30274cbec1bd3c0c980a7ece328df2c16a8 (diff) | |
Merge branch 'bpf-perf'
Kaixu Xia says:
====================
bpf: Introduce the new ability of eBPF programs to access hardware PMU counter
This patchset is based on the net-next tree:
git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
commit 9dc20a649609c95ce7c5ac4282656ba627b67d49.
Previous patch v6 url:
https://lkml.org/lkml/2015/8/4/188
changes in V7:
- rebase the whole patch set to net-next tree(9dc20a64);
- split out the core perf APIs into Patch 1/5;
- change the return value of function perf_event_attrs()
from struct perf_event * to const struct perf_event * in
Patch 1/5;
- rename the function perf_event_read_internal() to
perf_event_read_local() and rewrite it in Patch 1/5;
- rename the function check_func_limit() to
check_map_func_compatibility() and remove the unnecessary passing
of a pointer to a pointer in Patch 4/5;
changes in V6:
- make the Patch 1/4 commit message more meaningful and readable;
- remove the unnecessary comment in Patch 2/4 and make it clean;
- declare the function perf_event_release_kernel() in include/
linux/perf_event.h to fix the build error when CONFIG_PERF_EVENTS
isn't configured in Patch 2/4;
- add function perf_event_attrs() to get the struct perf_event_attr
in Patch 2/4.
- move the related code from kernel/trace/bpf_trace.c to kernel/
events/core.c and add function perf_event_read_internal() to
avoid poking inside of the event outside of perf code in Patch 3/4;
- generalize the func & map match-pair with an array in Patch 3/4;
changes in V5:
- move struct fd_array_map_ops* fd_ops to bpf_map;
- move array perf event decrement refcnt function to
map_free;
- fix the NULL ptr of perf_event_get();
- move bpf_perf_event_read() to kernel/bpf/bpf_trace.c;
- get rid of the remaining struct bpf_prog;
- remove the unnecessary cast on void *;
changes in V4:
- make the bpf_prog_array_map more generic;
- fix the bug of event refcnt leak;
- use more useful errno in bpf_perf_event_read();
changes in V3:
- collapse V2 patches 1-3 into one;
- drop the function map->ops->map_traverse_elem() and release
the struct perf_event in map_free;
- only allow to access bpf_perf_event_read() from programs;
- update the perf_event_array_map elem via xchg();
- pass index directly to bpf_perf_event_read() instead of
MAP_KEY;
changes in V2:
- put atomic_long_inc_not_zero() between fdget() and fdput();
- limit the event type to PERF_TYPE_RAW and PERF_TYPE_HARDWARE;
- Only read the event counter on current CPU or on current
process;
- add new map type BPF_MAP_TYPE_PERF_EVENT_ARRAY to store the
pointer to the struct perf_event;
- according to the perf_event_map_fd and key, the function
bpf_perf_event_read() can get the Hardware PMU counter value;
Patch 5/5 is a simple example that shows how to use this new eBPF
program ability. The PMU counter data can be found in
/sys/kernel/debug/tracing/trace(trace_pipe) (the value of the cycles
PMU counter, sampled on 'kprobe/sys_write').
$ cat /sys/kernel/debug/tracing/trace_pipe
$ ./tracex6
...
syslog-ng-548 [000] d..1 76.905673: : CPU-0 681765271
syslog-ng-548 [000] d..1 76.905690: : CPU-0 681787855
syslog-ng-548 [000] d..1 76.905707: : CPU-0 681810504
syslog-ng-548 [000] d..1 76.905725: : CPU-0 681834771
syslog-ng-548 [000] d..1 76.905745: : CPU-0 681859519
syslog-ng-548 [000] d..1 76.905766: : CPU-0 681890419
syslog-ng-548 [000] d..1 76.905783: : CPU-0 681914045
syslog-ng-548 [000] d..1 76.905800: : CPU-0 681935950
syslog-ng-548 [000] d..1 76.905816: : CPU-0 681958299
ls-690 [005] d..1 82.241308: : CPU-5 3138451
sh-691 [004] d..1 82.244570: : CPU-4 7324988
<...>-699 [007] d..1 99.961387: : CPU-7 3194027
<...>-695 [003] d..1 99.961474: : CPU-3 288901
<...>-695 [003] d..1 99.961541: : CPU-3 383145
<...>-695 [003] d..1 99.961591: : CPU-3 450365
<...>-695 [003] d..1 99.961639: : CPU-3 515751
<...>-695 [003] d..1 99.961686: : CPU-3 579047
...
The details of the patches are as follows:
Patch 1/5 adds the necessary core perf APIs perf_event_attrs(),
perf_event_get(), perf_event_read_local() for accessing event
counters in eBPF programs;
Patch 2/5 rewrites part of the bpf_prog_array map code and makes it
more generic;
Patch 3/5 introduces a new bpf map type. This map only stores the
pointer to struct perf_event;
Patch 4/5 implements the function bpf_perf_event_read() that gets the
selected hardware PMU counter;
Patch 5/5 gives a simple example.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/arraymap.c')
| -rw-r--r-- | kernel/bpf/arraymap.c | 137 |
1 files changed, 106 insertions, 31 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index cb31229a6fa4..29ace107f236 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -150,15 +150,15 @@ static int __init register_array_map(void) } late_initcall(register_array_map); -static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) { - /* only bpf_prog file descriptors can be stored in prog_array map */ + /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return ERR_PTR(-EINVAL); return array_map_alloc(attr); } -static void prog_array_map_free(struct bpf_map *map) +static void fd_array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -167,21 +167,21 @@ static void prog_array_map_free(struct bpf_map *map) /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) - BUG_ON(array->prog[i] != NULL); + BUG_ON(array->ptrs[i] != NULL); kvfree(array); } -static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) { return NULL; } /* only called from syscall */ -static int prog_array_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static int fd_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *prog, *old_prog; + void *new_ptr, *old_ptr; u32 index = *(u32 *)key, ufd; if (map_flags != BPF_ANY) @@ -191,57 +191,75 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key, return -E2BIG; ufd = *(u32 *)value; - prog = bpf_prog_get(ufd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - - if (!bpf_prog_array_compatible(array, prog)) { - bpf_prog_put(prog); - return -EINVAL; - } + new_ptr = map->ops->map_fd_get_ptr(map, ufd); + if (IS_ERR(new_ptr)) + return 
PTR_ERR(new_ptr); - old_prog = xchg(array->prog + index, prog); - if (old_prog) - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, new_ptr); + if (old_ptr) + map->ops->map_fd_put_ptr(old_ptr); return 0; } -static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +static int fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *old_prog; + void *old_ptr; u32 index = *(u32 *)key; if (index >= array->map.max_entries) return -E2BIG; - old_prog = xchg(array->prog + index, NULL); - if (old_prog) { - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, NULL); + if (old_ptr) { + map->ops->map_fd_put_ptr(old_ptr); return 0; } else { return -ENOENT; } } +static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog = bpf_prog_get(fd); + if (IS_ERR(prog)) + return prog; + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return ERR_PTR(-EINVAL); + } + return prog; +} + +static void prog_fd_array_put_ptr(void *ptr) +{ + struct bpf_prog *prog = ptr; + + bpf_prog_put_rcu(prog); +} + /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_prog_array_map_clear(struct bpf_map *map) +void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; for (i = 0; i < array->map.max_entries; i++) - prog_array_map_delete_elem(map, &i); + fd_array_map_delete_elem(map, &i); } static const struct bpf_map_ops prog_array_ops = { - .map_alloc = prog_array_map_alloc, - .map_free = prog_array_map_free, + .map_alloc = fd_array_map_alloc, + .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, - .map_lookup_elem = prog_array_map_lookup_elem, - .map_update_elem = prog_array_map_update_elem, - .map_delete_elem = prog_array_map_delete_elem, + 
.map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = prog_fd_array_get_ptr, + .map_fd_put_ptr = prog_fd_array_put_ptr, }; static struct bpf_map_type_list prog_array_type __read_mostly = { @@ -255,3 +273,60 @@ static int __init register_prog_array_map(void) return 0; } late_initcall(register_prog_array_map); + +static void perf_event_array_map_free(struct bpf_map *map) +{ + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + +static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct perf_event *event; + const struct perf_event_attr *attr; + + event = perf_event_get(fd); + if (IS_ERR(event)) + return event; + + attr = perf_event_attrs(event); + if (IS_ERR(attr)) + return (void *)attr; + + if (attr->type != PERF_TYPE_RAW && + attr->type != PERF_TYPE_HARDWARE) { + perf_event_release_kernel(event); + return ERR_PTR(-EINVAL); + } + return event; +} + +static void perf_event_fd_array_put_ptr(void *ptr) +{ + struct perf_event *event = ptr; + + perf_event_release_kernel(event); +} + +static const struct bpf_map_ops perf_event_array_ops = { + .map_alloc = fd_array_map_alloc, + .map_free = perf_event_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = perf_event_fd_array_get_ptr, + .map_fd_put_ptr = perf_event_fd_array_put_ptr, +}; + +static struct bpf_map_type_list perf_event_array_type __read_mostly = { + .ops = &perf_event_array_ops, + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +}; + +static int __init register_perf_event_array_map(void) +{ + bpf_register_map_type(&perf_event_array_type); + return 0; +} +late_initcall(register_perf_event_array_map); |
