diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:20 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:21 +0200 |
commit | f6ef56589374670b7c1939720dfa00212bd80a5b (patch) | |
tree | f8f5e66c8fba220b34783b0d38518192bc53bf39 /tools | |
parent | 6f5c39fa5cd4a78c5432021e981aa8f79437a32c (diff) | |
parent | 3bbe0869884ceebffd59d5519c1d560207c6e116 (diff) |
Merge branch 'bpf-raw-tracepoints'
Alexei Starovoitov says:
====================
v7->v8:
- moved 'u32 num_args' from 'struct tracepoint' into 'struct bpf_raw_event_map'
that increases memory overhead, but can be optimized/compressed later.
Now it's zero changes in tracepoint.[ch]
v6->v7:
- adopted Steven's bpf_raw_tp_map section approach to find tracepoint
and corresponding bpf probe function instead of kallsyms approach.
dropped kernel_tracepoint_find_by_name() patch
v5->v6:
- avoid changing semantics of for_each_kernel_tracepoint() function, instead
introduce kernel_tracepoint_find_by_name() helper
v4->v5:
- adopted Daniel's fancy REPEAT macro in bpf_trace.c in patch 6
v3->v4:
- adopted Linus's CAST_TO_U64 macro to cast any integer, pointer, or small
struct to u64. That nicely reduced the size of patch 1
v2->v3:
- with Linus's suggestion introduced generic COUNT_ARGS and CONCATENATE macros
(or rather moved them from apparmor)
that cleaned up patch 6
- added patch 4 to refactor trace_iwlwifi_dev_ucode_error() from 17 args to 4
Now any tracepoint with >12 args will have build error
v1->v2:
- simplified api by combing bpf_raw_tp_open(name) + bpf_attach(prog_fd) into
bpf_raw_tp_open(name, prog_fd) as suggested by Daniel.
That simplifies bpf_detach as well which is now simple close() of fd.
- fixed memory leak in error path which was spotted by Daniel.
- fixed bpf_get_stackid(), bpf_perf_event_output() called from raw tracepoints
- added more tests
- fixed allyesconfig build caught by buildbot
v1:
This patch set is a different way to address the pressing need to access
task_struct pointers in sched tracepoints from bpf programs.
The first approach simply added these pointers to sched tracepoints:
https://lkml.org/lkml/2017/12/14/753
which Peter nacked.
Few options were discussed and eventually the discussion converged on
doing bpf specific tracepoint_probe_register() probe functions.
Details here:
https://lkml.org/lkml/2017/12/20/929
Patch 1 is kernel wide cleanup of pass-struct-by-value into
pass-struct-by-reference into tracepoints.
Patches 2 and 3 are minor cleanups to address allyesconfig build
Patch 4 refactor trace_iwlwifi_dev_ucode_error from 17 to 4 args
Patch 5 introduces COUNT_ARGS macro
Patch 6 introduces BPF_RAW_TRACEPOINT api.
the auto-cleanup and multiple concurrent users are must have
features of tracing api. For bpf raw tracepoints it looks like:
// load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
prog_fd = bpf_prog_load(...);
// receive anon_inode fd for given bpf_raw_tracepoint
// and attach bpf program to it
raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);
Ctrl-C of tracing daemon or cmdline tool will automatically
detach bpf program, unload it and unregister tracepoint probe.
More details in patch 6.
Patch 7 - trivial support in libbpf
Patches 8, 9 - user space tests
samples/bpf/test_overhead performance on 1 cpu:
tracepoint base kprobe+bpf tracepoint+bpf raw_tracepoint+bpf
task_rename 1.1M 769K 947K 1.0M
urandom_read 789K 697K 750K 755K
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 11 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.c | 11 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/test_progs.c | 91 |
4 files changed, 93 insertions, 21 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d245c41213ac..58060bec999d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_cmd { BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, }; enum bpf_map_type { @@ -134,6 +135,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, }; enum bpf_attach_type { @@ -344,6 +346,11 @@ union bpf_attr { __aligned_u64 prog_ids; __u32 prog_cnt; } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -1151,4 +1158,8 @@ struct bpf_cgroup_dev_ctx { __u32 minor; }; +struct bpf_raw_tracepoint_args { + __u64 args[0]; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 592a58a2b681..e0500055f1a6 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -428,6 +428,17 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) return err; } +int bpf_raw_tracepoint_open(const char *name, int prog_fd) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.raw_tracepoint.name = ptr_to_u64(name); + attr.raw_tracepoint.prog_fd = prog_fd; + + return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); +} + int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) { struct sockaddr_nl sa; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 8d18fb73d7fb..ee59342c6f42 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -79,4 +79,5 @@ int bpf_map_get_fd_by_id(__u32 id); int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); +int bpf_raw_tracepoint_open(const char *name, int prog_fd); #endif diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index e9df48b306df..faadbe233966 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -877,7 +877,7 @@ static void test_stacktrace_map() err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) - goto out; + return; /* Get the ID for the sched/sched_switch tracepoint */ snprintf(buf, sizeof(buf), @@ -888,8 +888,7 @@ static void test_stacktrace_map() bytes = read(efd, buf, sizeof(buf)); close(efd); - if (CHECK(bytes <= 0 || bytes >= sizeof(buf), - "read", "bytes %d errno %d\n", bytes, errno)) + if (bytes <= 0 || bytes >= sizeof(buf)) goto close_prog; /* Open the perf event and attach bpf progrram */ @@ -906,29 +905,24 @@ static void test_stacktrace_map() goto close_prog; err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", - err, errno)) - goto close_pmu; + if (err) + goto disable_pmu; err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); - if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", - err, errno)) + if (err) goto disable_pmu; /* find map fds */ control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK(control_map_fd < 0, "bpf_find_map control_map", - "err %d errno %d\n", err, errno)) + if (control_map_fd < 0) goto disable_pmu; stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", - "err %d errno %d\n", err, errno)) + if (stackid_hmap_fd < 0) goto disable_pmu; stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", - err, errno)) + if (stackmap_fd < 0) goto disable_pmu; /* give some time for bpf program run */ @@ -945,24 +939,78 @@ static void test_stacktrace_map() err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto disable_pmu_noerr; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - ; /* fall through */ + goto disable_pmu_noerr; + goto disable_pmu_noerr; disable_pmu: + error_cnt++; +disable_pmu_noerr: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); - -close_pmu: close(pmu_fd); - close_prog: bpf_object__close(obj); +} -out: - return; +static void test_stacktrace_map_raw_tp() +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + int efd, err, prog_fd; + __u32 key, val, duration = 0; + struct bpf_object *obj; + + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + + efd = bpf_raw_tracepoint_open("sched_switch", prog_fd); + if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (control_map_fd < 0) + goto close_prog; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (stackid_hmap_fd < 0) + goto close_prog; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (stackmap_fd < 0) + goto close_prog; + + /* give some time for bpf program run */ + sleep(1); + + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto close_prog; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + goto close_prog; + + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); } static int extract_build_id(char *build_id, size_t size) @@ -1138,6 +1186,7 @@ int main(void) test_tp_attach_query(); test_stacktrace_map(); test_stacktrace_build_id(); + test_stacktrace_map_raw_tp(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; |