diff options
author | Alexei Starovoitov <ast@kernel.org> | 2020-05-01 10:36:32 -0700 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2020-05-01 10:37:34 -0700 |
commit | 3dbb5b5040c396274a253b607289748f39a680a0 (patch) | |
tree | 6b010c99c159d08a48680e709ca37e3fe3e17971 | |
parent | c321022244708aec4675de4f032ef1ba9ff0c640 (diff) | |
parent | 31a9f7fe93378ab587d758d5b2e96a237caa7b8c (diff) |
Merge branch 'bpf_enable_stats'
Song Liu says:
====================
run_time_ns is a useful stats for BPF programs. However, it is gated by
sysctl kernel.bpf_stats_enabled. When multiple user space tools are
toggling kernl.bpf_stats_enabled at the same time, they may confuse each
other.
Solve this problem with a new BPF command BPF_ENABLE_STATS.
Changes v8 => v9:
1. Clean up in selftest (Andrii).
2. Not using static variable in test program (Andrii).
Changes v7 => v8:
1. Change name BPF_STATS_RUNTIME_CNT => BPF_STATS_RUN_TIME (Alexei).
2. Add CHECK_ATTR to bpf_enable_stats() (Alexei).
3. Rebase (Andrii).
4. Simplfy the selftest (Alexei).
Changes v6 => v7:
1. Add test to verify run_cnt matches count measured by the program.
Changes v5 => v6:
1. Simplify test program (Yonghong).
2. Rebase (with some conflicts).
Changes v4 => v5:
1. Use memset to zero bpf_attr in bpf_enable_stats() (Andrii).
Changes v3 => v4:
1. Add libbpf support and selftest;
2. Avoid cleaning trailing space.
Changes v2 => v3:
1. Rename the command to BPF_ENABLE_STATS, and make it extendible.
2. fix commit log;
3. remove unnecessary headers.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | include/linux/bpf.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 11 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 57 | ||||
-rw-r--r-- | kernel/sysctl.c | 36 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 11 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.c | 10 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 1 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/enable_stats.c | 45 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/test_enable_stats.c | 18 |
10 files changed, 190 insertions, 1 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c07b1d2f3824..1262ec460ab3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -987,6 +987,7 @@ _out: \ #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +extern struct mutex bpf_stats_enabled_mutex; /* * Block execution of BPF programs attached to instrumentation (perf, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0eccafae55bb..705e4822f997 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -115,6 +115,7 @@ enum bpf_cmd { BPF_LINK_UPDATE, BPF_LINK_GET_FD_BY_ID, BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, }; enum bpf_map_type { @@ -390,6 +391,12 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* type for BPF_ENABLE_STATS */ +enum bpf_stats_type { + /* enabled run_time_ns and run_cnt */ + BPF_STATS_RUN_TIME = 0, +}; + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -601,6 +608,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { /* struct used by BPF_ENABLE_STATS command */ + __u32 type; + } enable_stats; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c75b2dd2459c..4f34eecec9ce 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr) return fd; } +DEFINE_MUTEX(bpf_stats_enabled_mutex); + +static int bpf_stats_release(struct inode *inode, struct file *file) +{ + mutex_lock(&bpf_stats_enabled_mutex); + static_key_slow_dec(&bpf_stats_enabled_key.key); + mutex_unlock(&bpf_stats_enabled_mutex); + return 0; +} + +static const struct file_operations bpf_stats_fops = { + .release = bpf_stats_release, +}; + +static int bpf_enable_runtime_stats(void) +{ + int fd; + + mutex_lock(&bpf_stats_enabled_mutex); + + /* Set a very high limit to avoid overflow */ + if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { + mutex_unlock(&bpf_stats_enabled_mutex); + return -EBUSY; + } + + fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); + if (fd >= 0) + static_key_slow_inc(&bpf_stats_enabled_key.key); + + mutex_unlock(&bpf_stats_enabled_mutex); + return fd; +} + +#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type + +static int bpf_enable_stats(union bpf_attr *attr) +{ + + if (CHECK_ATTR(BPF_ENABLE_STATS)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (attr->enable_stats.type) { + case BPF_STATS_RUN_TIME: + return bpf_enable_runtime_stats(); + default: + break; + } + return -EINVAL; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr; @@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = bpf_obj_get_next_id(&attr, uattr, &link_idr, &link_idr_lock); break; + case BPF_ENABLE_STATS: + err = bpf_enable_stats(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e961286d0e14..7adfe5dbce9d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000; #endif /* CONFIG_SYSCTL */ +#ifdef CONFIG_BPF_SYSCALL +static int bpf_stats_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct static_key *key = (struct static_key *)table->data; + static int saved_val; + int val, ret; + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(val), + .mode = table->mode, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&bpf_stats_enabled_mutex); + val = saved_val; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret && val != saved_val) { + if (val) + static_key_slow_inc(key); + else + static_key_slow_dec(key); + saved_val = val; + } + mutex_unlock(&bpf_stats_enabled_mutex); + return ret; +} +#endif + /* * /proc/sys support */ @@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = { .data = &bpf_stats_enabled_key.key, .maxlen = sizeof(bpf_stats_enabled_key), .mode = 0644, - .proc_handler = proc_do_static_key, + .proc_handler = bpf_stats_handler, }, #endif #if defined(CONFIG_TREE_RCU) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0eccafae55bb..705e4822f997 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -115,6 +115,7 @@ enum bpf_cmd { BPF_LINK_UPDATE, BPF_LINK_GET_FD_BY_ID, BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, }; enum bpf_map_type { @@ -390,6 +391,12 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* type for BPF_ENABLE_STATS */ +enum bpf_stats_type { + /* enabled run_time_ns and run_cnt */ + BPF_STATS_RUN_TIME = 0, +}; + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -601,6 +608,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { /* struct used by BPF_ENABLE_STATS command */ + __u32 type; + } enable_stats; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 8f2f0958d446..43322f0d6c7f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -841,3 +841,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, return err; } + +int bpf_enable_stats(enum bpf_stats_type type) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.enable_stats.type = type; + + return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 335b457b3a25..1901b2777854 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -231,6 +231,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); +LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7cd49aa38005..e03bd4db827e 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -257,6 +257,7 @@ LIBBPF_0.0.8 { LIBBPF_0.0.9 { global: + bpf_enable_stats; bpf_link_get_fd_by_id; bpf_link_get_next_id; } LIBBPF_0.0.8; diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c new file mode 100644 index 000000000000..2cb2085917e7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "test_enable_stats.skel.h" + +void test_enable_stats(void) +{ + struct test_enable_stats *skel; + int stats_fd, err, prog_fd; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + int duration = 0; + + skel = test_enable_stats__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) + return; + + stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME); + if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) { + test_enable_stats__destroy(skel); + return; + } + + err = test_enable_stats__attach(skel); + if (CHECK(err, "attach_raw_tp", "err %d\n", err)) + goto cleanup; + + test_enable_stats__detach(skel); + + prog_fd = bpf_program__fd(skel->progs.test_enable_stats); + memset(&info, 0, info_len); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err, "get_prog_info", + "failed to get bpf_prog_info for fd %d\n", prog_fd)) + goto cleanup; + if (CHECK(info.run_time_ns == 0, "check_stats_enabled", + "failed to enable run_time_ns stats\n")) + goto cleanup; + + CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid", + "invalid run_cnt stats\n"); + +cleanup: + test_enable_stats__destroy(skel); + close(stats_fd); +} diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c new file mode 100644 index 000000000000..01a002ade529 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u64 count = 0; + +SEC("raw_tracepoint/sys_enter") +int test_enable_stats(void *ctx) +{ + count += 1; + return 0; +} |