From af9bd3e3331b8af42b6606c75797d041ab39380c Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Wed, 8 Jul 2020 03:48:52 +0900 Subject: samples: bpf: Fix bpf programs with kprobe/sys_connect event Currently, BPF programs with kprobe/sys_connect does not work properly. Commit 34745aed515c ("samples/bpf: fix kprobe attachment issue on x64") This commit modifies the bpf_load behavior of kprobe events in the x64 architecture. If the current kprobe event target starts with "sys_*", add the prefix "__x64_" to the front of the event. Appending "__x64_" prefix with kprobe/sys_* event was appropriate as a solution to most of the problems caused by the commit below. commit d5a00528b58c ("syscalls/core, syscalls/x86: Rename struct pt_regs-based sys_*() to __x64_sys_*()") However, there is a problem with the sys_connect kprobe event that does not work properly. For __sys_connect event, parameters can be fetched normally, but for __x64_sys_connect, parameters cannot be fetched. ffffffff818d3520 <__x64_sys_connect>: ffffffff818d3520: e8 fb df 32 00 callq 0xffffffff81c01520 <__fentry__> ffffffff818d3525: 48 8b 57 60 movq 96(%rdi), %rdx ffffffff818d3529: 48 8b 77 68 movq 104(%rdi), %rsi ffffffff818d352d: 48 8b 7f 70 movq 112(%rdi), %rdi ffffffff818d3531: e8 1a ff ff ff callq 0xffffffff818d3450 <__sys_connect> ffffffff818d3536: 48 98 cltq ffffffff818d3538: c3 retq ffffffff818d3539: 0f 1f 80 00 00 00 00 nopl (%rax) As the assembly code for __x64_sys_connect shows, parameters should be fetched and set into rdi, rsi, rdx registers prior to calling __sys_connect. Because of this problem, this commit fixes the sys_connect event by first getting the value of the rdi register and then the value of the rdi, rsi, and rdx register through an offset based on that value. Fixes: 34745aed515c ("samples/bpf: fix kprobe attachment issue on x64") Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200707184855.30968-2-danieltimlee@gmail.com --- samples/bpf/map_perf_test_kern.c | 9 ++++++--- samples/bpf/test_map_in_map_kern.c | 9 ++++++--- samples/bpf/test_probe_write_user_kern.c | 9 ++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'samples') diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 12e91ae64d4d..c9b31193ca12 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -11,6 +11,8 @@ #include #include "bpf_legacy.h" #include +#include +#include "trace_common.h" #define MAX_ENTRIES 1000 #define MAX_NR_CPUS 1024 @@ -154,9 +156,10 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_connect") +SEC("kprobe/" SYSCALL(sys_connect)) int stress_lru_hmap_alloc(struct pt_regs *ctx) { + struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn"; union { u16 dst6[8]; @@ -175,8 +178,8 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) long val = 1; u32 key = 0; - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); - addrlen = (int)PT_REGS_PARM3(ctx); + in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs); + addrlen = (int)PT_REGS_PARM3_CORE(real_regs); if (addrlen != sizeof(*in6)) return 0; diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c index 6cee61e8ce9b..36a203e69064 100644 --- a/samples/bpf/test_map_in_map_kern.c +++ b/samples/bpf/test_map_in_map_kern.c @@ -13,6 +13,8 @@ #include #include "bpf_legacy.h" #include +#include +#include "trace_common.h" #define MAX_NR_PORTS 65536 @@ -102,9 +104,10 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port) return result ? *result : -ENOENT; } -SEC("kprobe/sys_connect") +SEC("kprobe/" SYSCALL(sys_connect)) int trace_sys_connect(struct pt_regs *ctx) { + struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); struct sockaddr_in6 *in6; u16 test_case, port, dst6[8]; int addrlen, ret, inline_ret, ret_key = 0; @@ -112,8 +115,8 @@ int trace_sys_connect(struct pt_regs *ctx) void *outer_map, *inner_map; bool inline_hash = false; - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); - addrlen = (int)PT_REGS_PARM3(ctx); + in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs); + addrlen = (int)PT_REGS_PARM3_CORE(real_regs); if (addrlen != sizeof(*in6)) return 0; diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c index f033f36a13a3..fd651a65281e 100644 --- a/samples/bpf/test_probe_write_user_kern.c +++ b/samples/bpf/test_probe_write_user_kern.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include "trace_common.h" struct bpf_map_def SEC("maps") dnat_map = { .type = BPF_MAP_TYPE_HASH, @@ -26,13 +28,14 @@ struct bpf_map_def SEC("maps") dnat_map = { * This example sits on a syscall, and the syscall ABI is relatively stable * of course, across platforms, and over time, the ABI may change. */ -SEC("kprobe/sys_connect") +SEC("kprobe/" SYSCALL(sys_connect)) int bpf_prog1(struct pt_regs *ctx) { + struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); + void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs); + int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs); struct sockaddr_in new_addr, orig_addr = {}; struct sockaddr_in *mapped_addr; - void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx); - int sockaddr_len = (int)PT_REGS_PARM3(ctx); if (sockaddr_len > sizeof(orig_addr)) return 0; -- cgit From 88795b4adb01a30fbfd75ef1c1ef73b4442e38b2 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Wed, 8 Jul 2020 03:48:53 +0900 Subject: samples: bpf: Refactor BPF map in map test with libbpf From commit 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support"), a way to define internal map in BTF-defined map has been added. Instead of using previous 'inner_map_idx' definition, the structure to be used for the inner map can be directly defined using array directive. __array(values, struct inner_map) This commit refactors map in map test program with libbpf by explicitly defining inner map with BTF-defined format. Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200707184855.30968-3-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/test_map_in_map_kern.c | 85 +++++++++++++++++++------------------- samples/bpf/test_map_in_map_user.c | 53 +++++++++++++++++++++--- 3 files changed, 91 insertions(+), 49 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8403e4762306..f87ee02073ba 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -93,7 +93,7 @@ sampleip-objs := sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o -test_map_in_map-objs := bpf_load.o test_map_in_map_user.o +test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o xdp_redirect-objs := xdp_redirect_user.o xdp_redirect_map-objs := xdp_redirect_map_user.o diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c index 36a203e69064..8def45c5b697 100644 --- a/samples/bpf/test_map_in_map_kern.c +++ b/samples/bpf/test_map_in_map_kern.c @@ -11,7 +11,6 @@ #include #include #include -#include "bpf_legacy.h" #include #include #include "trace_common.h" @@ -19,60 +18,60 @@ #define MAX_NR_PORTS 65536 /* map #0 */ -struct bpf_map_def_legacy SEC("maps") port_a = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(int), - .max_entries = MAX_NR_PORTS, -}; +struct inner_a { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, int); + __uint(max_entries, MAX_NR_PORTS); +} port_a SEC(".maps"); /* map #1 */ -struct bpf_map_def_legacy SEC("maps") port_h = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(int), - .max_entries = 1, -}; +struct inner_h { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, int); + __uint(max_entries, 1); +} port_h SEC(".maps"); /* map #2 */ -struct bpf_map_def_legacy SEC("maps") reg_result_h = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(int), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, int); + __uint(max_entries, 1); +} reg_result_h SEC(".maps"); /* map #3 */ -struct bpf_map_def_legacy SEC("maps") inline_result_h = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(int), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, int); + __uint(max_entries, 1); +} inline_result_h SEC(".maps"); /* map #4 */ /* Test case #0 */ -struct bpf_map_def_legacy SEC("maps") a_of_port_a = { - .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, - .key_size = sizeof(u32), - .inner_map_idx = 0, /* map_fd[0] is port_a */ - .max_entries = MAX_NR_PORTS, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, MAX_NR_PORTS); + __uint(key_size, sizeof(u32)); + __array(values, struct inner_a); /* use inner_a as inner map */ +} a_of_port_a SEC(".maps"); /* map #5 */ /* Test case #1 */ -struct bpf_map_def_legacy SEC("maps") h_of_port_a = { - .type = BPF_MAP_TYPE_HASH_OF_MAPS, - .key_size = sizeof(u32), - .inner_map_idx = 0, /* map_fd[0] is port_a */ - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(u32)); + __array(values, struct inner_a); /* use inner_a as inner map */ +} h_of_port_a SEC(".maps"); /* map #6 */ /* Test case #2 */ -struct bpf_map_def_legacy SEC("maps") h_of_port_h = { - .type = BPF_MAP_TYPE_HASH_OF_MAPS, - .key_size = sizeof(u32), - .inner_map_idx = 1, /* map_fd[1] is port_h */ - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(u32)); + __array(values, struct inner_h); /* use inner_h as inner map */ +} h_of_port_h SEC(".maps"); static __always_inline int do_reg_lookup(void *inner_map, u32 port) { diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index eb29bcb76f3f..98656de56b83 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -11,7 +11,9 @@ #include #include #include -#include "bpf_load.h" +#include + +static int map_fd[7]; #define PORT_A (map_fd[0]) #define PORT_H (map_fd[1]) @@ -113,18 +115,59 @@ static void test_map_in_map(void) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; - assert(!setrlimit(RLIMIT_MEMLOCK, &r)); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + prog = bpf_object__find_program_by_name(obj, "trace_sys_connect"); + if (!prog) { + printf("finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "port_a"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "port_h"); + map_fd[2] = bpf_object__find_map_fd_by_name(obj, "reg_result_h"); + map_fd[3] = bpf_object__find_map_fd_by_name(obj, "inline_result_h"); + map_fd[4] = bpf_object__find_map_fd_by_name(obj, "a_of_port_a"); + map_fd[5] = bpf_object__find_map_fd_by_name(obj, "h_of_port_a"); + map_fd[6] = bpf_object__find_map_fd_by_name(obj, "h_of_port_h"); + if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0 || + map_fd[3] < 0 || map_fd[4] < 0 || map_fd[5] < 0 || map_fd[6] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } test_map_in_map(); +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return 0; } -- cgit From cc7f641d637bef0b31194d28667553f77c4ef947 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Wed, 8 Jul 2020 03:48:54 +0900 Subject: samples: bpf: Refactor BPF map performance test with libbpf Previously, in order to set the numa_node attribute at the time of map creation using "libbpf", it was necessary to call bpf_create_map_node() directly (bpf_load approach), instead of calling bpf_object_load() that handles everything on its own, including map creation. And because of this problem, this sample had problems with refactoring from bpf_load to libbbpf. However, by commit 1bdb6c9a1c43 ("libbpf: Add a bunch of attribute getters/setters for map definitions") added the numa_node attribute and allowed it to be set in the map. By using libbpf instead of bpf_load, the inner map definition has been explicitly declared with BTF-defined format. Also, the element of ARRAY_OF_MAPS was also statically specified using the BTF format. And for this reason some logic in fixup_map() was not needed and changed or removed. Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200707184855.30968-4-danieltimlee@gmail.com --- samples/bpf/map_perf_test_kern.c | 179 ++++++++++++++++++++------------------- samples/bpf/map_perf_test_user.c | 164 ++++++++++++++++++++++------------- 2 files changed, 196 insertions(+), 147 deletions(-) (limited to 'samples') diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index c9b31193ca12..8773f22b6a98 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -9,7 +9,6 @@ #include #include #include -#include "bpf_legacy.h" #include #include #include "trace_common.h" @@ -17,89 +16,93 @@ #define MAX_ENTRIES 1000 #define MAX_NR_CPUS 1024 -struct bpf_map_def_legacy SEC("maps") hash_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = MAX_ENTRIES, +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, long); + __uint(max_entries, MAX_ENTRIES); +} hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, u32); + __type(value, long); + __uint(max_entries, 10000); +} lru_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, u32); + __type(value, long); + __uint(max_entries, 10000); + __uint(map_flags, BPF_F_NO_COMMON_LRU); +} nocommon_lru_hash_map SEC(".maps"); + +struct inner_lru { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, u32); + __type(value, long); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NUMA_NODE); + __uint(numa_node, 0); +} inner_lru_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, MAX_NR_CPUS); + __uint(key_size, sizeof(u32)); + __array(values, struct inner_lru); /* use inner_lru as inner map */ +} array_of_lru_hashs SEC(".maps") = { + /* statically initialize the first element */ + .values = { &inner_lru_hash_map }, }; -struct bpf_map_def_legacy SEC("maps") lru_hash_map = { - .type = BPF_MAP_TYPE_LRU_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = 10000, -}; - -struct bpf_map_def_legacy SEC("maps") nocommon_lru_hash_map = { - .type = BPF_MAP_TYPE_LRU_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = 10000, - .map_flags = BPF_F_NO_COMMON_LRU, -}; - -struct bpf_map_def_legacy SEC("maps") inner_lru_hash_map = { - .type = BPF_MAP_TYPE_LRU_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = MAX_ENTRIES, - .map_flags = BPF_F_NUMA_NODE, - .numa_node = 0, -}; - -struct bpf_map_def_legacy SEC("maps") array_of_lru_hashs = { - .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, - .key_size = sizeof(u32), - .max_entries = MAX_NR_CPUS, -}; - -struct bpf_map_def_legacy SEC("maps") percpu_hash_map = { - .type = BPF_MAP_TYPE_PERCPU_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = MAX_ENTRIES, -}; - -struct bpf_map_def_legacy SEC("maps") hash_map_alloc = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = MAX_ENTRIES, - .map_flags = BPF_F_NO_PREALLOC, -}; - -struct bpf_map_def_legacy SEC("maps") percpu_hash_map_alloc = { - .type = BPF_MAP_TYPE_PERCPU_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = MAX_ENTRIES, - .map_flags = BPF_F_NO_PREALLOC, -}; - -struct bpf_map_def_legacy SEC("maps") lpm_trie_map_alloc = { - .type = BPF_MAP_TYPE_LPM_TRIE, - .key_size = 8, - .value_size = sizeof(long), - .max_entries = 10000, - .map_flags = BPF_F_NO_PREALLOC, -}; - -struct bpf_map_def_legacy SEC("maps") array_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = MAX_ENTRIES, -}; - -struct bpf_map_def_legacy SEC("maps") lru_hash_lookup_map = { - .type = BPF_MAP_TYPE_LRU_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(long), - .max_entries = MAX_ENTRIES, -}; - -SEC("kprobe/sys_getuid") +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(long)); + __uint(max_entries, MAX_ENTRIES); +} percpu_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, long); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); +} hash_map_alloc SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(long)); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); +} percpu_hash_map_alloc SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(key_size, 8); + __uint(value_size, sizeof(long)); + __uint(max_entries, 10000); + __uint(map_flags, BPF_F_NO_PREALLOC); +} lpm_trie_map_alloc SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, long); + __uint(max_entries, MAX_ENTRIES); +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, u32); + __type(value, long); + __uint(max_entries, MAX_ENTRIES); +} lru_hash_lookup_map SEC(".maps"); + +SEC("kprobe/" SYSCALL(sys_getuid)) int stress_hmap(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -114,7 +117,7 @@ int stress_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_geteuid") +SEC("kprobe/" SYSCALL(sys_geteuid)) int stress_percpu_hmap(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -128,7 +131,7 @@ int stress_percpu_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getgid") +SEC("kprobe/" SYSCALL(sys_getgid)) int stress_hmap_alloc(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -142,7 +145,7 @@ int stress_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getegid") +SEC("kprobe/" SYSCALL(sys_getegid)) int stress_percpu_hmap_alloc(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -236,7 +239,7 @@ done: return 0; } -SEC("kprobe/sys_gettid") +SEC("kprobe/" SYSCALL(sys_gettid)) int stress_lpm_trie_map_alloc(struct pt_regs *ctx) { union { @@ -258,7 +261,7 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getpgid") +SEC("kprobe/" SYSCALL(sys_getpgid)) int stress_hash_map_lookup(struct pt_regs *ctx) { u32 key = 1, i; @@ -271,7 +274,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getppid") +SEC("kprobe/" SYSCALL(sys_getppid)) int stress_array_map_lookup(struct pt_regs *ctx) { u32 key = 1, i; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index fe5564bff39b..8b13230b4c46 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -19,7 +18,7 @@ #include #include -#include "bpf_load.h" +#include #define TEST_BIT(t) (1U << (t)) #define MAX_NR_CPUS 1024 @@ -61,12 +60,18 @@ const char *test_map_names[NR_TESTS] = { [LRU_HASH_LOOKUP] = "lru_hash_lookup_map", }; +enum map_idx { + array_of_lru_hashs_idx, + hash_map_alloc_idx, + lru_hash_lookup_idx, + NR_IDXES, +}; + +static int map_fd[NR_IDXES]; + static int test_flags = ~0; static uint32_t num_map_entries; static uint32_t inner_lru_hash_size; -static int inner_lru_hash_idx = -1; -static int array_of_lru_hashs_idx = -1; -static int lru_hash_lookup_idx = -1; static int lru_hash_lookup_test_entries = 32; static uint32_t max_cnt = 1000000; @@ -122,30 +127,30 @@ static void do_test_lru(enum test_type test, int cpu) __u64 start_time; int i, ret; - if (test == INNER_LRU_HASH_PREALLOC) { + if (test == INNER_LRU_HASH_PREALLOC && cpu) { + /* If CPU is not 0, create inner_lru hash map and insert the fd + * value into the array_of_lru_hash map. In case of CPU 0, + * 'inner_lru_hash_map' was statically inserted on the map init + */ int outer_fd = map_fd[array_of_lru_hashs_idx]; unsigned int mycpu, mynode; assert(cpu < MAX_NR_CPUS); - if (cpu) { - ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); - assert(!ret); - - inner_lru_map_fds[cpu] = - bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, - test_map_names[INNER_LRU_HASH_PREALLOC], - sizeof(uint32_t), - sizeof(long), - inner_lru_hash_size, 0, - mynode); - if (inner_lru_map_fds[cpu] == -1) { - printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", - strerror(errno), errno); - exit(1); - } - } else { - inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx]; + ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); + assert(!ret); + + inner_lru_map_fds[cpu] = + bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + test_map_names[INNER_LRU_HASH_PREALLOC], + sizeof(uint32_t), + sizeof(long), + inner_lru_hash_size, 0, + mynode); + if (inner_lru_map_fds[cpu] == -1) { + printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", + strerror(errno), errno); + exit(1); } ret = bpf_map_update_elem(outer_fd, &cpu, @@ -377,7 +382,8 @@ static void fill_lpm_trie(void) key->data[1] = rand() & 0xff; key->data[2] = rand() & 0xff; key->data[3] = rand() & 0xff; - r = bpf_map_update_elem(map_fd[6], key, &value, 0); + r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], + key, &value, 0); assert(!r); } @@ -388,59 +394,52 @@ static void fill_lpm_trie(void) key->data[3] = 1; value = 128; - r = bpf_map_update_elem(map_fd[6], key, &value, 0); + r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], key, &value, 0); assert(!r); } -static void fixup_map(struct bpf_map_data *map, int idx) +static void fixup_map(struct bpf_object *obj) { + struct bpf_map *map; int i; - if (!strcmp("inner_lru_hash_map", map->name)) { - inner_lru_hash_idx = idx; - inner_lru_hash_size = map->def.max_entries; - } + bpf_object__for_each_map(map, obj) { + const char *name = bpf_map__name(map); - if (!strcmp("array_of_lru_hashs", map->name)) { - if (inner_lru_hash_idx == -1) { - printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n"); - exit(1); + /* Only change the max_entries for the enabled test(s) */ + for (i = 0; i < NR_TESTS; i++) { + if (!strcmp(test_map_names[i], name) && + (check_test_flags(i))) { + bpf_map__resize(map, num_map_entries); + continue; + } } - map->def.inner_map_idx = inner_lru_hash_idx; - array_of_lru_hashs_idx = idx; } - if (!strcmp("lru_hash_lookup_map", map->name)) - lru_hash_lookup_idx = idx; - - if (num_map_entries <= 0) - return; - inner_lru_hash_size = num_map_entries; - - /* Only change the max_entries for the enabled test(s) */ - for (i = 0; i < NR_TESTS; i++) { - if (!strcmp(test_map_names[i], map->name) && - (check_test_flags(i))) { - map->def.max_entries = num_map_entries; - } - } } int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + struct bpf_link *links[8]; + struct bpf_program *prog; + struct bpf_object *obj; + struct bpf_map *map; char filename[256]; - int num_cpu = 8; + int i = 0; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } if (argc > 1) test_flags = atoi(argv[1]) ? : test_flags; if (argc > 2) - num_cpu = atoi(argv[2]) ? : num_cpu; + nr_cpus = atoi(argv[2]) ? : nr_cpus; if (argc > 3) num_map_entries = atoi(argv[3]); @@ -448,14 +447,61 @@ int main(int argc, char **argv) if (argc > 4) max_cnt = atoi(argv[4]); - if (load_bpf_file_fixup_map(filename, fixup_map)) { - printf("%s", bpf_log_buf); - return 1; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + map = bpf_object__find_map_by_name(obj, "inner_lru_hash_map"); + if (libbpf_get_error(map)) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + inner_lru_hash_size = bpf_map__max_entries(map); + if (!inner_lru_hash_size) { + fprintf(stderr, "ERROR: failed to get map attribute\n"); + goto cleanup; + } + + /* resize BPF map prior to loading */ + if (num_map_entries > 0) + fixup_map(obj); + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "array_of_lru_hashs"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "hash_map_alloc"); + map_fd[2] = bpf_object__find_map_fd_by_name(obj, "lru_hash_lookup_map"); + if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + links[i] = bpf_program__attach(prog); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[i] = NULL; + goto cleanup; + } + i++; } fill_lpm_trie(); - run_perf_test(num_cpu); + run_perf_test(nr_cpus); + +cleanup: + for (i--; i >= 0; i--) + bpf_link__destroy(links[i]); + bpf_object__close(obj); return 0; } -- cgit From eef8a42d6ce087d1c81c960ae0d14f955b742feb Mon Sep 17 00:00:00 2001 From: Wenbo Zhang Date: Fri, 10 Jul 2020 05:20:35 -0400 Subject: bpf: Fix fds_example SIGSEGV error The `BPF_LOG_BUF_SIZE`'s value is `UINT32_MAX >> 8`, so define an array with it on stack caused an overflow. Signed-off-by: Wenbo Zhang Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200710092035.28919-1-ethercflow@gmail.com --- samples/bpf/fds_example.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index d5992f787232..59f45fef5110 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -30,6 +30,8 @@ #define BPF_M_MAP 1 #define BPF_M_PROG 2 +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static void usage(void) { printf("Usage: fds_example [...]\n"); @@ -57,7 +59,6 @@ static int bpf_prog_create(const char *object) BPF_EXIT_INSN(), }; size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); - char bpf_log_buf[BPF_LOG_BUF_SIZE]; struct bpf_object *obj; int prog_fd; -- cgit From b36c3206f9ef3ea2844e9092c12d29c0d1f56c54 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 8 Jul 2020 07:28:34 +0000 Subject: samples: bpf: Add an option for printing extra statistics in xdpsock Introduce the --extra-stats (or simply -x) flag to the xdpsock application which prints additional statistics alongside the regular rx and tx counters. The new statistics printed report error conditions eg. rx ring full, invalid descriptors, etc. Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200708072835.4427-3-ciara.loftus@intel.com --- samples/bpf/xdpsock_user.c | 87 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 2 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index c91e91362a0c..19c679456a0e 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -77,6 +77,7 @@ static u32 opt_batch_size = 64; static int opt_pkt_count; static u16 opt_pkt_size = MIN_PKT_SIZE; static u32 opt_pkt_fill_pattern = 0x12345678; +static bool opt_extra_stats; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; @@ -103,8 +104,20 @@ struct xsk_socket_info { struct xsk_socket *xsk; unsigned long rx_npkts; unsigned long tx_npkts; + unsigned long rx_dropped_npkts; + unsigned long rx_invalid_npkts; + unsigned long tx_invalid_npkts; + unsigned long rx_full_npkts; + unsigned long rx_fill_empty_npkts; + unsigned long tx_empty_npkts; unsigned long prev_rx_npkts; unsigned long prev_tx_npkts; + unsigned long prev_rx_dropped_npkts; + unsigned long prev_rx_invalid_npkts; + unsigned long prev_tx_invalid_npkts; + unsigned long prev_rx_full_npkts; + unsigned long prev_rx_fill_empty_npkts; + unsigned long prev_tx_empty_npkts; u32 outstanding_tx; }; @@ -147,6 +160,30 @@ static void print_benchmark(bool running) } } +static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) +{ + struct xdp_statistics stats; + socklen_t optlen; + int err; + + optlen = sizeof(stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); + if (err) + return err; + + if (optlen == sizeof(struct xdp_statistics)) { + xsk->rx_dropped_npkts = stats.rx_dropped; + xsk->rx_invalid_npkts = stats.rx_invalid_descs; + xsk->tx_invalid_npkts = stats.tx_invalid_descs; + xsk->rx_full_npkts = stats.rx_ring_full; + xsk->rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; + xsk->tx_empty_npkts = stats.tx_ring_empty_descs; + return 0; + } + + return -EINVAL; +} + static void dump_stats(void) { unsigned long now = get_nsecs(); @@ -157,7 +194,8 @@ static void dump_stats(void) for (i = 0; i < num_socks && xsks[i]; i++) { char *fmt = "%-15s %'-11.0f %'-11lu\n"; - double rx_pps, tx_pps; + double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, + tx_invalid_pps, tx_empty_pps; rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * 1000000000. / dt; @@ -175,6 +213,46 @@ static void dump_stats(void) xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; + + if (opt_extra_stats) { + if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) { + dropped_pps = (xsks[i]->rx_dropped_npkts - + xsks[i]->prev_rx_dropped_npkts) * 1000000000. / dt; + rx_invalid_pps = (xsks[i]->rx_invalid_npkts - + xsks[i]->prev_rx_invalid_npkts) * 1000000000. / dt; + tx_invalid_pps = (xsks[i]->tx_invalid_npkts - + xsks[i]->prev_tx_invalid_npkts) * 1000000000. / dt; + full_pps = (xsks[i]->rx_full_npkts - + xsks[i]->prev_rx_full_npkts) * 1000000000. / dt; + fill_empty_pps = (xsks[i]->rx_fill_empty_npkts - + xsks[i]->prev_rx_fill_empty_npkts) + * 1000000000. / dt; + tx_empty_pps = (xsks[i]->tx_empty_npkts - + xsks[i]->prev_tx_empty_npkts) * 1000000000. / dt; + + printf(fmt, "rx dropped", dropped_pps, + xsks[i]->rx_dropped_npkts); + printf(fmt, "rx invalid", rx_invalid_pps, + xsks[i]->rx_invalid_npkts); + printf(fmt, "tx invalid", tx_invalid_pps, + xsks[i]->tx_invalid_npkts); + printf(fmt, "rx queue full", full_pps, + xsks[i]->rx_full_npkts); + printf(fmt, "fill ring empty", fill_empty_pps, + xsks[i]->rx_fill_empty_npkts); + printf(fmt, "tx ring empty", tx_empty_pps, + xsks[i]->tx_empty_npkts); + + xsks[i]->prev_rx_dropped_npkts = xsks[i]->rx_dropped_npkts; + xsks[i]->prev_rx_invalid_npkts = xsks[i]->rx_invalid_npkts; + xsks[i]->prev_tx_invalid_npkts = xsks[i]->tx_invalid_npkts; + xsks[i]->prev_rx_full_npkts = xsks[i]->rx_full_npkts; + xsks[i]->prev_rx_fill_empty_npkts = xsks[i]->rx_fill_empty_npkts; + xsks[i]->prev_tx_empty_npkts = xsks[i]->tx_empty_npkts; + } else { + printf("%-15s\n", "Error retrieving extra stats"); + } + } } } @@ -630,6 +708,7 @@ static struct option long_options[] = { {"tx-pkt-count", required_argument, 0, 'C'}, {"tx-pkt-size", required_argument, 0, 's'}, {"tx-pkt-pattern", required_argument, 0, 'P'}, + {"extra-stats", no_argument, 0, 'x'}, {0, 0, 0, 0} }; @@ -664,6 +743,7 @@ static void usage(const char *prog) " (Default: %d bytes)\n" " Min size: %d, Max size %d.\n" " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" + " -x, --extra-stats Display extra statistics.\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, @@ -679,7 +759,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:x", long_options, &option_index); if (c == -1) break; @@ -760,6 +840,9 @@ static void parse_command_line(int argc, char **argv) case 'P': opt_pkt_fill_pattern = strtol(optarg, NULL, 16); break; + case 'x': + opt_extra_stats = 1; + break; default: usage(basename(argv[0])); } -- cgit From a4e76f1bda8e7b358dc21f0f925b8a2c4c98e733 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:36 +0200 Subject: samples/bpf: xdp_redirect_cpu_user: Do not update bpf maps in option loop Do not update xdp_redirect_cpu maps running while option loop but defer it after all available options have been parsed. This is a preliminary patch to pass the program name we want to attach to the map entries as a user option Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/95dc46286fd2c609042948e04bb7ae1f5b425538.1594734381.git.lorenzo@kernel.org --- samples/bpf/xdp_redirect_cpu_user.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index f4e755e0dd73..6bb2d95cb26c 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -681,6 +681,7 @@ int main(int argc, char **argv) int add_cpu = -1; int opt, err; int prog_fd; + int *cpu, i; __u32 qsize; n_cpus = get_nprocs_conf(); @@ -716,6 +717,13 @@ int main(int argc, char **argv) } mark_cpus_unavailable(); + cpu = malloc(n_cpus * sizeof(int)); + if (!cpu) { + fprintf(stderr, "failed to allocate cpu array\n"); + return EXIT_FAIL; + } + memset(cpu, 0, n_cpus * sizeof(int)); + /* Parse commands line args */ while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF", long_options, &longindex)) != -1) { @@ -760,8 +768,7 @@ int main(int argc, char **argv) errno, strerror(errno)); goto error; } - create_cpu_entry(add_cpu, qsize, added_cpus, true); - added_cpus++; + cpu[added_cpus++] = add_cpu; break; case 'q': qsize = atoi(optarg); @@ -772,6 +779,7 @@ int main(int argc, char **argv) case 'h': error: default: + free(cpu); usage(argv, obj); return EXIT_FAIL_OPTION; } @@ -784,16 +792,21 @@ int main(int argc, char **argv) if (ifindex == -1) { fprintf(stderr, "ERR: required option --dev missing\n"); usage(argv, obj); - return EXIT_FAIL_OPTION; + err = EXIT_FAIL_OPTION; + goto out; } /* Required option */ if (add_cpu == -1) { fprintf(stderr, "ERR: required option --cpu missing\n"); fprintf(stderr, " Specify multiple --cpu option to add more\n"); usage(argv, obj); - return EXIT_FAIL_OPTION; + err = EXIT_FAIL_OPTION; + goto out; } + for (i = 0; i < added_cpus; i++) + create_cpu_entry(cpu[i], qsize, i, true); + /* Remove XDP program when program is interrupted or killed */ signal(SIGINT, int_exit); signal(SIGTERM, int_exit); @@ -801,27 +814,32 @@ int main(int argc, char **argv) prog = bpf_object__find_program_by_title(obj, prog_name); if (!prog) { fprintf(stderr, "bpf_object__find_program_by_title failed\n"); - return EXIT_FAIL; + err = EXIT_FAIL; + goto out; } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { fprintf(stderr, "bpf_program__fd failed\n"); - return EXIT_FAIL; + err = EXIT_FAIL; + goto out; } if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { fprintf(stderr, "link set xdp fd failed\n"); - return EXIT_FAIL_XDP; + err = EXIT_FAIL_XDP; + goto out; } err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); if (err) { printf("can't get prog info - %s\n", strerror(errno)); - return err; + goto out; } prog_id = info.id; stats_poll(interval, use_separators, prog_name, stress_mode); - return EXIT_OK; +out: + free(cpu); + return err; } -- cgit From ce4dade7f12a8f3e7918184ac851d992f551805d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:41 +0200 Subject: samples/bpf: xdp_redirect_cpu: Load a eBPF program on cpumap Extend xdp_redirect_cpu_{usr,kern}.c adding the possibility to load a XDP program on cpumap entries. The following options have been added: - mprog-name: cpumap entry program name - mprog-filename: cpumap entry program filename - redirect-device: output interface if the cpumap program performs a XDP_REDIRECT to an egress interface - redirect-map: bpf map used to perform XDP_REDIRECT to an egress interface - mprog-disable: disable loading XDP program on cpumap entries Add xdp_pass, xdp_drop, xdp_redirect stats accounting Co-developed-by: Jesper Dangaard Brouer Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/aa5a9a281b9dac425620fdabe82670ffb6bbdb92.1594734381.git.lorenzo@kernel.org --- samples/bpf/xdp_redirect_cpu_kern.c | 25 ++++-- samples/bpf/xdp_redirect_cpu_user.c | 175 +++++++++++++++++++++++++++++++++--- 2 files changed, 178 insertions(+), 22 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c index 2baf8db1f7e7..8255025dea97 100644 --- a/samples/bpf/xdp_redirect_cpu_kern.c +++ b/samples/bpf/xdp_redirect_cpu_kern.c @@ -21,7 +21,7 @@ struct { __uint(type, BPF_MAP_TYPE_CPUMAP); __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_cpumap_val)); __uint(max_entries, MAX_CPUS); } cpu_map SEC(".maps"); @@ -30,6 +30,9 @@ struct datarec { __u64 processed; __u64 dropped; __u64 issue; + __u64 xdp_pass; + __u64 xdp_drop; + __u64 xdp_redirect; }; /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success @@ -692,13 +695,16 @@ int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) * Code in: kernel/include/trace/events/xdp.h */ struct cpumap_kthread_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int map_id; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int cpu; // offset:16; size:4; signed:1; - unsigned int drops; // offset:20; size:4; signed:0; - unsigned int processed; // offset:24; size:4; signed:0; - int sched; // offset:28; size:4; signed:1; + u64 __pad; // First 8 bytes are not accessible + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int sched; // offset:28; size:4; signed:1; + unsigned int xdp_pass; // offset:32; size:4; signed:0; + unsigned int xdp_drop; // offset:36; size:4; signed:0; + unsigned int xdp_redirect; // offset:40; size:4; signed:0; }; SEC("tracepoint/xdp/xdp_cpumap_kthread") @@ -712,6 +718,9 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) return 0; rec->processed += ctx->processed; rec->dropped += ctx->drops; + rec->xdp_pass += ctx->xdp_pass; + rec->xdp_drop += ctx->xdp_drop; + rec->xdp_redirect += ctx->xdp_redirect; /* Count times kthread yielded CPU via schedule call */ if (ctx->sched) diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 6bb2d95cb26c..004c0622c913 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -70,6 +70,11 @@ static const struct option long_options[] = { {"stress-mode", no_argument, NULL, 'x' }, {"no-separators", no_argument, NULL, 'z' }, {"force", no_argument, NULL, 'F' }, + {"mprog-disable", no_argument, NULL, 'n' }, + {"mprog-name", required_argument, NULL, 'e' }, + {"mprog-filename", required_argument, NULL, 'f' }, + {"redirect-device", required_argument, NULL, 'r' }, + {"redirect-map", required_argument, NULL, 'm' }, {0, 0, NULL, 0 } }; @@ -156,6 +161,9 @@ struct datarec { __u64 processed; __u64 dropped; __u64 issue; + __u64 xdp_pass; + __u64 xdp_drop; + __u64 xdp_redirect; }; struct record { __u64 timestamp; @@ -175,6 +183,9 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec) /* For percpu maps, userspace gets a value per possible CPU */ unsigned int nr_cpus = bpf_num_possible_cpus(); struct datarec values[nr_cpus]; + __u64 sum_xdp_redirect = 0; + __u64 sum_xdp_pass = 0; + __u64 sum_xdp_drop = 0; __u64 sum_processed = 0; __u64 sum_dropped = 0; __u64 sum_issue = 0; @@ -196,10 +207,19 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec) sum_dropped += values[i].dropped; rec->cpu[i].issue = values[i].issue; sum_issue += values[i].issue; + rec->cpu[i].xdp_pass = values[i].xdp_pass; + sum_xdp_pass += values[i].xdp_pass; + rec->cpu[i].xdp_drop = values[i].xdp_drop; + sum_xdp_drop += values[i].xdp_drop; + rec->cpu[i].xdp_redirect = values[i].xdp_redirect; + sum_xdp_redirect += values[i].xdp_redirect; } rec->total.processed = sum_processed; rec->total.dropped = sum_dropped; rec->total.issue = sum_issue; + rec->total.xdp_pass = sum_xdp_pass; + rec->total.xdp_drop = sum_xdp_drop; + rec->total.xdp_redirect = sum_xdp_redirect; return true; } @@ -300,17 +320,33 @@ static __u64 calc_errs_pps(struct datarec *r, return pps; } +static void calc_xdp_pps(struct datarec *r, struct datarec *p, + double *xdp_pass, double *xdp_drop, + double *xdp_redirect, double period_) +{ + *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0; + if (period_ > 0) { + *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_; + *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_; + *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_; + } +} + static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, - char *prog_name) + char *prog_name, char *mprog_name, int mprog_fd) { unsigned int nr_cpus = bpf_num_possible_cpus(); double pps = 0, drop = 0, err = 0; + bool mprog_enabled = false; struct record *rec, *prev; int to_cpu; double t; int i; + if (mprog_fd > 0) + mprog_enabled = true; + /* Header */ printf("Running XDP/eBPF prog_name:%s\n", prog_name); printf("%-15s %-7s %-14s %-11s %-9s\n", @@ -455,6 +491,34 @@ static void stats_print(struct stats_record *stats_rec, printf(fm2_err, "xdp_exception", "total", pps, drop); } + /* CPUMAP attached XDP program that runs on remote/destination CPU */ + if (mprog_enabled) { + char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n"; + char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n"; + double xdp_pass, xdp_drop, xdp_redirect; + + printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name); + printf("%-15s %-7s %-14s %-11s %-9s\n", + "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir"); + + rec = &stats_rec->kthread; + prev = &stats_prev->kthread; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, + &xdp_redirect, t); + if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0) + printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop, + xdp_redirect); + } + calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop, + &xdp_redirect, t); + printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect); + } + printf("\n"); fflush(stdout); } @@ -491,7 +555,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b) *b = tmp; } -static int create_cpu_entry(__u32 cpu, __u32 queue_size, +static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, __u32 avail_idx, bool new) { __u32 curr_cpus_count = 0; @@ -501,7 +565,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, /* Add a CPU entry to cpumap, as this allocate a cpu entry in * the kernel for the cpu. */ - ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0); + ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0); if (ret) { fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); exit(EXIT_FAIL_BPF); @@ -532,9 +596,9 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, } } /* map_fd[7] = cpus_iterator */ - printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n", + printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n", new ? "Add-new":"Replace", cpu, avail_idx, - queue_size, curr_cpus_count); + value->qsize, value->bpf_prog.fd, curr_cpus_count); return 0; } @@ -558,21 +622,26 @@ static void mark_cpus_unavailable(void) } /* Stress cpumap management code by concurrently changing underlying cpumap */ -static void stress_cpumap(void) +static void stress_cpumap(struct bpf_cpumap_val *value) { /* Changing qsize will cause kernel to free and alloc a new * bpf_cpu_map_entry, with an associated/complicated tear-down * procedure. */ - create_cpu_entry(1, 1024, 0, false); - create_cpu_entry(1, 8, 0, false); - create_cpu_entry(1, 16000, 0, false); + value->qsize = 1024; + create_cpu_entry(1, value, 0, false); + value->qsize = 8; + create_cpu_entry(1, value, 0, false); + value->qsize = 16000; + create_cpu_entry(1, value, 0, false); } static void stats_poll(int interval, bool use_separators, char *prog_name, + char *mprog_name, struct bpf_cpumap_val *value, bool stress_mode) { struct stats_record *record, *prev; + int mprog_fd; record = alloc_stats_record(); prev = alloc_stats_record(); @@ -584,11 +653,12 @@ static void stats_poll(int interval, bool use_separators, char *prog_name, while (1) { swap(&prev, &record); + mprog_fd = value->bpf_prog.fd; stats_collect(record); - stats_print(record, prev, prog_name); + stats_print(record, prev, prog_name, mprog_name, mprog_fd); sleep(interval); if (stress_mode) - stress_cpumap(); + stress_cpumap(value); } free_stats_record(record); @@ -661,15 +731,66 @@ static int init_map_fds(struct bpf_object *obj) return 0; } +static int load_cpumap_prog(char *file_name, char *prog_name, + char *redir_interface, char *redir_map) +{ + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + .expected_attach_type = BPF_XDP_CPUMAP, + .file = file_name, + }; + struct bpf_program *prog; + struct bpf_object *obj; + int fd; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd)) + return -1; + + if (fd < 0) { + fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", + strerror(errno)); + return fd; + } + + if (redir_interface && redir_map) { + int err, map_fd, ifindex_out, key = 0; + + map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); + if (map_fd < 0) + return map_fd; + + ifindex_out = if_nametoindex(redir_interface); + if (!ifindex_out) + return -1; + + err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0); + if (err < 0) + return err; + } + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (!prog) { + fprintf(stderr, "bpf_object__find_program_by_title failed\n"); + return EXIT_FAIL; + } + + return bpf_program__fd(prog); +} + int main(int argc, char **argv) { struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; + char *mprog_filename = "xdp_redirect_kern.o"; + char *redir_interface = NULL, *redir_map = NULL; + char *mprog_name = "xdp_redirect_dummy"; + bool mprog_disable = false; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); + struct bpf_cpumap_val value; bool use_separators = true; bool stress_mode = false; struct bpf_program *prog; @@ -725,7 +846,7 @@ int main(int argc, char **argv) memset(cpu, 0, n_cpus * sizeof(int)); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -759,6 +880,21 @@ int main(int argc, char **argv) /* Selecting eBPF prog to load */ prog_name = optarg; break; + case 'n': + mprog_disable = true; + break; + case 'f': + mprog_filename = optarg; + break; + case 'e': + mprog_name = optarg; + break; + case 'r': + redir_interface = optarg; + break; + case 'm': + redir_map = optarg; + break; case 'c': /* Add multiple CPUs */ add_cpu = strtoul(optarg, NULL, 0); @@ -804,8 +940,18 @@ int main(int argc, char **argv) goto out; } + value.bpf_prog.fd = 0; + if (!mprog_disable) + value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name, + redir_interface, redir_map); + if (value.bpf_prog.fd < 0) { + err = value.bpf_prog.fd; + goto out; + } + value.qsize = qsize; + for (i = 0; i < added_cpus; i++) - create_cpu_entry(cpu[i], qsize, i, true); + create_cpu_entry(cpu[i], &value, i, true); /* Remove XDP program when program is interrupted or killed */ signal(SIGINT, int_exit); @@ -838,7 +984,8 @@ int main(int argc, char **argv) } prog_id = info.id; - stats_poll(interval, use_separators, prog_name, stress_mode); + stats_poll(interval, use_separators, prog_name, mprog_name, + &value, stress_mode); out: free(cpu); return err; -- cgit From e4d9c2320716ea0e9ef59f503ddd8f253a642ddd Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 20 Jul 2020 13:48:06 +0200 Subject: samples/bpf, selftests/bpf: Use bpf_probe_read_kernel A handful of samples and selftests fail to build on s390, because after commit 0ebeea8ca8a4 ("bpf: Restrict bpf_probe_read{, str}() only to archs where they work") bpf_probe_read is not available anymore. Fix by using bpf_probe_read_kernel. Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720114806.88823-1-iii@linux.ibm.com --- samples/bpf/offwaketime_kern.c | 7 ++++++- samples/bpf/test_overhead_kprobe_kern.c | 12 +++++++++--- samples/bpf/tracex1_kern.c | 9 +++++++-- samples/bpf/tracex5_kern.c | 4 ++-- 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'samples') diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index d459f73412a4..e74ee1cd4b9c 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -12,7 +12,12 @@ #include #include -#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) \ + ({ \ + typeof(P) val; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) #define MINBLOCK_US 1 diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c index 8b811c29dc79..f6d593e47037 100644 --- a/samples/bpf/test_overhead_kprobe_kern.c +++ b/samples/bpf/test_overhead_kprobe_kern.c @@ -10,7 +10,12 @@ #include #include -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) \ + ({ \ + typeof(P) val = 0; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) SEC("kprobe/__set_task_comm") int prog(struct pt_regs *ctx) @@ -25,8 +30,9 @@ int prog(struct pt_regs *ctx) tsk = (void *)PT_REGS_PARM1(ctx); pid = _(tsk->pid); - bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm); - bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx)); + bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm); + bpf_probe_read_kernel(newcomm, sizeof(newcomm), + (void *)PT_REGS_PARM2(ctx)); signal = _(tsk->signal); oom_score_adj = _(signal->oom_score_adj); return 0; diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c index 8e2610e14475..3f4599c9a202 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1_kern.c @@ -11,7 +11,12 @@ #include #include -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) \ + ({ \ + typeof(P) val = 0; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) /* kprobe is NOT a stable ABI * kernel functions can be removed, renamed or completely change semantics. @@ -34,7 +39,7 @@ int bpf_prog1(struct pt_regs *ctx) dev = _(skb->dev); len = _(skb->len); - bpf_probe_read(devname, sizeof(devname), dev->name); + bpf_probe_read_kernel(devname, sizeof(devname), dev->name); if (devname[0] == 'l' && devname[1] == 'o') { char fmt[] = "skb %p len %d\n"; diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index 32b49e8ab6bd..64a1f7550d7e 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), -- cgit