diff options
Diffstat (limited to 'tools/bpf')
59 files changed, 4811 insertions, 2066 deletions
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 243b79f2b451..fd2585af1252 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -27,18 +27,12 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - FEATURE_USER = .bpf FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled FEATURE_DISPLAY = libbfd check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean +NON_CHECK_FEAT_TARGETS := clean bpftool_clean resolve_btfids_clean ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) check_feat := 0 @@ -76,7 +70,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm -all: $(PROGS) bpftool runqslower +all: $(PROGS) bpftool $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm' $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o @@ -92,7 +86,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c -clean: bpftool_clean runqslower_clean resolve_btfids_clean +clean: bpftool_clean resolve_btfids_clean $(call QUIET_CLEAN, bpf-progs) $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* @@ -118,12 +112,6 @@ bpftool_install: bpftool_clean: $(call descend,bpftool,clean) -runqslower: - $(call descend,runqslower) - -runqslower_clean: - $(call descend,runqslower,clean) - resolve_btfids: $(call descend,resolve_btfids) @@ -131,5 +119,4 @@ resolve_btfids_clean: $(call descend,resolve_btfids,clean) .PHONY: all install clean bpftool bpftool_install bpftool_clean \ - runqslower runqslower_clean \ resolve_btfids resolve_btfids_clean diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index a90a5d110f92..5ab8f80e2834 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ 
b/tools/bpf/bpf_jit_disasm.c @@ -45,6 +45,8 @@ static void get_exec_path(char *tpath, size_t size) assert(path); len = readlink(path, tpath, size); + if (len < 0) + len = 0; tpath[len] = 0; free(path); @@ -210,7 +212,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, return NULL; } if (proglen > 1000000) { - printf("proglen of %d too big, stopping\n", proglen); + printf("proglen of %u too big, stopping\n", proglen); return NULL; } diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index ac8487dcff1d..bf843f328812 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -5,12 +5,6 @@ INSTALL ?= install RM ?= rm -f RMDIR ?= rmdir --ignore-fail-on-non-empty -ifeq ($(V),1) - Q = -else - Q = @ -endif - prefix ?= /usr/local mandir ?= $(prefix)/man man8dir = $(mandir)/man8 @@ -31,9 +25,9 @@ see_also = $(subst " ",, \ "\n" \ "SEE ALSO\n" \ "========\n" \ - "\t**bpf**\ (2),\n" \ - "\t**bpf-helpers**\\ (7)" \ - $(foreach page,$(call list_pages,$(1)),",\n\t**$(page)**\\ (8)") \ + "**bpf**\ (2),\n" \ + "**bpf-helpers**\\ (7)" \ + $(foreach page,$(call list_pages,$(1)),",\n**$(page)**\\ (8)") \ "\n") $(OUTPUT)%.8: %.rst diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 342716f74ec4..d47dddc2b4ee 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -14,82 +14,83 @@ tool for inspection of BTF data SYNOPSIS ======== - **bpftool** [*OPTIONS*] **btf** *COMMAND* +**bpftool** [*OPTIONS*] **btf** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } } +*OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } } - *COMMANDS* := { **dump** | **help** } +*COMMANDS* := { **dump** | **help** } BTF COMMANDS ============= -| **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*] -| **bpftool** **btf dump** *BTF_SRC* 
[**format** *FORMAT*] -| **bpftool** **btf help** +| **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*] +| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] [**root_id** *ROOT_ID*] +| **bpftool** **btf help** | -| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } -| *FORMAT* := { **raw** | **c** } -| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } +| *FORMAT* := { **raw** | **c** [**unsorted**] } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } DESCRIPTION =========== - **bpftool btf { show | list }** [**id** *BTF_ID*] - Show information about loaded BTF objects. If a BTF ID is - specified, show information only about given BTF object, - otherwise list all BTF objects currently loaded on the - system. +bpftool btf { show | list } [id *BTF_ID*] + Show information about loaded BTF objects. If a BTF ID is specified, show + information only about given BTF object, otherwise list all BTF objects + currently loaded on the system. - Since Linux 5.8 bpftool is able to discover information about - processes that hold open file descriptors (FDs) against BTF - objects. On such kernels bpftool will automatically emit this - information as well. + Since Linux 5.8 bpftool is able to discover information about processes + that hold open file descriptors (FDs) against BTF objects. On such kernels + bpftool will automatically emit this information as well. - **bpftool btf dump** *BTF_SRC* - Dump BTF entries from a given *BTF_SRC*. +bpftool btf dump *BTF_SRC* [format *FORMAT*] [root_id *ROOT_ID*] + Dump BTF entries from a given *BTF_SRC*. 
- When **id** is specified, BTF object with that ID will be - loaded and all its BTF types emitted. + When **id** is specified, BTF object with that ID will be loaded and all + its BTF types emitted. - When **map** is provided, it's expected that map has - associated BTF object with BTF types describing key and - value. It's possible to select whether to dump only BTF - type(s) associated with key (**key**), value (**value**), - both key and value (**kv**), or all BTF types present in - associated BTF object (**all**). If not specified, **kv** - is assumed. + When **map** is provided, it's expected that map has associated BTF object + with BTF types describing key and value. It's possible to select whether to + dump only BTF type(s) associated with key (**key**), value (**value**), + both key and value (**kv**), or all BTF types present in associated BTF + object (**all**). If not specified, **kv** is assumed. - When **prog** is provided, it's expected that program has - associated BTF object with BTF types. + When **prog** is provided, it's expected that program has associated BTF + object with BTF types. - When specifying *FILE*, an ELF file is expected, containing - .BTF section with well-defined BTF binary format data, - typically produced by clang or pahole. + When specifying *FILE*, an ELF file is expected, containing .BTF section + with well-defined BTF binary format data, typically produced by clang or + pahole. - **format** option can be used to override default (raw) - output format. Raw (**raw**) or C-syntax (**c**) output - formats are supported. + **format** option can be used to override default (raw) output format. Raw + (**raw**) or C-syntax (**c**) output formats are supported. With C-style + formatting, the output is sorted by default. Use the **unsorted** option + to avoid sorting the output. - **bpftool btf help** - Print short help message. + **root_id** option can be used to filter a dump to a single type and all + its dependent types. 
It cannot be used with any other types of filtering + (such as the "key", "value", or "kv" arguments when dumping BTF for a map). + It can be passed multiple times to dump multiple types. + +bpftool btf help + Print short help message. OPTIONS ======= - .. include:: common_options.rst - - -B, --base-btf *FILE* - Pass a base BTF object. Base BTF objects are typically used - with BTF objects for kernel modules. To avoid duplicating - all kernel symbols required by modules, BTF objects for - modules are "split", they are built incrementally on top of - the kernel (vmlinux) BTF object. So the base BTF reference - should usually point to the kernel BTF. - - When the main BTF object to process (for example, the - module BTF to dump) is passed as a *FILE*, bpftool attempts - to autodetect the path for the base object, and passing - this option is optional. When the main BTF object is passed - through other handles, this option becomes necessary. +.. include:: common_options.rst + +-B, --base-btf *FILE* + Pass a base BTF object. Base BTF objects are typically used with BTF + objects for kernel modules. To avoid duplicating all kernel symbols + required by modules, BTF objects for modules are "split", they are + built incrementally on top of the kernel (vmlinux) BTF object. So the + base BTF reference should usually point to the kernel BTF. + + When the main BTF object to process (for example, the module BTF to + dump) is passed as a *FILE*, bpftool attempts to autodetect the path + for the base object, and passing this option is optional. When the main + BTF object is passed through other handles, this option becomes + necessary. 
EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index bd015ec9847b..e8185596a759 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -14,124 +14,125 @@ tool for inspection and simple manipulation of eBPF progs SYNOPSIS ======== - **bpftool** [*OPTIONS*] **cgroup** *COMMAND* +**bpftool** [*OPTIONS*] **cgroup** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } } +*OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } } - *COMMANDS* := - { **show** | **list** | **tree** | **attach** | **detach** | **help** } +*COMMANDS* := +{ **show** | **list** | **tree** | **attach** | **detach** | **help** } CGROUP COMMANDS =============== -| **bpftool** **cgroup** { **show** | **list** } *CGROUP* [**effective**] -| **bpftool** **cgroup tree** [*CGROUP_ROOT*] [**effective**] -| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] -| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* -| **bpftool** **cgroup help** +| **bpftool** **cgroup** { **show** | **list** } *CGROUP* [**effective**] +| **bpftool** **cgroup tree** [*CGROUP_ROOT*] [**effective**] +| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] +| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* +| **bpftool** **cgroup help** | -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { **cgroup_inet_ingress** | **cgroup_inet_egress** | -| **cgroup_inet_sock_create** | **cgroup_sock_ops** | -| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | -| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | -| **cgroup_inet4_connect** | **cgroup_inet6_connect** | -| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** | -| **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** | -| **cgroup_udp4_sendmsg** | 
**cgroup_udp6_sendmsg** | -| **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | -| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** | -| **cgroup_inet_sock_release** } -| *ATTACH_FLAGS* := { **multi** | **override** } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } +| *ATTACH_TYPE* := { **cgroup_inet_ingress** | **cgroup_inet_egress** | +| **cgroup_inet_sock_create** | **cgroup_sock_ops** | +| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | +| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | +| **cgroup_inet4_connect** | **cgroup_inet6_connect** | +| **cgroup_unix_connect** | **cgroup_inet4_getpeername** | +| **cgroup_inet6_getpeername** | **cgroup_unix_getpeername** | +| **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** | +| **cgroup_unix_getsockname** | **cgroup_udp4_sendmsg** | +| **cgroup_udp6_sendmsg** | **cgroup_unix_sendmsg** | +| **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | +| **cgroup_unix_recvmsg** | **cgroup_sysctl** | +| **cgroup_getsockopt** | **cgroup_setsockopt** | +| **cgroup_inet_sock_release** } +| *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION =========== - **bpftool cgroup { show | list }** *CGROUP* [**effective**] - List all programs attached to the cgroup *CGROUP*. - - Output will start with program ID followed by attach type, - attach flags and program name. - - If **effective** is specified retrieve effective programs that - will execute for events within a cgroup. This includes - inherited along with attached ones. - - **bpftool cgroup tree** [*CGROUP_ROOT*] [**effective**] - Iterate over all cgroups in *CGROUP_ROOT* and list all - attached programs. If *CGROUP_ROOT* is not specified, - bpftool uses cgroup v2 mountpoint. - - The output is similar to the output of cgroup show/list - commands: it starts with absolute cgroup path, followed by - program ID, attach type, attach flags and program name. 
- - If **effective** is specified retrieve effective programs that - will execute for events within a cgroup. This includes - inherited along with attached ones. - - **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] - Attach program *PROG* to the cgroup *CGROUP* with attach type - *ATTACH_TYPE* and optional *ATTACH_FLAGS*. - - *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs - some bpf program, the program in this cgroup yields to sub-cgroup - program; **multi** if a sub-cgroup installs some bpf program, - that cgroup program gets run in addition to the program in this - cgroup. - - Only one program is allowed to be attached to a cgroup with - no attach flags or the **override** flag. Attaching another - program will release old program and attach the new one. - - Multiple programs are allowed to be attached to a cgroup with - **multi**. They are executed in FIFO order (those that were - attached first, run first). - - Non-default *ATTACH_FLAGS* are supported by kernel version 4.14 - and later. 
- - *ATTACH_TYPE* can be on of: - **ingress** ingress path of the inet socket (since 4.10); - **egress** egress path of the inet socket (since 4.10); - **sock_create** opening of an inet socket (since 4.10); - **sock_ops** various socket operations (since 4.12); - **device** device access (since 4.15); - **bind4** call to bind(2) for an inet4 socket (since 4.17); - **bind6** call to bind(2) for an inet6 socket (since 4.17); - **post_bind4** return from bind(2) for an inet4 socket (since 4.17); - **post_bind6** return from bind(2) for an inet6 socket (since 4.17); - **connect4** call to connect(2) for an inet4 socket (since 4.17); - **connect6** call to connect(2) for an inet6 socket (since 4.17); - **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an - unconnected udp4 socket (since 4.18); - **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an - unconnected udp6 socket (since 4.18); - **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp4 socket (since 5.2); - **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp6 socket (since 5.2); - **sysctl** sysctl access (since 5.2); - **getsockopt** call to getsockopt (since 5.3); - **setsockopt** call to setsockopt (since 5.3); - **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8); - **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); - **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); - **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). - **sock_release** closing an userspace inet socket (since 5.9). - - **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* - Detach *PROG* from the cgroup *CGROUP* and attach type - *ATTACH_TYPE*. - - **bpftool prog help** - Print short help message. +bpftool cgroup { show | list } *CGROUP* [effective] + List all programs attached to the cgroup *CGROUP*. 
+ + Output will start with program ID followed by attach type, attach flags and + program name. + + If **effective** is specified retrieve effective programs that will execute + for events within a cgroup. This includes inherited along with attached + ones. + +bpftool cgroup tree [*CGROUP_ROOT*] [effective] + Iterate over all cgroups in *CGROUP_ROOT* and list all attached programs. + If *CGROUP_ROOT* is not specified, bpftool uses cgroup v2 mountpoint. + + The output is similar to the output of cgroup show/list commands: it starts + with absolute cgroup path, followed by program ID, attach type, attach + flags and program name. + + If **effective** is specified retrieve effective programs that will execute + for events within a cgroup. This includes inherited along with attached + ones. + +bpftool cgroup attach *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] + Attach program *PROG* to the cgroup *CGROUP* with attach type *ATTACH_TYPE* + and optional *ATTACH_FLAGS*. + + *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs some + bpf program, the program in this cgroup yields to sub-cgroup program; + **multi** if a sub-cgroup installs some bpf program, that cgroup program + gets run in addition to the program in this cgroup. + + Only one program is allowed to be attached to a cgroup with no attach flags + or the **override** flag. Attaching another program will release old + program and attach the new one. + + Multiple programs are allowed to be attached to a cgroup with **multi**. + They are executed in FIFO order (those that were attached first, run + first). + + Non-default *ATTACH_FLAGS* are supported by kernel version 4.14 and later. 
+ + *ATTACH_TYPE* can be one of: + + - **ingress** ingress path of the inet socket (since 4.10) + - **egress** egress path of the inet socket (since 4.10) + - **sock_create** opening of an inet socket (since 4.10) + - **sock_ops** various socket operations (since 4.12) + - **device** device access (since 4.15) + - **bind4** call to bind(2) for an inet4 socket (since 4.17) + - **bind6** call to bind(2) for an inet6 socket (since 4.17) + - **post_bind4** return from bind(2) for an inet4 socket (since 4.17) + - **post_bind6** return from bind(2) for an inet6 socket (since 4.17) + - **connect4** call to connect(2) for an inet4 socket (since 4.17) + - **connect6** call to connect(2) for an inet6 socket (since 4.17) + - **connect_unix** call to connect(2) for a unix socket (since 6.7) + - **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp4 socket (since 4.18) + - **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp6 socket (since 4.18) + - **sendmsg_unix** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected unix socket (since 6.7) + - **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp4 socket (since 5.2) + - **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp6 socket (since 5.2) + - **recvmsg_unix** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected unix socket (since 6.7) + - **sysctl** sysctl access (since 5.2) + - **getsockopt** call to getsockopt (since 5.3) + - **setsockopt** call to setsockopt (since 5.3) + - **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8) + - **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8) + - **getpeername_unix** call to getpeername(2) for a unix socket (since 6.7) + - **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8) + - **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8) + - **getsockname_unix** call to getsockname(2) for 
a unix socket (since 6.7) + - **sock_release** closing a userspace inet socket (since 5.9) + +bpftool cgroup detach *CGROUP* *ATTACH_TYPE* *PROG* + Detach *PROG* from the cgroup *CGROUP* and attach type *ATTACH_TYPE*. + +bpftool cgroup help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. include:: common_options.rst - -f, --bpffs - Show file names of pinned programs. +-f, --bpffs + Show file names of pinned programs. EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index e44039f89be7..c7f837898bc7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -14,77 +14,70 @@ tool for inspection of eBPF-related parameters for Linux kernel or net device SYNOPSIS ======== - **bpftool** [*OPTIONS*] **feature** *COMMAND* +**bpftool** [*OPTIONS*] **feature** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } +*OPTIONS* := { |COMMON_OPTIONS| } - *COMMANDS* := { **probe** | **help** } +*COMMANDS* := { **probe** | **help** } FEATURE COMMANDS ================ -| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]] -| **bpftool** **feature list_builtins** *GROUP* -| **bpftool** **feature help** +| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]] +| **bpftool** **feature list_builtins** *GROUP* +| **bpftool** **feature help** | -| *COMPONENT* := { **kernel** | **dev** *NAME* } -| *GROUP* := { **prog_types** | **map_types** | **attach_types** | **link_types** | **helpers** } +| *COMPONENT* := { **kernel** | **dev** *NAME* } +| *GROUP* := { **prog_types** | **map_types** | **attach_types** | **link_types** | **helpers** } DESCRIPTION =========== - **bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]] - Probe the running kernel and dump a number of
eBPF-related - parameters, such as availability of the **bpf**\ () system call, - JIT status, eBPF program types availability, eBPF helper - functions availability, and more. - - By default, bpftool **does not run probes** for - **bpf_probe_write_user**\ () and **bpf_trace_printk**\() - helpers which print warnings to kernel logs. To enable them - and run all probes, the **full** keyword should be used. - - If the **macros** keyword (but not the **-j** option) is - passed, a subset of the output is dumped as a list of - **#define** macros that are ready to be included in a C - header file, for example. If, additionally, **prefix** is - used to define a *PREFIX*, the provided string will be used - as a prefix to the names of the macros: this can be used to - avoid conflicts on macro names when including the output of - this command as a header file. - - Keyword **kernel** can be omitted. If no probe target is - specified, probing the kernel is the default behaviour. - - When the **unprivileged** keyword is used, bpftool will dump - only the features available to a user who does not have the - **CAP_SYS_ADMIN** capability set. The features available in - that case usually represent a small subset of the parameters - supported by the system. Unprivileged users MUST use the - **unprivileged** keyword: This is to avoid misdetection if - bpftool is inadvertently run as non-root, for example. This - keyword is unavailable if bpftool was compiled without - libcap. - - **bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]] - Probe network device for supported eBPF features and dump - results to the console. - - The keywords **full**, **macros** and **prefix** have the - same role as when probing the kernel. - - **bpftool feature list_builtins** *GROUP* - List items known to bpftool. 
These can be BPF program types - (**prog_types**), BPF map types (**map_types**), attach types - (**attach_types**), link types (**link_types**), or BPF helper - functions (**helpers**). The command does not probe the system, but - simply lists the elements that bpftool knows from compilation time, - as provided from libbpf (for all object types) or from the BPF UAPI - header (list of helpers). This can be used in scripts to iterate over - BPF types or helpers. - - **bpftool feature help** - Print short help message. +bpftool feature probe [kernel] [full] [macros [prefix *PREFIX*]] + Probe the running kernel and dump a number of eBPF-related parameters, such + as availability of the **bpf**\ () system call, JIT status, eBPF program + types availability, eBPF helper functions availability, and more. + + By default, bpftool **does not run probes** for **bpf_probe_write_user**\ + () and **bpf_trace_printk**\() helpers which print warnings to kernel logs. + To enable them and run all probes, the **full** keyword should be used. + + If the **macros** keyword (but not the **-j** option) is passed, a subset + of the output is dumped as a list of **#define** macros that are ready to + be included in a C header file, for example. If, additionally, **prefix** + is used to define a *PREFIX*, the provided string will be used as a prefix + to the names of the macros: this can be used to avoid conflicts on macro + names when including the output of this command as a header file. + + Keyword **kernel** can be omitted. If no probe target is specified, probing + the kernel is the default behaviour. + + When the **unprivileged** keyword is used, bpftool will dump only the + features available to a user who does not have the **CAP_SYS_ADMIN** + capability set. The features available in that case usually represent a + small subset of the parameters supported by the system. 
Unprivileged users + MUST use the **unprivileged** keyword: This is to avoid misdetection if + bpftool is inadvertently run as non-root, for example. This keyword is + unavailable if bpftool was compiled without libcap. + +bpftool feature probe dev *NAME* [full] [macros [prefix *PREFIX*]] + Probe network device for supported eBPF features and dump results to the + console. + + The keywords **full**, **macros** and **prefix** have the same role as when + probing the kernel. + +bpftool feature list_builtins *GROUP* + List items known to bpftool. These can be BPF program types + (**prog_types**), BPF map types (**map_types**), attach types + (**attach_types**), link types (**link_types**), or BPF helper functions + (**helpers**). The command does not probe the system, but simply lists the + elements that bpftool knows from compilation time, as provided from libbpf + (for all object types) or from the BPF UAPI header (list of helpers). This + can be used in scripts to iterate over BPF types or helpers. + +bpftool feature help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. include:: common_options.rst diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 68454ef28f58..d0a36f442db7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -14,199 +14,188 @@ tool for BPF code-generation SYNOPSIS ======== - **bpftool** [*OPTIONS*] **gen** *COMMAND* +**bpftool** [*OPTIONS*] **gen** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } } +*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } | [ { **-S** | **--sign** } {**-k** <private_key.pem>} **-i** <certificate.x509> ] } - *COMMAND* := { **object** | **skeleton** | **help** } +*COMMAND* := { **object** | **skeleton** | **help** } GEN COMMANDS ============= -| **bpftool** **gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...] 
-| **bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*] -| **bpftool** **gen subskeleton** *FILE* [**name** *OBJECT_NAME*] -| **bpftool** **gen min_core_btf** *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...] -| **bpftool** **gen help** +| **bpftool** **gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...] +| **bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*] +| **bpftool** **gen subskeleton** *FILE* [**name** *OBJECT_NAME*] +| **bpftool** **gen min_core_btf** *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...] +| **bpftool** **gen help** DESCRIPTION =========== - **bpftool gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...] - Statically link (combine) together one or more *INPUT_FILE*'s - into a single resulting *OUTPUT_FILE*. All the files involved - are BPF ELF object files. - - The rules of BPF static linking are mostly the same as for - user-space object files, but in addition to combining data - and instruction sections, .BTF and .BTF.ext (if present in - any of the input files) data are combined together. .BTF - data is deduplicated, so all the common types across - *INPUT_FILE*'s will only be represented once in the resulting - BTF information. - - BPF static linking allows to partition BPF source code into - individually compiled files that are then linked into - a single resulting BPF object file, which can be used to - generated BPF skeleton (with **gen skeleton** command) or - passed directly into **libbpf** (using **bpf_object__open()** - family of APIs). - - **bpftool gen skeleton** *FILE* - Generate BPF skeleton C header file for a given *FILE*. - - BPF skeleton is an alternative interface to existing libbpf - APIs for working with BPF objects. Skeleton code is intended - to significantly shorten and simplify code to load and work - with BPF programs from userspace side. Generated code is - tailored to specific input BPF object *FILE*, reflecting its - structure by listing out available maps, program, variables, - etc. 
Skeleton eliminates the need to lookup mentioned - components by name. Instead, if skeleton instantiation - succeeds, they are populated in skeleton structure as valid - libbpf types (e.g., **struct bpf_map** pointer) and can be - passed to existing generic libbpf APIs. - - In addition to simple and reliable access to maps and - programs, skeleton provides a storage for BPF links (**struct - bpf_link**) for each BPF program within BPF object. When - requested, supported BPF programs will be automatically - attached and resulting BPF links stored for further use by - user in pre-allocated fields in skeleton struct. For BPF - programs that can't be automatically attached by libbpf, - user can attach them manually, but store resulting BPF link - in per-program link field. All such set up links will be - automatically destroyed on BPF skeleton destruction. This - eliminates the need for users to manage links manually and - rely on libbpf support to detach programs and free up - resources. - - Another facility provided by BPF skeleton is an interface to - global variables of all supported kinds: mutable, read-only, - as well as extern ones. This interface allows to pre-setup - initial values of variables before BPF object is loaded and - verified by kernel. For non-read-only variables, the same - interface can be used to fetch values of global variables on - userspace side, even if they are modified by BPF code. - - During skeleton generation, contents of source BPF object - *FILE* is embedded within generated code and is thus not - necessary to keep around. This ensures skeleton and BPF - object file are matching 1-to-1 and always stay in sync. - Generated code is dual-licensed under LGPL-2.1 and - BSD-2-Clause licenses. - - It is a design goal and guarantee that skeleton interfaces - are interoperable with generic libbpf APIs. 
User should - always be able to use skeleton API to create and load BPF - object, and later use libbpf APIs to keep working with - specific maps, programs, etc. - - As part of skeleton, few custom functions are generated. - Each of them is prefixed with object name. Object name can - either be derived from object file name, i.e., if BPF object - file name is **example.o**, BPF object name will be - **example**. Object name can be also specified explicitly - through **name** *OBJECT_NAME* parameter. The following - custom functions are provided (assuming **example** as - the object name): - - - **example__open** and **example__open_opts**. - These functions are used to instantiate skeleton. It - corresponds to libbpf's **bpf_object__open**\ () API. - **_opts** variants accepts extra **bpf_object_open_opts** - options. - - - **example__load**. - This function creates maps, loads and verifies BPF - programs, initializes global data maps. It corresponds to - libppf's **bpf_object__load**\ () API. - - - **example__open_and_load** combines **example__open** and - **example__load** invocations in one commonly used - operation. - - - **example__attach** and **example__detach** - This pair of functions allow to attach and detach, - correspondingly, already loaded BPF object. Only BPF - programs of types supported by libbpf for auto-attachment - will be auto-attached and their corresponding BPF links - instantiated. For other BPF programs, user can manually - create a BPF link and assign it to corresponding fields in - skeleton struct. **example__detach** will detach both - links created automatically, as well as those populated by - user manually. - - - **example__destroy** - Detach and unload BPF programs, free up all the resources - used by skeleton and BPF object. - - If BPF object has global variables, corresponding structs - with memory layout corresponding to global data data section - layout will be created. 
Currently supported ones are: *.data*, - *.bss*, *.rodata*, and *.kconfig* structs/data sections. - These data sections/structs can be used to set up initial - values of variables, if set before **example__load**. - Afterwards, if target kernel supports memory-mapped BPF - arrays, same structs can be used to fetch and update - (non-read-only) data from userspace, with same simplicity - as for BPF side. - - **bpftool gen subskeleton** *FILE* - Generate BPF subskeleton C header file for a given *FILE*. - - Subskeletons are similar to skeletons, except they do not own - the corresponding maps, programs, or global variables. They - require that the object file used to generate them is already - loaded into a *bpf_object* by some other means. - - This functionality is useful when a library is included into a - larger BPF program. A subskeleton for the library would have - access to all objects and globals defined in it, without - having to know about the larger program. - - Consequently, there are only two functions defined - for subskeletons: - - - **example__open(bpf_object\*)** - Instantiates a subskeleton from an already opened (but not - necessarily loaded) **bpf_object**. - - - **example__destroy()** - Frees the storage for the subskeleton but *does not* unload - any BPF programs or maps. - - **bpftool** **gen min_core_btf** *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...] - Generate a minimum BTF file as *OUTPUT*, derived from a given - *INPUT* BTF file, containing all needed BTF types so one, or - more, given eBPF objects CO-RE relocations may be satisfied. - - When kernels aren't compiled with CONFIG_DEBUG_INFO_BTF, - libbpf, when loading an eBPF object, has to rely on external - BTF files to be able to calculate CO-RE relocations. - - Usually, an external BTF file is built from existing kernel - DWARF data using pahole. It contains all the types used by - its respective kernel image and, because of that, is big. 
- - The min_core_btf feature builds smaller BTF files, customized - to one or multiple eBPF objects, so they can be distributed - together with an eBPF CO-RE based application, turning the - application portable to different kernel versions. - - Check examples bellow for more information how to use it. - - **bpftool gen help** - Print short help message. +bpftool gen object *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...] + Statically link (combine) together one or more *INPUT_FILE*'s into a single + resulting *OUTPUT_FILE*. All the files involved are BPF ELF object files. + + The rules of BPF static linking are mostly the same as for user-space + object files, but in addition to combining data and instruction sections, + .BTF and .BTF.ext (if present in any of the input files) data are combined + together. .BTF data is deduplicated, so all the common types across + *INPUT_FILE*'s will only be represented once in the resulting BTF + information. + + BPF static linking allows to partition BPF source code into individually + compiled files that are then linked into a single resulting BPF object + file, which can be used to generate BPF skeleton (with **gen skeleton** + command) or passed directly into **libbpf** (using **bpf_object__open()** + family of APIs). + +bpftool gen skeleton *FILE* + Generate BPF skeleton C header file for a given *FILE*. + + BPF skeleton is an alternative interface to existing libbpf APIs for + working with BPF objects. Skeleton code is intended to significantly + shorten and simplify code to load and work with BPF programs from userspace + side. Generated code is tailored to specific input BPF object *FILE*, + reflecting its structure by listing out available maps, programs, variables, + etc. Skeleton eliminates the need to lookup mentioned components by name.
+ Instead, if skeleton instantiation succeeds, they are populated in skeleton + structure as valid libbpf types (e.g., **struct bpf_map** pointer) and can + be passed to existing generic libbpf APIs. + + In addition to simple and reliable access to maps and programs, skeleton + provides a storage for BPF links (**struct bpf_link**) for each BPF program + within BPF object. When requested, supported BPF programs will be + automatically attached and resulting BPF links stored for further use by + user in pre-allocated fields in skeleton struct. For BPF programs that + can't be automatically attached by libbpf, user can attach them manually, + but store resulting BPF link in per-program link field. All such set up + links will be automatically destroyed on BPF skeleton destruction. This + eliminates the need for users to manage links manually and rely on libbpf + support to detach programs and free up resources. + + Another facility provided by BPF skeleton is an interface to global + variables of all supported kinds: mutable, read-only, as well as extern + ones. This interface allows to pre-setup initial values of variables before + BPF object is loaded and verified by kernel. For non-read-only variables, + the same interface can be used to fetch values of global variables on + userspace side, even if they are modified by BPF code. + + During skeleton generation, contents of source BPF object *FILE* is + embedded within generated code and is thus not necessary to keep around. + This ensures skeleton and BPF object file are matching 1-to-1 and always + stay in sync. Generated code is dual-licensed under LGPL-2.1 and + BSD-2-Clause licenses. + + It is a design goal and guarantee that skeleton interfaces are + interoperable with generic libbpf APIs. User should always be able to use + skeleton API to create and load BPF object, and later use libbpf APIs to + keep working with specific maps, programs, etc. + + As part of skeleton, few custom functions are generated. 
Each of them is + prefixed with object name. Object name can either be derived from object + file name, i.e., if BPF object file name is **example.o**, BPF object name + will be **example**. Object name can be also specified explicitly through + **name** *OBJECT_NAME* parameter. The following custom functions are + provided (assuming **example** as the object name): + + - **example__open** and **example__open_opts**. + These functions are used to instantiate skeleton. They correspond to + libbpf's **bpf_object__open**\ () API. The **_opts** variant accepts extra + **bpf_object_open_opts** options. + + - **example__load**. + This function creates maps, loads and verifies BPF programs, initializes + global data maps. It corresponds to libbpf's **bpf_object__load**\ () + API. + + - **example__open_and_load** combines **example__open** and + **example__load** invocations in one commonly used operation. + + - **example__attach** and **example__detach**. + This pair of functions allow to attach and detach, correspondingly, + already loaded BPF object. Only BPF programs of types supported by libbpf + for auto-attachment will be auto-attached and their corresponding BPF + links instantiated. For other BPF programs, user can manually create a + BPF link and assign it to corresponding fields in skeleton struct. + **example__detach** will detach both links created automatically, as well + as those populated by user manually. + + - **example__destroy**. + Detach and unload BPF programs, free up all the resources used by + skeleton and BPF object. + + If BPF object has global variables, corresponding structs with memory + layout corresponding to global data section layout will be created. + Currently supported ones are: *.data*, *.bss*, *.rodata*, and *.kconfig* + structs/data sections. These data sections/structs can be used to set up + initial values of variables, if set before **example__load**.
Afterwards, + if target kernel supports memory-mapped BPF arrays, same structs can be + used to fetch and update (non-read-only) data from userspace, with same + simplicity as for BPF side. + +bpftool gen subskeleton *FILE* + Generate BPF subskeleton C header file for a given *FILE*. + + Subskeletons are similar to skeletons, except they do not own the + corresponding maps, programs, or global variables. They require that the + object file used to generate them is already loaded into a *bpf_object* by + some other means. + + This functionality is useful when a library is included into a larger BPF + program. A subskeleton for the library would have access to all objects and + globals defined in it, without having to know about the larger program. + + Consequently, there are only two functions defined for subskeletons: + + - **example__open(bpf_object\*)**. + Instantiates a subskeleton from an already opened (but not necessarily + loaded) **bpf_object**. + + - **example__destroy()**. + Frees the storage for the subskeleton but *does not* unload any BPF + programs or maps. + +bpftool gen min_core_btf *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...] + Generate a minimum BTF file as *OUTPUT*, derived from a given *INPUT* BTF + file, containing all needed BTF types so one, or more, given eBPF objects + CO-RE relocations may be satisfied. + + When kernels aren't compiled with CONFIG_DEBUG_INFO_BTF, libbpf, when + loading an eBPF object, has to rely on external BTF files to be able to + calculate CO-RE relocations. + + Usually, an external BTF file is built from existing kernel DWARF data + using pahole. It contains all the types used by its respective kernel image + and, because of that, is big. + + The min_core_btf feature builds smaller BTF files, customized to one or + multiple eBPF objects, so they can be distributed together with an eBPF + CO-RE based application, turning the application portable to different + kernel versions. 
+ + Check examples below for more information on how to use it. + +bpftool gen help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. include:: common_options.rst - -L, --use-loader - For skeletons, generate a "light" skeleton (also known as "loader" - skeleton). A light skeleton contains a loader eBPF program. It does - not use the majority of the libbpf infrastructure, and does not need - libelf. +-L, --use-loader + For skeletons, generate a "light" skeleton (also known as "loader" + skeleton). A light skeleton contains a loader eBPF program. It does not use + the majority of the libbpf infrastructure, and does not need libelf. + +-S, --sign + For skeletons, generate a signed skeleton. This option must be used with + **-k** and **-i**. Using this flag implicitly enables **--use-loader**. + +-k <private_key.pem> + Path to the private key file in PEM format, required for signing. + +-i <certificate.x509> + Path to the X.509 certificate file in PEM or DER format, required for + signing. EXAMPLES ======== @@ -257,18 +246,48 @@ EXAMPLES return 0; } -This is example BPF application with two BPF programs and a mix of BPF maps -and global variables. Source code is split across two source code files. +**$ cat example3.bpf.c** -**$ clang -target bpf -g example1.bpf.c -o example1.bpf.o** +:: -**$ clang -target bpf -g example2.bpf.c -o example2.bpf.o** + #include <linux/ptrace.h> + #include <linux/bpf.h> + #include <bpf/bpf_helpers.h> + /* This header file is provided by the bpf_testmod module. */ + #include "bpf_testmod.h" -**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o** + int test_2_result = 0; -This set of commands compiles *example1.bpf.c* and *example2.bpf.c* -individually and then statically links respective object files into the final -BPF ELF object file *example.bpf.o*. + /* bpf_testmod.ko calls this function, passing a "4" + * and testmod_map->data.
+ */ + SEC("struct_ops/test_2") + void BPF_PROG(test_2, int a, int b) + { + test_2_result = a + b; + } + + SEC(".struct_ops") + struct bpf_testmod_ops testmod_map = { + .test_2 = (void *)test_2, + .data = 0x1, + }; + +This is example BPF application with three BPF programs and a mix of BPF +maps and global variables. Source code is split across three source code +files. + +**$ clang --target=bpf -g example1.bpf.c -o example1.bpf.o** + +**$ clang --target=bpf -g example2.bpf.c -o example2.bpf.o** + +**$ clang --target=bpf -g example3.bpf.c -o example3.bpf.o** + +**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o example3.bpf.o** + +This set of commands compiles *example1.bpf.c*, *example2.bpf.c* and +*example3.bpf.c* individually and then statically links respective object +files into the final BPF ELF object file *example.bpf.o*. **$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h** @@ -291,7 +310,15 @@ BPF ELF object file *example.bpf.o*. struct bpf_map *data; struct bpf_map *bss; struct bpf_map *my_map; + struct bpf_map *testmod_map; } maps; + struct { + struct example__testmod_map__bpf_testmod_ops { + const struct bpf_program *test_1; + const struct bpf_program *test_2; + int data; + } *testmod_map; + } struct_ops; struct { struct bpf_program *handle_sys_enter; struct bpf_program *handle_sys_exit; @@ -304,6 +331,7 @@ BPF ELF object file *example.bpf.o*. struct { int x; } data; + int test_2_result; } *bss; struct example__data { _Bool global_flag; @@ -342,10 +370,16 @@ BPF ELF object file *example.bpf.o*. skel->rodata->param1 = 128; + /* Change the value through the pointer of shadow type */ + skel->struct_ops.testmod_map->data = 13; + err = example__load(skel); if (err) goto cleanup; + /* The result of the function test_2() */ + printf("test_2_result: %d\n", skel->bss->test_2_result); + err = example__attach(skel); if (err) goto cleanup; @@ -372,6 +406,7 @@ BPF ELF object file *example.bpf.o*. 
:: + test_2_result: 17 my_map name: my_map sys_enter prog FD: 8 my_static_var: 7 diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 84839d488621..2e5d81c906dc 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -14,50 +14,46 @@ tool to create BPF iterators SYNOPSIS ======== - **bpftool** [*OPTIONS*] **iter** *COMMAND* +**bpftool** [*OPTIONS*] **iter** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } +*OPTIONS* := { |COMMON_OPTIONS| } - *COMMANDS* := { **pin** | **help** } +*COMMANDS* := { **pin** | **help** } ITER COMMANDS -=================== +============= -| **bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*] -| **bpftool** **iter help** +| **bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*] +| **bpftool** **iter help** | -| *OBJ* := /a/file/of/bpf_iter_target.o -| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *OBJ* := /a/file/of/bpf_iter_target.o +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } DESCRIPTION =========== - **bpftool iter pin** *OBJ* *PATH* [**map** *MAP*] - A bpf iterator combines a kernel iterating of - particular kernel data (e.g., tasks, bpf_maps, etc.) - and a bpf program called for each kernel data object - (e.g., one task, one bpf_map, etc.). User space can - *read* kernel iterator output through *read()* syscall. - - The *pin* command creates a bpf iterator from *OBJ*, - and pin it to *PATH*. The *PATH* should be located - in *bpffs* mount. It must not contain a dot - character ('.'), which is reserved for future extensions - of *bpffs*. - - Map element bpf iterator requires an additional parameter - *MAP* so bpf program can iterate over map elements for - that map. User can have a bpf program in kernel to run - with each map element, do checking, filtering, aggregation, - etc. without copying data to user space. - - User can then *cat PATH* to see the bpf iterator output. 
- - **bpftool iter help** - Print short help message. +bpftool iter pin *OBJ* *PATH* [map *MAP*] + A bpf iterator combines a kernel iterating of particular kernel data (e.g., + tasks, bpf_maps, etc.) and a bpf program called for each kernel data object + (e.g., one task, one bpf_map, etc.). User space can *read* kernel iterator + output through *read()* syscall. + + The *pin* command creates a bpf iterator from *OBJ*, and pin it to *PATH*. + The *PATH* should be located in *bpffs* mount. It must not contain a dot + character ('.'), which is reserved for future extensions of *bpffs*. + + Map element bpf iterator requires an additional parameter *MAP* so bpf + program can iterate over map elements for that map. User can have a bpf + program in kernel to run with each map element, do checking, filtering, + aggregation, etc. without copying data to user space. + + User can then *cat PATH* to see the bpf iterator output. + +bpftool iter help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. 
include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 52a4eee4af54..6f09d4405ed8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -14,67 +14,62 @@ tool for inspection and simple manipulation of eBPF links SYNOPSIS ======== - **bpftool** [*OPTIONS*] **link** *COMMAND* +**bpftool** [*OPTIONS*] **link** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } +*OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } - *COMMANDS* := { **show** | **list** | **pin** | **help** } +*COMMANDS* := { **show** | **list** | **pin** | **help** } LINK COMMANDS ============= -| **bpftool** **link { show | list }** [*LINK*] -| **bpftool** **link pin** *LINK* *FILE* -| **bpftool** **link detach** *LINK* -| **bpftool** **link help** +| **bpftool** **link { show | list }** [*LINK*] +| **bpftool** **link pin** *LINK* *FILE* +| **bpftool** **link detach** *LINK* +| **bpftool** **link help** | -| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* } +| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* } DESCRIPTION =========== - **bpftool link { show | list }** [*LINK*] - Show information about active links. If *LINK* is - specified show information only about given link, - otherwise list all links currently active on the system. +bpftool link { show | list } [*LINK*] + Show information about active links. If *LINK* is specified show + information only about given link, otherwise list all links currently + active on the system. - Output will start with link ID followed by link type and - zero or more named attributes, some of which depend on type - of link. + Output will start with link ID followed by link type and zero or more named + attributes, some of which depend on type of link. 
- Since Linux 5.8 bpftool is able to discover information about - processes that hold open file descriptors (FDs) against BPF - links. On such kernels bpftool will automatically emit this - information as well. + Since Linux 5.8 bpftool is able to discover information about processes + that hold open file descriptors (FDs) against BPF links. On such kernels + bpftool will automatically emit this information as well. - **bpftool link pin** *LINK* *FILE* - Pin link *LINK* as *FILE*. +bpftool link pin *LINK* *FILE* + Pin link *LINK* as *FILE*. - Note: *FILE* must be located in *bpffs* mount. It must not - contain a dot character ('.'), which is reserved for future - extensions of *bpffs*. + Note: *FILE* must be located in *bpffs* mount. It must not contain a dot + character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool link detach** *LINK* - Force-detach link *LINK*. BPF link and its underlying BPF - program will stay valid, but they will be detached from the - respective BPF hook and BPF link will transition into - a defunct state until last open file descriptor for that - link is closed. +bpftool link detach *LINK* + Force-detach link *LINK*. BPF link and its underlying BPF program will stay + valid, but they will be detached from the respective BPF hook and BPF link + will transition into a defunct state until last open file descriptor for + that link is closed. - **bpftool link help** - Print short help message. +bpftool link help + Print short help message. OPTIONS ======= - .. include:: common_options.rst + .. include:: common_options.rst - -f, --bpffs - When showing BPF links, show file names of pinned - links. + -f, --bpffs + When showing BPF links, show file names of pinned links. - -n, --nomount - Do not automatically attempt to mount any virtual file system - (such as tracefs or BPF virtual file system) when necessary. 
+ -n, --nomount + Do not automatically attempt to mount any virtual file system (such as + tracefs or BPF virtual file system) when necessary. EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 11250c4734fe..1af3305ea2b2 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -14,166 +14,161 @@ tool for inspection and simple manipulation of eBPF maps SYNOPSIS ======== - **bpftool** [*OPTIONS*] **map** *COMMAND* +**bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } +*OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } - *COMMANDS* := - { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | - **delete** | **pin** | **help** } +*COMMANDS* := +{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | +**delete** | **pin** | **help** } MAP COMMANDS ============= -| **bpftool** **map** { **show** | **list** } [*MAP*] -| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ -| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \ -| [**dev** *NAME*] -| **bpftool** **map dump** *MAP* -| **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] -| **bpftool** **map lookup** *MAP* [**key** *DATA*] -| **bpftool** **map getnext** *MAP* [**key** *DATA*] -| **bpftool** **map delete** *MAP* **key** *DATA* -| **bpftool** **map pin** *MAP* *FILE* -| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] -| **bpftool** **map peek** *MAP* -| **bpftool** **map push** *MAP* **value** *VALUE* -| **bpftool** **map pop** *MAP* -| **bpftool** **map enqueue** *MAP* **value** *VALUE* -| **bpftool** **map dequeue** *MAP* -| **bpftool** **map freeze** *MAP* -| 
**bpftool** **map help** +| **bpftool** **map** { **show** | **list** } [*MAP*] +| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ +| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \ +| [**offload_dev** *NAME*] +| **bpftool** **map dump** *MAP* +| **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] +| **bpftool** **map lookup** *MAP* [**key** *DATA*] +| **bpftool** **map getnext** *MAP* [**key** *DATA*] +| **bpftool** **map delete** *MAP* **key** *DATA* +| **bpftool** **map pin** *MAP* *FILE* +| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] +| **bpftool** **map peek** *MAP* +| **bpftool** **map push** *MAP* **value** *VALUE* +| **bpftool** **map pop** *MAP* +| **bpftool** **map enqueue** *MAP* **value** *VALUE* +| **bpftool** **map dequeue** *MAP* +| **bpftool** **map freeze** *MAP* +| **bpftool** **map help** | -| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* } -| *DATA* := { [**hex**] *BYTES* } -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } -| *VALUE* := { *DATA* | *MAP* | *PROG* } -| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } -| *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** -| | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** -| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** -| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** -| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** -| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* } +| *DATA* := { [**hex**] *BYTES* } +| *PROG* := { **id** *PROG_ID* 
| **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } +| *VALUE* := { *DATA* | *MAP* | *PROG* } +| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } +| *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** +| | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** +| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** +| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** +| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** +| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** +| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** +| | **insn_array** } DESCRIPTION =========== - **bpftool map { show | list }** [*MAP*] - Show information about loaded maps. If *MAP* is specified - show information only about given maps, otherwise list all - maps currently loaded on the system. In case of **name**, - *MAP* may match several maps which will all be shown. +bpftool map { show | list } [*MAP*] + Show information about loaded maps. If *MAP* is specified show information + only about given maps, otherwise list all maps currently loaded on the + system. In case of **name**, *MAP* may match several maps which will all + be shown. - Output will start with map ID followed by map type and - zero or more named attributes (depending on kernel version). + Output will start with map ID followed by map type and zero or more named + attributes (depending on kernel version). - Since Linux 5.8 bpftool is able to discover information about - processes that hold open file descriptors (FDs) against BPF - maps. On such kernels bpftool will automatically emit this - information as well. + Since Linux 5.8 bpftool is able to discover information about processes + that hold open file descriptors (FDs) against BPF maps. 
On such kernels + bpftool will automatically emit this information as well. - **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*] - Create a new map with given parameters and pin it to *bpffs* - as *FILE*. +bpftool map create *FILE* type *TYPE* key *KEY_SIZE* value *VALUE_SIZE* entries *MAX_ENTRIES* name *NAME* [flags *FLAGS*] [inner_map *MAP*] [offload_dev *NAME*] + Create a new map with given parameters and pin it to *bpffs* as *FILE*. - *FLAGS* should be an integer which is the combination of - desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h - UAPI header for existing flags). + *FLAGS* should be an integer which is the combination of desired flags, + e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h UAPI header for existing + flags). - To create maps of type array-of-maps or hash-of-maps, the - **inner_map** keyword must be used to pass an inner map. The - kernel needs it to collect metadata related to the inner maps - that the new map will work with. + To create maps of type array-of-maps or hash-of-maps, the **inner_map** + keyword must be used to pass an inner map. The kernel needs it to collect + metadata related to the inner maps that the new map will work with. - Keyword **dev** expects a network interface name, and is used - to request hardware offload for the map. + Keyword **offload_dev** expects a network interface name, and is used to + request hardware offload for the map. - **bpftool map dump** *MAP* - Dump all entries in a given *MAP*. In case of **name**, - *MAP* may match several maps which will all be dumped. +bpftool map dump *MAP* + Dump all entries in a given *MAP*. In case of **name**, *MAP* may match + several maps which will all be dumped. - **bpftool map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] - Update map entry for a given *KEY*. 
+bpftool map update *MAP* [key *DATA*] [value *VALUE*] [*UPDATE_FLAGS*] + Update map entry for a given *KEY*. - *UPDATE_FLAGS* can be one of: **any** update existing entry - or add if doesn't exit; **exist** update only if entry already - exists; **noexist** update only if entry doesn't exist. + *UPDATE_FLAGS* can be one of: **any** update existing entry or add if + it doesn't exist; **exist** update only if entry already exists; **noexist** + update only if entry doesn't exist. - If the **hex** keyword is provided in front of the bytes - sequence, the bytes are parsed as hexadecimal values, even if - no "0x" prefix is added. If the keyword is not provided, then - the bytes are parsed as decimal values, unless a "0x" prefix - (for hexadecimal) or a "0" prefix (for octal) is provided. + If the **hex** keyword is provided in front of the bytes sequence, the + bytes are parsed as hexadecimal values, even if no "0x" prefix is added. If + the keyword is not provided, then the bytes are parsed as decimal values, + unless a "0x" prefix (for hexadecimal) or a "0" prefix (for octal) is + provided. - **bpftool map lookup** *MAP* [**key** *DATA*] - Lookup **key** in the map. +bpftool map lookup *MAP* [key *DATA*] + Lookup **key** in the map. - **bpftool map getnext** *MAP* [**key** *DATA*] - Get next key. If *key* is not specified, get first key. +bpftool map getnext *MAP* [key *DATA*] + Get next key. If *key* is not specified, get first key. - **bpftool map delete** *MAP* **key** *DATA* - Remove entry from the map. +bpftool map delete *MAP* key *DATA* + Remove entry from the map. - **bpftool map pin** *MAP* *FILE* - Pin map *MAP* as *FILE*. +bpftool map pin *MAP* *FILE* + Pin map *MAP* as *FILE*. - Note: *FILE* must be located in *bpffs* mount. It must not - contain a dot character ('.'), which is reserved for future - extensions of *bpffs*. + Note: *FILE* must be located in *bpffs* mount.
It must not contain a dot + character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] - Read events from a **BPF_MAP_TYPE_PERF_EVENT_ARRAY** map. +bpftool map event_pipe *MAP* [cpu *N* index *M*] + Read events from a **BPF_MAP_TYPE_PERF_EVENT_ARRAY** map. - Install perf rings into a perf event array map and dump - output of any **bpf_perf_event_output**\ () call in the kernel. - By default read the number of CPUs on the system and - install perf ring for each CPU in the corresponding index - in the array. + Install perf rings into a perf event array map and dump output of any + **bpf_perf_event_output**\ () call in the kernel. By default read the + number of CPUs on the system and install perf ring for each CPU in the + corresponding index in the array. - If **cpu** and **index** are specified, install perf ring - for given **cpu** at **index** in the array (single ring). + If **cpu** and **index** are specified, install perf ring for given **cpu** + at **index** in the array (single ring). - Note that installing a perf ring into an array will silently - replace any existing ring. Any other application will stop - receiving events if it installed its rings earlier. + Note that installing a perf ring into an array will silently replace any + existing ring. Any other application will stop receiving events if it + installed its rings earlier. - **bpftool map peek** *MAP* - Peek next value in the queue or stack. +bpftool map peek *MAP* + Peek next value in the queue or stack. - **bpftool map push** *MAP* **value** *VALUE* - Push *VALUE* onto the stack. +bpftool map push *MAP* value *VALUE* + Push *VALUE* onto the stack. - **bpftool map pop** *MAP* - Pop and print value from the stack. +bpftool map pop *MAP* + Pop and print value from the stack. - **bpftool map enqueue** *MAP* **value** *VALUE* - Enqueue *VALUE* into the queue. 
+bpftool map enqueue *MAP* value *VALUE* + Enqueue *VALUE* into the queue. - **bpftool map dequeue** *MAP* - Dequeue and print value from the queue. +bpftool map dequeue *MAP* + Dequeue and print value from the queue. - **bpftool map freeze** *MAP* - Freeze the map as read-only from user space. Entries from a - frozen map can not longer be updated or deleted with the - **bpf**\ () system call. This operation is not reversible, - and the map remains immutable from user space until its - destruction. However, read and write permissions for BPF - programs to the map remain unchanged. +bpftool map freeze *MAP* + Freeze the map as read-only from user space. Entries from a frozen map can + no longer be updated or deleted with the **bpf**\ () system call. This + operation is not reversible, and the map remains immutable from user space + until its destruction. However, read and write permissions for BPF programs + to the map remain unchanged. - **bpftool map help** - Print short help message. +bpftool map help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. include:: common_options.rst - -f, --bpffs - Show file names of pinned maps. +-f, --bpffs + Show file names of pinned maps. - -n, --nomount - Do not automatically attempt to mount any virtual file system - (such as tracefs or BPF virtual file system) when necessary. +-n, --nomount + Do not automatically attempt to mount any virtual file system (such as + tracefs or BPF virtual file system) when necessary. 
EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index f4e0a516335a..a9ed8992800f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -4,7 +4,7 @@ bpftool-net ================ ------------------------------------------------------------------------------- -tool for inspection of netdev/tc related bpf prog attachments +tool for inspection of networking related bpf prog attachments ------------------------------------------------------------------------------- :Manual section: 8 @@ -14,72 +14,76 @@ tool for inspection of netdev/tc related bpf prog attachments SYNOPSIS ======== - **bpftool** [*OPTIONS*] **net** *COMMAND* +**bpftool** [*OPTIONS*] **net** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } +*OPTIONS* := { |COMMON_OPTIONS| } - *COMMANDS* := - { **show** | **list** | **attach** | **detach** | **help** } +*COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } NET COMMANDS ============ -| **bpftool** **net** { **show** | **list** } [ **dev** *NAME* ] -| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] -| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* -| **bpftool** **net help** +| **bpftool** **net** { **show** | **list** } [ **dev** *NAME* ] +| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] +| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* +| **bpftool** **net help** | -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { **xdp** | **xdpgeneric** | **xdpdrv** | **xdpoffload** } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } +| *ATTACH_TYPE* := { **xdp** | **xdpgeneric** | **xdpdrv** | **xdpoffload** | **tcx_ingress** | **tcx_egress** } DESCRIPTION =========== - **bpftool net { show | list }** [ **dev** *NAME* ] - List bpf 
program attachments in the kernel networking subsystem. - - Currently, only device driver xdp attachments and tc filter - classification/action attachments are implemented, i.e., for - program types **BPF_PROG_TYPE_SCHED_CLS**, - **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. - For programs attached to a particular cgroup, e.g., - **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, - **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, - users can use **bpftool cgroup** to dump cgroup attachments. - For sk_{filter, skb, msg, reuseport} and lwt/seg6 - bpf programs, users should consult other tools, e.g., iproute2. - - The current output will start with all xdp program attachments, followed by - all tc class/qdisc bpf program attachments. Both xdp programs and - tc programs are ordered based on ifindex number. If multiple bpf - programs attached to the same networking device through **tc filter**, - the order will be first all bpf programs attached to tc classes, then - all bpf programs attached to non clsact qdiscs, and finally all - bpf programs attached to root and clsact qdisc. - - **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] - Attach bpf program *PROG* to network interface *NAME* with - type specified by *ATTACH_TYPE*. Previously attached bpf program - can be replaced by the command used with **overwrite** option. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. - - *ATTACH_TYPE* can be of: - **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; - **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; - **xdpdrv** - Native XDP. runs earliest point in driver's receive path; - **xdpoffload** - Offload XDP. 
runs directly on NIC on each packet reception; - - **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* - Detach bpf program attached to network interface *NAME* with - type specified by *ATTACH_TYPE*. To detach bpf program, same - *ATTACH_TYPE* previously used for attach must be specified. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. - - **bpftool net help** - Print short help message. +bpftool net { show | list } [ dev *NAME* ] + List bpf program attachments in the kernel networking subsystem. + + Currently, device driver xdp attachments, tcx, netkit and old-style tc + classifier/action attachments, flow_dissector as well as netfilter + attachments are implemented, i.e., for program types **BPF_PROG_TYPE_XDP**, + **BPF_PROG_TYPE_SCHED_CLS**, **BPF_PROG_TYPE_SCHED_ACT**, + **BPF_PROG_TYPE_FLOW_DISSECTOR**, **BPF_PROG_TYPE_NETFILTER**. + + For programs attached to a particular cgroup, e.g., + **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, + **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, users + can use **bpftool cgroup** to dump cgroup attachments. For sk_{filter, skb, + msg, reuseport} and lwt/seg6 bpf programs, users should consult other + tools, e.g., iproute2. + + The current output will start with all xdp program attachments, followed by + all tcx, netkit, then tc class/qdisc bpf program attachments, then + flow_dissector and finally netfilter programs. Both xdp programs and + tcx/netkit/tc programs are ordered based on ifindex number. If multiple bpf + programs attached to the same networking device through **tc**, the order + will be first all bpf programs attached to tcx, netkit, then tc classes, + then all bpf programs attached to non clsact qdiscs, and finally all bpf + programs attached to root and clsact qdisc. + +bpftool net attach *ATTACH_TYPE* *PROG* dev *NAME* [ overwrite ] + Attach bpf program *PROG* to network interface *NAME* with type specified + by *ATTACH_TYPE*. 
Previously attached bpf program can be replaced by the + command used with **overwrite** option. Currently, only XDP-related modes + are supported for *ATTACH_TYPE*. + + *ATTACH_TYPE* can be of: + **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; + **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; + **xdpdrv** - Native XDP. runs earliest point in driver's receive path; + **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception; + **tcx_ingress** - Ingress TCX. runs on ingress net traffic; + **tcx_egress** - Egress TCX. runs on egress net traffic; + +bpftool net detach *ATTACH_TYPE* dev *NAME* + Detach bpf program attached to network interface *NAME* with type specified + by *ATTACH_TYPE*. To detach bpf program, same *ATTACH_TYPE* previously used + for attach must be specified. Currently, only XDP-related modes are + supported for *ATTACH_TYPE*. + +bpftool net help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. 
include:: common_options.rst EXAMPLES ======== @@ -176,3 +180,23 @@ EXAMPLES :: xdp: + +| +| **# bpftool net attach tcx_ingress name tc_prog dev lo** +| **# bpftool net** +| + +:: + + tc: + lo(1) tcx/ingress tc_prog prog_id 29 + +| +| **# bpftool net attach tcx_ingress name tc_prog dev lo** +| **# bpftool net detach tcx_ingress dev lo** +| **# bpftool net** +| + +:: + + tc: diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index 5fea633a82f1..8c1ae55be596 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -14,37 +14,37 @@ tool for inspection of perf related bpf prog attachments SYNOPSIS ======== - **bpftool** [*OPTIONS*] **perf** *COMMAND* +**bpftool** [*OPTIONS*] **perf** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } +*OPTIONS* := { |COMMON_OPTIONS| } - *COMMANDS* := - { **show** | **list** | **help** } +*COMMANDS* := +{ **show** | **list** | **help** } PERF COMMANDS ============= -| **bpftool** **perf** { **show** | **list** } -| **bpftool** **perf help** +| **bpftool** **perf** { **show** | **list** } +| **bpftool** **perf help** DESCRIPTION =========== - **bpftool perf { show | list }** - List all raw_tracepoint, tracepoint, kprobe attachment in the system. +bpftool perf { show | list } + List all raw_tracepoint, tracepoint, kprobe attachment in the system. - Output will start with process id and file descriptor in that process, - followed by bpf program id, attachment information, and attachment point. - The attachment point for raw_tracepoint/tracepoint is the trace probe name. - The attachment point for k[ret]probe is either symbol name and offset, - or a kernel virtual address. - The attachment point for u[ret]probe is the file name and the file offset. + Output will start with process id and file descriptor in that process, + followed by bpf program id, attachment information, and attachment point. 
+ The attachment point for raw_tracepoint/tracepoint is the trace probe name. + The attachment point for k[ret]probe is either symbol name and offset, or a + kernel virtual address. The attachment point for u[ret]probe is the file + name and the file offset. - **bpftool perf help** - Print short help message. +bpftool perf help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 14de72544995..35aeeaf5f711 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -14,247 +14,251 @@ tool for inspection and simple manipulation of eBPF progs SYNOPSIS ======== - **bpftool** [*OPTIONS*] **prog** *COMMAND* +**bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| | - { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | - { **-L** | **--use-loader** } } +*OPTIONS* := { |COMMON_OPTIONS| | +{ **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | +{ **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] } - *COMMANDS* := - { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | - **loadall** | **help** } +*COMMANDS* := +{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | +**loadall** | **help** } PROG COMMANDS ============= -| **bpftool** **prog** { **show** | **list** } [*PROG*] -| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] -| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] -| **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} 
*MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] -| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] -| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] -| **bpftool** **prog tracelog** -| **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] -| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs* -| **bpftool** **prog help** +| **bpftool** **prog** { **show** | **list** } [*PROG*] +| **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] +| **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] +| **bpftool** **prog pin** *PROG* *FILE* +| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] [**kernel_btf** *BTF_FILE*] +| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog tracelog** +| **bpftool** **prog tracelog** [ { **stdout** | **stderr** } *PROG* ] +| **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] +| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs* +| **bpftool** **prog help** | -| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } -| *TYPE* := { -| **socket** | **kprobe** | **kretprobe** | **classifier** | **action** | -| **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** | -| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** | -| **lwt_seg6local** | 
**sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | -| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | -| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | -| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | -| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | -| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | -| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** -| } -| *ATTACH_TYPE* := { -| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** | -| **sk_skb_stream_parser** | **flow_dissector** -| } -| *METRICs* := { -| **cycles** | **instructions** | **l1d_loads** | **llc_misses** | -| **itlb_misses** | **dtlb_misses** -| } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } +| *TYPE* := { +| **socket** | **kprobe** | **kretprobe** | **classifier** | **action** | +| **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** | +| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** | +| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | +| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | +| **cgroup/connect4** | **cgroup/connect6** | **cgroup/connect_unix** | +| **cgroup/getpeername4** | **cgroup/getpeername6** | **cgroup/getpeername_unix** | +| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/getsockname_unix** | +| **cgroup/sendmsg4** | **cgroup/sendmsg6** | **cgroup/sendmsg_unix** | +| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/recvmsg_unix** | **cgroup/sysctl** | +| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | +| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** 
+| } +| *ATTACH_TYPE* := { +| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** | +| **sk_skb_stream_parser** | **flow_dissector** +| } +| *METRICs* := { +| **cycles** | **instructions** | **l1d_loads** | **llc_misses** | +| **itlb_misses** | **dtlb_misses** +| } DESCRIPTION =========== - **bpftool prog { show | list }** [*PROG*] - Show information about loaded programs. If *PROG* is - specified show information only about given programs, - otherwise list all programs currently loaded on the system. - In case of **tag** or **name**, *PROG* may match several - programs which will all be shown. - - Output will start with program ID followed by program type and - zero or more named attributes (depending on kernel version). - - Since Linux 5.1 the kernel can collect statistics on BPF - programs (such as the total time spent running the program, - and the number of times it was run). If available, bpftool - shows such statistics. However, the kernel does not collect - them by defaults, as it slightly impacts performance on each - program run. Activation or deactivation of the feature is - performed via the **kernel.bpf_stats_enabled** sysctl knob. - - Since Linux 5.8 bpftool is able to discover information about - processes that hold open file descriptors (FDs) against BPF - programs. On such kernels bpftool will automatically emit this - information as well. - - **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] - Dump eBPF instructions of the programs from the kernel. By - default, eBPF will be disassembled and printed to standard - output in human-readable format. In this case, **opcodes** - controls if raw opcodes should be printed as well. - - In case of **tag** or **name**, *PROG* may match several - programs which will all be dumped. However, if **file** or - **visual** is specified, *PROG* must match a single program. 
- - If **file** is specified, the binary image will instead be - written to *FILE*. - - If **visual** is specified, control flow graph (CFG) will be - built instead, and eBPF instructions will be presented with - CFG in DOT format, on standard output. - - If the programs have line_info available, the source line will - be displayed by default. If **linum** is specified, - the filename, line number and line column will also be - displayed on top of the source line. - - **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] - Dump jited image (host machine code) of the program. - - If *FILE* is specified image will be written to a file, - otherwise it will be disassembled and printed to stdout. - *PROG* must match a single program when **file** is specified. - - **opcodes** controls if raw opcodes will be printed. - - If the prog has line_info available, the source line will - be displayed by default. If **linum** is specified, - the filename, line number and line column will also be - displayed on top of the source line. - - **bpftool prog pin** *PROG* *FILE* - Pin program *PROG* as *FILE*. - - Note: *FILE* must be located in *bpffs* mount. It must not - contain a dot character ('.'), which is reserved for future - extensions of *bpffs*. - - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] - Load bpf program(s) from binary *OBJ* and pin as *PATH*. - **bpftool prog load** pins only the first program from the - *OBJ* as *PATH*. **bpftool prog loadall** pins all programs - from the *OBJ* under *PATH* directory. - **type** is optional, if not specified program type will be - inferred from section names. - By default bpftool will create new maps as declared in the ELF - object being loaded. **map** parameter allows for the reuse - of existing maps. It can be specified multiple times, each - time for a different map. 
*IDX* refers to index of the map - to be replaced in the ELF file counting from 0, while *NAME* - allows to replace a map by name. *MAP* specifies the map to - use, referring to it by **id** or through a **pinned** file. - If **dev** *NAME* is specified program will be loaded onto - given networking device (offload). - Optional **pinmaps** argument can be provided to pin all - maps under *MAP_DIR* directory. - - If **autoattach** is specified program will be attached - before pin. In that case, only the link (representing the - program attached to its hook) is pinned, not the program as - such, so the path won't show in **bpftool prog show -f**, - only show in **bpftool link show -f**. Also, this only works - when bpftool (libbpf) is able to infer all necessary - information from the object file, in particular, it's not - supported for all program types. If a program does not - support autoattach, bpftool falls back to regular pinning - for that program instead. - - Note: *PATH* must be located in *bpffs* mount. It must not - contain a dot character ('.'), which is reserved for future - extensions of *bpffs*. - - **bpftool prog attach** *PROG* *ATTACH_TYPE* [*MAP*] - Attach bpf program *PROG* (with type specified by - *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* - parameter, with the exception of *flow_dissector* which is - attached to current networking name space. - - **bpftool prog detach** *PROG* *ATTACH_TYPE* [*MAP*] - Detach bpf program *PROG* (with type specified by - *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* - parameter, with the exception of *flow_dissector* which is - detached from the current networking name space. - - **bpftool prog tracelog** - Dump the trace pipe of the system to the console (stdout). - Hit <Ctrl+C> to stop printing. BPF programs can write to this - trace pipe at runtime with the **bpf_trace_printk**\ () helper. - This should be used only for debugging purposes. 
For - streaming data from BPF programs to user space, one can use - perf events (see also **bpftool-map**\ (8)). - - **bpftool prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] - Run BPF program *PROG* in the kernel testing infrastructure - for BPF, meaning that the program works on the data and - context provided by the user, and not on actual packets or - monitored functions etc. Return value and duration for the - test run are printed out to the console. - - Input data is read from the *FILE* passed with **data_in**. - If this *FILE* is "**-**", input data is read from standard - input. Input context, if any, is read from *FILE* passed with - **ctx_in**. Again, "**-**" can be used to read from standard - input, but only if standard input is not already in use for - input data. If a *FILE* is passed with **data_out**, output - data is written to that file. Similarly, output context is - written to the *FILE* passed with **ctx_out**. For both - output flows, "**-**" can be used to print to the standard - output (as plain text, or JSON if relevant option was - passed). If output keywords are omitted, output data and - context are discarded. Keywords **data_size_out** and - **ctx_size_out** are used to pass the size (in bytes) for the - output buffers to the kernel, although the default of 32 kB - should be more than enough for most cases. - - Keyword **repeat** is used to indicate the number of - consecutive runs to perform. Note that output data and - context printed to files correspond to the last of those - runs. The duration printed out at the end of the runs is an - average over all runs performed by the command. - - Not all program types support test run. Among those which do, - not all of them can take the **ctx_in**/**ctx_out** - arguments. bpftool does not perform checks on program types. 
- - **bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs* - Profile *METRICs* for bpf program *PROG* for *DURATION* - seconds or until user hits <Ctrl+C>. *DURATION* is optional. - If *DURATION* is not specified, the profiling will run up to - **UINT_MAX** seconds. - - **bpftool prog help** - Print short help message. +bpftool prog { show | list } [*PROG*] + Show information about loaded programs. If *PROG* is specified show + information only about given programs, otherwise list all programs + currently loaded on the system. In case of **tag** or **name**, *PROG* may + match several programs which will all be shown. + + Output will start with program ID followed by program type and zero or more + named attributes (depending on kernel version). + + Since Linux 5.1 the kernel can collect statistics on BPF programs (such as + the total time spent running the program, and the number of times it was + run). If available, bpftool shows such statistics. However, the kernel does + not collect them by default, as it slightly impacts performance on each + program run. Activation or deactivation of the feature is performed via the + **kernel.bpf_stats_enabled** sysctl knob. + + Since Linux 5.8 bpftool is able to discover information about processes + that hold open file descriptors (FDs) against BPF programs. On such kernels + bpftool will automatically emit this information as well. + +bpftool prog dump xlated *PROG* [{ file *FILE* | [opcodes] [linum] [visual] }] + Dump eBPF instructions of the programs from the kernel. By default, eBPF + will be disassembled and printed to standard output in human-readable + format. In this case, **opcodes** controls if raw opcodes should be printed + as well. + + In case of **tag** or **name**, *PROG* may match several programs which + will all be dumped. However, if **file** or **visual** is specified, + *PROG* must match a single program. + + If **file** is specified, the binary image will instead be written to + *FILE*. 
+ + If **visual** is specified, control flow graph (CFG) will be built instead, + and eBPF instructions will be presented with CFG in DOT format, on standard + output. + + If the programs have line_info available, the source line will be + displayed. If **linum** is specified, the filename, line number and line + column will also be displayed. + +bpftool prog dump jited *PROG* [{ file *FILE* | [opcodes] [linum] }] + Dump jited image (host machine code) of the program. + + If *FILE* is specified image will be written to a file, otherwise it will + be disassembled and printed to stdout. *PROG* must match a single program + when **file** is specified. + + **opcodes** controls if raw opcodes will be printed. + + If the prog has line_info available, the source line will be displayed. If + **linum** is specified, the filename, line number and line column will also + be displayed. + +bpftool prog pin *PROG* *FILE* + Pin program *PROG* as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. It must not contain a dot + character ('.'), which is reserved for future extensions of *bpffs*. + +bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach] [kernel_btf *BTF_FILE*] + Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog + load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog + loadall** pins all programs from the *OBJ* under *PATH* directory. **type** + is optional, if not specified program type will be inferred from section + names. By default bpftool will create new maps as declared in the ELF + object being loaded. **map** parameter allows for the reuse of existing + maps. It can be specified multiple times, each time for a different map. + *IDX* refers to index of the map to be replaced in the ELF file counting + from 0, while *NAME* allows to replace a map by name. 
*MAP* specifies the + map to use, referring to it by **id** or through a **pinned** file. If + **offload_dev** *NAME* is specified program will be loaded onto given + networking device (offload). If **xdpmeta_dev** *NAME* is specified program + will become device-bound without offloading, this facilitates access to XDP + metadata. Optional **pinmaps** argument can be provided to pin all maps + under *MAP_DIR* directory. + + If **autoattach** is specified program will be attached before pin. In that + case, only the link (representing the program attached to its hook) is + pinned, not the program as such, so the path won't show in **bpftool prog + show -f**, only show in **bpftool link show -f**. Also, this only works + when bpftool (libbpf) is able to infer all necessary information from the + object file, in particular, it's not supported for all program types. If a + program does not support autoattach, bpftool falls back to regular pinning + for that program instead. + + The **kernel_btf** option allows specifying an external BTF file to replace + the system's own vmlinux BTF file for CO-RE relocations. Note that any + other feature relying on BTF (such as fentry/fexit programs, struct_ops) + requires the BTF file for the actual kernel running on the host, often + exposed at /sys/kernel/btf/vmlinux. + + Note: *PATH* must be located in *bpffs* mount. It must not contain a dot + character ('.'), which is reserved for future extensions of *bpffs*. + +bpftool prog attach *PROG* *ATTACH_TYPE* [*MAP*] + Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*). Most + *ATTACH_TYPEs* require a *MAP* parameter, with the exception of + *flow_dissector* which is attached to current networking name space. + +bpftool prog detach *PROG* *ATTACH_TYPE* [*MAP*] + Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*). 
Most + *ATTACH_TYPEs* require a *MAP* parameter, with the exception of + *flow_dissector* which is detached from the current networking name space. + +bpftool prog tracelog + Dump the trace pipe of the system to the console (stdout). Hit <Ctrl+C> to + stop printing. BPF programs can write to this trace pipe at runtime with + the **bpf_trace_printk**\ () helper. This should be used only for debugging + purposes. For streaming data from BPF programs to user space, one can use + perf events (see also **bpftool-map**\ (8)). + +bpftool prog tracelog { stdout | stderr } *PROG* + Dump the BPF stream of the program. BPF programs can write to these streams + at runtime with the **bpf_stream_vprintk_impl**\ () kfunc. The kernel may write + error messages to the standard error stream. This facility should be used + only for debugging purposes. + +bpftool prog run *PROG* data_in *FILE* [data_out *FILE* [data_size_out *L*]] [ctx_in *FILE* [ctx_out *FILE* [ctx_size_out *M*]]] [repeat *N*] + Run BPF program *PROG* in the kernel testing infrastructure for BPF, + meaning that the program works on the data and context provided by the + user, and not on actual packets or monitored functions etc. Return value + and duration for the test run are printed out to the console. + + Input data is read from the *FILE* passed with **data_in**. If this *FILE* + is "**-**", input data is read from standard input. Input context, if any, + is read from *FILE* passed with **ctx_in**. Again, "**-**" can be used to + read from standard input, but only if standard input is not already in use + for input data. If a *FILE* is passed with **data_out**, output data is + written to that file. Similarly, output context is written to the *FILE* + passed with **ctx_out**. For both output flows, "**-**" can be used to + print to the standard output (as plain text, or JSON if relevant option was + passed). If output keywords are omitted, output data and context are + discarded. 
Keywords **data_size_out** and **ctx_size_out** are used to pass + the size (in bytes) for the output buffers to the kernel, although the + default of 32 kB should be more than enough for most cases. + + Keyword **repeat** is used to indicate the number of consecutive runs to + perform. Note that output data and context printed to files correspond to + the last of those runs. The duration printed out at the end of the runs is + an average over all runs performed by the command. + + Not all program types support test run. Among those which do, not all of + them can take the **ctx_in**/**ctx_out** arguments. bpftool does not + perform checks on program types. + +bpftool prog profile *PROG* [duration *DURATION*] *METRICs* + Profile *METRICs* for bpf program *PROG* for *DURATION* seconds or until + user hits <Ctrl+C>. *DURATION* is optional. If *DURATION* is not specified, + the profiling will run up to **UINT_MAX** seconds. + +bpftool prog help + Print short help message. OPTIONS ======= - .. include:: common_options.rst - - -f, --bpffs - When showing BPF programs, show file names of pinned - programs. - - -m, --mapcompat - Allow loading maps with unknown map definitions. - - -n, --nomount - Do not automatically attempt to mount any virtual file system - (such as tracefs or BPF virtual file system) when necessary. - - -L, --use-loader - Load program as a "loader" program. This is useful to debug - the generation of such programs. When this option is in - use, bpftool attempts to load the programs from the object - file into the kernel, but does not pin them (therefore, the - *PATH* must not be provided). - - When combined with the **-d**\ \|\ **--debug** option, - additional debug messages are generated, and the execution - of the loader program will use the **bpf_trace_printk**\ () - helper to log each step of loading BTF, creating the maps, - and loading the programs (see **bpftool prog tracelog** as - a way to dump those messages). +.. 
include:: common_options.rst + +-f, --bpffs + When showing BPF programs, show file names of pinned programs. + +-m, --mapcompat + Allow loading maps with unknown map definitions. + +-n, --nomount + Do not automatically attempt to mount any virtual file system (such as + tracefs or BPF virtual file system) when necessary. + +-L, --use-loader + Load program as a "loader" program. This is useful to debug the generation + of such programs. When this option is in use, bpftool attempts to load the + programs from the object file into the kernel, but does not pin them + (therefore, the *PATH* must not be provided). + + When combined with the **-d**\ \|\ **--debug** option, additional debug + messages are generated, and the execution of the loader program will use + the **bpf_trace_printk**\ () helper to log each step of loading BTF, + creating the maps, and loading the programs (see **bpftool prog tracelog** + as a way to dump those messages). + +-S, --sign + Enable signing of the BPF program before loading. This option must be + used with **-k** and **-i**. Using this flag implicitly enables + **--use-loader**. + +-k <private_key.pem> + Path to the private key file in PEM format, required when signing. + +-i <certificate.x509> + Path to the X.509 certificate file in PEM or DER format, required when + signing. 
EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index ee53a122c0c7..e871b9539ac7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -14,57 +14,60 @@ tool to register/unregister/introspect BPF struct_ops SYNOPSIS ======== - **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* +**bpftool** [*OPTIONS*] **struct_ops** *COMMAND* - *OPTIONS* := { |COMMON_OPTIONS| } +*OPTIONS* := { |COMMON_OPTIONS| } - *COMMANDS* := - { **show** | **list** | **dump** | **register** | **unregister** | **help** } +*COMMANDS* := +{ **show** | **list** | **dump** | **register** | **unregister** | **help** } STRUCT_OPS COMMANDS =================== -| **bpftool** **struct_ops { show | list }** [*STRUCT_OPS_MAP*] -| **bpftool** **struct_ops dump** [*STRUCT_OPS_MAP*] -| **bpftool** **struct_ops register** *OBJ* -| **bpftool** **struct_ops unregister** *STRUCT_OPS_MAP* -| **bpftool** **struct_ops help** +| **bpftool** **struct_ops { show | list }** [*STRUCT_OPS_MAP*] +| **bpftool** **struct_ops dump** [*STRUCT_OPS_MAP*] +| **bpftool** **struct_ops register** *OBJ* [*LINK_DIR*] +| **bpftool** **struct_ops unregister** *STRUCT_OPS_MAP* +| **bpftool** **struct_ops help** | -| *STRUCT_OPS_MAP* := { **id** *STRUCT_OPS_MAP_ID* | **name** *STRUCT_OPS_MAP_NAME* } -| *OBJ* := /a/file/of/bpf_struct_ops.o +| *STRUCT_OPS_MAP* := { **id** *STRUCT_OPS_MAP_ID* | **name** *STRUCT_OPS_MAP_NAME* } +| *OBJ* := /a/file/of/bpf_struct_ops.o DESCRIPTION =========== - **bpftool struct_ops { show | list }** [*STRUCT_OPS_MAP*] - Show brief information about the struct_ops in the system. - If *STRUCT_OPS_MAP* is specified, it shows information only - for the given struct_ops. Otherwise, it lists all struct_ops - currently existing in the system. - - Output will start with struct_ops map ID, followed by its map - name and its struct_ops's kernel type. 
- - **bpftool struct_ops dump** [*STRUCT_OPS_MAP*] - Dump details information about the struct_ops in the system. - If *STRUCT_OPS_MAP* is specified, it dumps information only - for the given struct_ops. Otherwise, it dumps all struct_ops - currently existing in the system. - - **bpftool struct_ops register** *OBJ* - Register bpf struct_ops from *OBJ*. All struct_ops under - the ELF section ".struct_ops" will be registered to - its kernel subsystem. - - **bpftool struct_ops unregister** *STRUCT_OPS_MAP* - Unregister the *STRUCT_OPS_MAP* from the kernel subsystem. - - **bpftool struct_ops help** - Print short help message. +bpftool struct_ops { show | list } [*STRUCT_OPS_MAP*] + Show brief information about the struct_ops in the system. If + *STRUCT_OPS_MAP* is specified, it shows information only for the given + struct_ops. Otherwise, it lists all struct_ops currently existing in the + system. + + Output will start with struct_ops map ID, followed by its map name and its + struct_ops's kernel type. + +bpftool struct_ops dump [*STRUCT_OPS_MAP*] + Dump detailed information about the struct_ops in the system. If + *STRUCT_OPS_MAP* is specified, it dumps information only for the given + struct_ops. Otherwise, it dumps all struct_ops currently existing in the + system. + +bpftool struct_ops register *OBJ* [*LINK_DIR*] + Register bpf struct_ops from *OBJ*. All struct_ops under the ELF sections + ".struct_ops" and ".struct_ops.link" will be registered to its kernel + subsystem. For each struct_ops in the ".struct_ops.link" section, a link + will be created. You can give *LINK_DIR* to provide a directory path where + these links will be pinned with the same name as their corresponding map + name. + +bpftool struct_ops unregister *STRUCT_OPS_MAP* + Unregister the *STRUCT_OPS_MAP* from the kernel subsystem. + +bpftool struct_ops help + Print short help message. OPTIONS ======= - .. include:: common_options.rst +.. 
include:: common_options.rst EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-token.rst b/tools/bpf/bpftool/Documentation/bpftool-token.rst new file mode 100644 index 000000000000..d082c499cfe3 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-token.rst @@ -0,0 +1,64 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +================ +bpftool-token +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF tokens +------------------------------------------------------------------------------- + +:Manual section: 8 + +.. include:: substitutions.rst + +SYNOPSIS +======== + +**bpftool** [*OPTIONS*] **token** *COMMAND* + +*OPTIONS* := { |COMMON_OPTIONS| } + +*COMMANDS* := { **show** | **list** | **help** } + +TOKEN COMMANDS +=============== + +| **bpftool** **token** { **show** | **list** } +| **bpftool** **token help** +| + +DESCRIPTION +=========== +bpftool token { show | list } + List BPF token information for each *bpffs* mount point containing token + information on the system. Information includes mount point path, allowed + **bpf**\ () system call commands, maps, programs, and attach types for the + token. + +bpftool token help + Print short help message. + +OPTIONS +======== +.. 
include:: common_options.rst + +EXAMPLES +======== +| +| **# mkdir -p /sys/fs/bpf/token** +| **# mount -t bpf bpffs /sys/fs/bpf/token** \ +| **-o delegate_cmds=prog_load:map_create** \ +| **-o delegate_progs=kprobe** \ +| **-o delegate_attachs=xdp** +| **# bpftool token list** + +:: + + token_info /sys/fs/bpf/token + allowed_cmds: + map_create prog_load + allowed_maps: + allowed_progs: + kprobe + allowed_attachs: + xdp diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6965c94dfdaf..f38ae5c40439 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -14,57 +14,57 @@ tool for inspection and simple manipulation of eBPF programs and maps SYNOPSIS ======== - **bpftool** [*OPTIONS*] *OBJECT* { *COMMAND* | **help** } +**bpftool** [*OPTIONS*] *OBJECT* { *COMMAND* | **help** } - **bpftool** **batch file** *FILE* +**bpftool** **batch file** *FILE* - **bpftool** **version** +**bpftool** **version** - *OBJECT* := { **map** | **program** | **link** | **cgroup** | **perf** | **net** | **feature** | - **btf** | **gen** | **struct_ops** | **iter** } +*OBJECT* := { **map** | **prog** | **link** | **cgroup** | **perf** | **net** | **feature** | +**btf** | **gen** | **struct_ops** | **iter** } - *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } +*OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } - *MAP-COMMANDS* := - { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | - **delete** | **pin** | **event_pipe** | **help** } +*MAP-COMMANDS* := +{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | +**delete** | **pin** | **event_pipe** | **help** } - *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | - **load** | **attach** | **detach** | **help** } +*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | +**load** 
| **attach** | **detach** | **help** } - *LINK-COMMANDS* := { **show** | **list** | **pin** | **detach** | **help** } +*LINK-COMMANDS* := { **show** | **list** | **pin** | **detach** | **help** } - *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } +*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } - *PERF-COMMANDS* := { **show** | **list** | **help** } +*PERF-COMMANDS* := { **show** | **list** | **help** } - *NET-COMMANDS* := { **show** | **list** | **help** } +*NET-COMMANDS* := { **show** | **list** | **help** } - *FEATURE-COMMANDS* := { **probe** | **help** } +*FEATURE-COMMANDS* := { **probe** | **help** } - *BTF-COMMANDS* := { **show** | **list** | **dump** | **help** } +*BTF-COMMANDS* := { **show** | **list** | **dump** | **help** } - *GEN-COMMANDS* := { **object** | **skeleton** | **min_core_btf** | **help** } +*GEN-COMMANDS* := { **object** | **skeleton** | **min_core_btf** | **help** } - *STRUCT-OPS-COMMANDS* := { **show** | **list** | **dump** | **register** | **unregister** | **help** } +*STRUCT-OPS-COMMANDS* := { **show** | **list** | **dump** | **register** | **unregister** | **help** } - *ITER-COMMANDS* := { **pin** | **help** } +*ITER-COMMANDS* := { **pin** | **help** } DESCRIPTION =========== - *bpftool* allows for inspection and simple modification of BPF objects - on the system. +*bpftool* allows for inspection and simple modification of BPF objects on the +system. - Note that format of the output of all tools is not guaranteed to be - stable and should not be depended upon. +Note that format of the output of all tools is not guaranteed to be stable and +should not be depended upon. OPTIONS ======= - .. include:: common_options.rst +.. include:: common_options.rst - -m, --mapcompat - Allow loading maps with unknown map definitions. +-m, --mapcompat + Allow loading maps with unknown map definitions. 
- -n, --nomount - Do not automatically attempt to mount any virtual file system - (such as tracefs or BPF virtual file system) when necessary. +-n, --nomount + Do not automatically attempt to mount any virtual file system (such as + tracefs or BPF virtual file system) when necessary. diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 30df7a707f02..9234b9dab768 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -1,25 +1,23 @@ .. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -h, --help - Print short help message (similar to **bpftool help**). + Print short help message (similar to **bpftool help**). -V, --version - Print bpftool's version number (similar to **bpftool version**), the - number of the libbpf version in use, and optional features that were - included when bpftool was compiled. Optional features include linking - against LLVM or libbfd to provide the disassembler for JIT-ted - programs (**bpftool prog dump jited**) and usage of BPF skeletons - (some features like **bpftool prog profile** or showing pids - associated to BPF objects may rely on it). + Print bpftool's version number (similar to **bpftool version**), the number + of the libbpf version in use, and optional features that were included when + bpftool was compiled. Optional features include linking against LLVM or + libbfd to provide the disassembler for JIT-ted programs (**bpftool prog + dump jited**) and usage of BPF skeletons (some features like **bpftool prog + profile** or showing pids associated to BPF objects may rely on it). -j, --json - Generate JSON output. For commands that cannot produce JSON, this - option has no effect. + Generate JSON output. For commands that cannot produce JSON, this option + has no effect. -p, --pretty - Generate human-readable JSON output. Implies **-j**. + Generate human-readable JSON output. Implies **-j**. 
-d, --debug - Print all logs available, even debug-level information. This includes - logs from libbpf as well as from the verifier, when attempting to - load programs. + Print all logs available, even debug-level information. This includes logs + from libbpf as well as from the verifier, when attempting to load programs. diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f610e184ce02..586d1b2595d1 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -7,12 +7,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - BPF_DIR = $(srctree)/tools/lib/bpf ifneq ($(OUTPUT),) @@ -53,7 +47,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_ $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR:/=) prefix= \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) $@ install_headers + ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" $@ install_headers $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) $(call QUIET_INSTALL, $@) @@ -71,7 +65,12 @@ prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions CFLAGS += -O2 -CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers +CFLAGS += -W +CFLAGS += -Wall +CFLAGS += -Wextra +CFLAGS += -Wformat-signedness +CFLAGS += -Wno-unused-parameter +CFLAGS += -Wno-missing-field-initializers CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(or $(OUTPUT),.) 
\ @@ -89,6 +88,10 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif +HOST_CFLAGS := $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ + $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) +HOST_LDFLAGS := $(LDFLAGS) + INSTALL ?= install RM ?= rm -f @@ -102,6 +105,7 @@ FEATURE_TESTS += libbfd-liberty FEATURE_TESTS += libbfd-liberty-z FEATURE_TESTS += disassembler-four-args FEATURE_TESTS += disassembler-init-styled +FEATURE_TESTS += libelf-zstd FEATURE_DISPLAY := clang-bpf-co-re FEATURE_DISPLAY += llvm @@ -126,8 +130,14 @@ include $(FEATURES_DUMP) endif endif -LIBS = $(LIBBPF) -lelf -lz -LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz +LIBS = $(LIBBPF) -lelf -lz -lcrypto +LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto + +ifeq ($(feature-libelf-zstd),1) +LIBS += -lzstd +LIBS_BOOTSTRAP += -lzstd +endif + ifeq ($(feature-libcap), 1) CFLAGS += -DUSE_LIBCAP LIBS += -lcap @@ -143,7 +153,11 @@ ifeq ($(feature-llvm),1) # If LLVM is available, use it for JIT disassembly CFLAGS += -DHAVE_LLVM_SUPPORT LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets - CFLAGS += $(shell $(LLVM_CONFIG) --cflags --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + # llvm-config always adds -D_GNU_SOURCE, however, it may already be in CFLAGS + # (e.g. when bpftool build is called from selftests build as selftests + # Makefile includes lib.mk which sets -D_GNU_SOURCE) which would cause + # compilation error due to redefinition. Let's filter it out here. 
+ CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags)) LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) ifeq ($(shell $(LLVM_CONFIG) --shared-mode),static) LIBS += $(shell $(LLVM_CONFIG) --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) @@ -178,12 +192,9 @@ ifeq ($(filter -DHAVE_LLVM_SUPPORT -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) SRCS := $(filter-out jit_disasm.c,$(SRCS)) endif -HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ - $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) - BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool -BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o) +BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o sign.o) $(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o @@ -203,10 +214,11 @@ ifeq ($(feature-clang-bpf-co-re),1) BUILD_BPF_SKELS := 1 -$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) ifeq ($(VMLINUX_H),) +$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ else +$(OUTPUT)vmlinux.h: $(VMLINUX_H) $(Q)cp "$(VMLINUX_H)" $@ endif @@ -215,7 +227,8 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP) -I$(or $(OUTPUT),.) 
\ -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ - -g -O2 -Wall -target bpf -c $< -o $@ + -g -O2 -Wall -fno-stack-protector \ + --target=bpf -c $< -o $@ $(Q)$(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) @@ -230,14 +243,11 @@ endif CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) -$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c - $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ - $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) - $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ + $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(HOST_LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ @@ -293,3 +303,6 @@ FORCE: .PHONY: all FORCE bootstrap clean install-bin install uninstall .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all + +# Delete partially updated (corrupted) files on error +.DELETE_ON_ERROR: diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 35f26f7c1124..53bcfeb1a76e 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -106,19 +106,19 @@ _bpftool_get_link_ids() _bpftool_get_obj_map_names() { - local obj + local obj maps obj=$1 - maps=$(objdump -j maps -t $obj 2>/dev/null | \ - command awk '/g . maps/ {print $NF}') + maps=$(objdump -j .maps -t $obj 2>/dev/null | \ + command awk '/g . .maps/ {print $NF}') COMPREPLY+=( $( compgen -W "$maps" -- "$cur" ) ) } _bpftool_get_obj_map_idxs() { - local obj + local obj nmaps obj=$1 @@ -136,7 +136,7 @@ _sysfs_get_netdevs() # Retrieve type of the map that we are operating on. 
_bpftool_map_guess_map_type() { - local keyword ref + local keyword idx ref="" for (( idx=3; idx < ${#words[@]}-1; idx++ )); do case "${words[$((idx-2))]}" in lookup|update) @@ -255,31 +255,35 @@ _bpftool_map_update_get_name() _bpftool() { - local cur prev words objword - _init_completion || return + local cur prev words cword comp_args + local json=0 + _init_completion -- "$@" || return # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty --bpffs --mapcompat --debug \ - --use-loader --base-btf' + --use-loader --base-btf --sign -i -k' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi + if _bpftool_search_list -j --json -p --pretty; then + json=1 + fi # Deal with simplest keywords case $prev in - help|hex|opcodes|visual|linum) + help|hex) return 0 ;; tag) _bpftool_get_prog_tags return 0 ;; - dev) + dev|offload_dev|xdpmeta_dev) _sysfs_get_netdevs return 0 ;; - file|pinned|-B|--base-btf) + file|pinned|-B|--base-btf|-i|-k) _filedir return 0 ;; @@ -290,21 +294,29 @@ _bpftool() esac # Remove all options so completions don't have to deal with them. 
- local i + local i pprev for (( i=1; i < ${#words[@]}; )); do - if [[ ${words[i]::1} == - ]] && - [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then - words=( "${words[@]:0:i}" "${words[@]:i+1}" ) - [[ $i -le $cword ]] && cword=$(( cword - 1 )) - else - i=$(( ++i )) - fi + case ${words[i]} in + # Remove option and its argument + -B|--base-btf|-i|-k) + words=( "${words[@]:0:i}" "${words[@]:i+2}" ) + [[ $i -le $(($cword + 1)) ]] && cword=$(( cword - 2 )) + ;; + # No argument, remove option only + -*) + words=( "${words[@]:0:i}" "${words[@]:i+1}" ) + [[ $i -le $cword ]] && cword=$(( cword - 1 )) + ;; + *) + i=$(( ++i )) + ;; + esac done cur=${words[cword]} prev=${words[cword - 1]} pprev=${words[cword - 2]} - local object=${words[1]} command=${words[2]} + local object=${words[1]} if [[ -z $object || $cword -eq 1 ]]; then case $cur in @@ -321,8 +333,12 @@ _bpftool() esac fi + local command=${words[2]} [[ $command == help ]] && return 0 + local MAP_TYPE='id pinned name' + local PROG_TYPE='id pinned tag name' + # Completion depends on object and command in use case $object in prog) @@ -343,8 +359,6 @@ _bpftool() ;; esac - local PROG_TYPE='id pinned tag name' - local MAP_TYPE='id pinned name' local METRIC_TYPE='cycles instructions l1d_loads llc_misses \ itlb_misses dtlb_misses' case $command in @@ -366,13 +380,16 @@ _bpftool() return 0 ;; *) - _bpftool_once_attr 'file' - if _bpftool_search_list 'xlated'; then - COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ - "$cur" ) ) - else - COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ - "$cur" ) ) + # "file" is not compatible with other keywords here + if _bpftool_search_list 'file'; then + return 0 + fi + if ! 
_bpftool_search_list 'linum opcodes visual'; then + _bpftool_once_attr 'file' + fi + _bpftool_once_attr 'linum opcodes' + if _bpftool_search_list 'xlated' && [[ "$json" == 0 ]]; then + _bpftool_once_attr 'visual' fi return 0 ;; @@ -451,7 +468,7 @@ _bpftool() obj=${words[3]} if [[ ${words[-4]} == "map" ]]; then - COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) return 0 fi if [[ ${words[-3]} == "map" ]]; then @@ -474,13 +491,13 @@ _bpftool() action tracepoint raw_tracepoint \ xdp perf_event cgroup/skb cgroup/sock \ cgroup/dev lwt_in lwt_out lwt_xmit \ - lwt_seg6local sockops sk_skb sk_msg \ - lirc_mode2 cgroup/bind4 cgroup/bind6 \ - cgroup/connect4 cgroup/connect6 \ - cgroup/getpeername4 cgroup/getpeername6 \ - cgroup/getsockname4 cgroup/getsockname6 \ - cgroup/sendmsg4 cgroup/sendmsg6 \ - cgroup/recvmsg4 cgroup/recvmsg6 \ + lwt_seg6local sockops sk_skb sk_msg lirc_mode2 \ + cgroup/bind4 cgroup/bind6 \ + cgroup/connect4 cgroup/connect6 cgroup/connect_unix \ + cgroup/getpeername4 cgroup/getpeername6 cgroup/getpeername_unix \ + cgroup/getsockname4 cgroup/getsockname6 cgroup/getsockname_unix \ + cgroup/sendmsg4 cgroup/sendmsg6 cgroup/sendmsg_unix \ + cgroup/recvmsg4 cgroup/recvmsg6 cgroup/recvmsg_unix \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ cgroup/setsockopt cgroup/sock_release struct_ops \ @@ -496,22 +513,34 @@ _bpftool() _bpftool_get_map_names return 0 ;; - pinned|pinmaps) + pinned|pinmaps|kernel_btf) _filedir return 0 ;; *) COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) - _bpftool_once_attr 'type' - _bpftool_once_attr 'dev' - _bpftool_once_attr 'pinmaps' - _bpftool_once_attr 'autoattach' + _bpftool_once_attr 'type pinmaps autoattach kernel_btf' + _bpftool_one_of_list 'offload_dev xdpmeta_dev' return 0 ;; esac ;; tracelog) - return 0 + case $prev in + $command) + COMPREPLY+=( $( compgen -W "stdout stderr" -- \ + "$cur" ) ) + return 0 + ;; + stdout|stderr) + COMPREPLY=( $( 
compgen -W "$PROG_TYPE" -- \ + "$cur" ) ) + return 0 + ;; + *) + return 0 + ;; + esac ;; profile) case $cword in @@ -537,20 +566,9 @@ _bpftool() COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) ) return 0 ;; - 6) - case $prev in - duration) - return 0 - ;; - *) - COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) - return 0 - ;; - esac - return 0 - ;; *) - COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) + [[ $prev == duration ]] && return 0 + _bpftool_once_attr "$METRIC_TYPE" return 0 ;; esac @@ -608,7 +626,7 @@ _bpftool() return 0 ;; register) - _filedir + [[ $prev == $command ]] && _filedir return 0 ;; *) @@ -634,9 +652,12 @@ _bpftool() pinned) _filedir ;; - *) + map) _bpftool_one_of_list $MAP_TYPE ;; + *) + _bpftool_once_attr 'map' + ;; esac return 0 ;; @@ -648,7 +669,6 @@ _bpftool() esac ;; map) - local MAP_TYPE='id pinned name' case $command in show|list|dump|peek|pop|dequeue|freeze) case $prev in @@ -730,16 +750,10 @@ _bpftool() esac ;; *) - _bpftool_once_attr 'type' - _bpftool_once_attr 'key' - _bpftool_once_attr 'value' - _bpftool_once_attr 'entries' - _bpftool_once_attr 'name' - _bpftool_once_attr 'flags' + _bpftool_once_attr 'type key value entries name flags offload_dev' if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then _bpftool_once_attr 'inner_map' fi - _bpftool_once_attr 'dev' return 0 ;; esac @@ -795,13 +809,11 @@ _bpftool() # map, depending on the type of the map to update. 
case "$(_bpftool_map_guess_map_type)" in array_of_maps|hash_of_maps) - local MAP_TYPE='id pinned name' COMPREPLY+=( $( compgen -W "$MAP_TYPE" \ -- "$cur" ) ) return 0 ;; prog_array) - local PROG_TYPE='id pinned tag name' COMPREPLY+=( $( compgen -W "$PROG_TYPE" \ -- "$cur" ) ) return 0 @@ -823,7 +835,7 @@ _bpftool() esac _bpftool_once_attr 'key' - local UPDATE_FLAGS='any exist noexist' + local UPDATE_FLAGS='any exist noexist' idx for (( idx=3; idx < ${#words[@]}-1; idx++ )); do if [[ ${words[idx]} == 'value' ]]; then # 'value' is present, but is not the last @@ -880,8 +892,7 @@ _bpftool() return 0 ;; *) - _bpftool_once_attr 'cpu' - _bpftool_once_attr 'index' + _bpftool_once_attr 'cpu index' return 0 ;; esac @@ -896,7 +907,6 @@ _bpftool() esac ;; btf) - local PROG_TYPE='id pinned tag name' local MAP_TYPE='id pinned name' case $command in dump) @@ -942,16 +952,24 @@ _bpftool() format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; + root_id) + return 0; + ;; + c) + COMPREPLY=( $( compgen -W "unsorted root_id" -- "$cur" ) ) + ;; *) # emit extra options case ${words[3]} in id|file) + COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) ) _bpftool_once_attr 'format' ;; map|prog) if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) ) fi + COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) ) _bpftool_once_attr 'format' ;; *) @@ -1036,7 +1054,6 @@ _bpftool() local BPFTOOL_CGROUP_ATTACH_TYPES="$(bpftool feature list_builtins attach_types 2>/dev/null | \ grep '^cgroup_')" local ATTACH_FLAGS='multi override' - local PROG_TYPE='id pinned tag name' # Check for $prev = $command first if [ $prev = $command ]; then _filedir @@ -1089,8 +1106,7 @@ _bpftool() esac ;; net) - local PROG_TYPE='id pinned tag name' - local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload' + local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload tcx_ingress tcx_egress' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -1196,17 +1212,28 
@@ _bpftool() pin|detach) if [[ $prev == "$command" ]]; then COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) ) - else + elif [[ $pprev == "$command" ]]; then _filedir fi return 0 ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'help pin show list' -- "$cur" ) ) + COMPREPLY=( $( compgen -W 'help pin detach show list' -- "$cur" ) ) ;; esac ;; + token) + case $command in + show|list) + return 0 + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 352290ba7b29..946612029dee 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <errno.h> #include <fcntl.h> #include <linux/err.h> #include <stdbool.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <unistd.h> #include <linux/btf.h> @@ -20,6 +24,11 @@ #include "json_writer.h" #include "main.h" +#define KFUNC_DECL_TAG "bpf_kfunc" +#define FASTCALL_DECL_TAG "bpf_fastcall" + +#define MAX_ROOT_IDS 16 + static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", [BTF_KIND_INT] = "INT", @@ -43,6 +52,14 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_ENUM64] = "ENUM64", }; +struct sort_datum { + int index; + int type_rank; + const char *sort_name; + const char *own_name; + __u64 disambig_hash; +}; + static const char *btf_int_enc_str(__u8 encoding) { switch (encoding) { @@ -236,7 +253,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, if (btf_kflag(t)) printf("\n\t'%s' val=%d", name, v->val); else - printf("\n\t'%s' val=%u", name, v->val); + printf("\n\t'%s' val=%u", name, (__u32)v->val); } } if (json_output) @@ -274,7 +291,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, 
} else { if (btf_kflag(t)) printf("\n\t'%s' val=%lldLL", name, - (unsigned long long)val); + (long long)val); else printf("\n\t'%s' val=%lluULL", name, (unsigned long long)val); @@ -454,15 +471,307 @@ static int dump_btf_raw(const struct btf *btf, return 0; } +struct ptr_array { + __u32 cnt; + __u32 cap; + const void **elems; +}; + +static int ptr_array_push(const void *ptr, struct ptr_array *arr) +{ + __u32 new_cap; + void *tmp; + + if (arr->cnt == arr->cap) { + new_cap = (arr->cap ?: 16) * 2; + tmp = realloc(arr->elems, sizeof(*arr->elems) * new_cap); + if (!tmp) + return -ENOMEM; + arr->elems = tmp; + arr->cap = new_cap; + } + arr->elems[arr->cnt++] = ptr; + return 0; +} + +static void ptr_array_free(struct ptr_array *arr) +{ + free(arr->elems); +} + +static int cmp_kfuncs(const void *pa, const void *pb, void *ctx) +{ + struct btf *btf = ctx; + const struct btf_type *a = *(void **)pa; + const struct btf_type *b = *(void **)pb; + + return strcmp(btf__str_by_offset(btf, a->name_off), + btf__str_by_offset(btf, b->name_off)); +} + +static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) +{ + LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts); + __u32 cnt = btf__type_cnt(btf), i, j; + struct ptr_array fastcalls = {}; + struct ptr_array kfuncs = {}; + int err = 0; + + printf("\n/* BPF kfuncs */\n"); + printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n"); + + for (i = 1; i < cnt; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + const struct btf_type *ft; + const char *name; + + if (!btf_is_decl_tag(t)) + continue; + + if (btf_decl_tag(t)->component_idx != -1) + continue; + + ft = btf__type_by_id(btf, t->type); + if (!btf_is_func(ft)) + continue; + + name = btf__name_by_offset(btf, t->name_off); + if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &kfuncs); + if (err) + goto out; + } + + if (strncmp(name, FASTCALL_DECL_TAG, sizeof(FASTCALL_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &fastcalls); + if (err) + 
goto out; + } + } + + /* Sort kfuncs by name for improved vmlinux.h stability */ + qsort_r(kfuncs.elems, kfuncs.cnt, sizeof(*kfuncs.elems), cmp_kfuncs, (void *)btf); + for (i = 0; i < kfuncs.cnt; i++) { + const struct btf_type *t = kfuncs.elems[i]; + + printf("extern "); + + /* Assume small amount of fastcall kfuncs */ + for (j = 0; j < fastcalls.cnt; j++) { + if (fastcalls.elems[j] == t) { + printf("__bpf_fastcall "); + break; + } + } + + opts.field_name = btf__name_by_offset(btf, t->name_off); + err = btf_dump__emit_type_decl(d, t->type, &opts); + if (err) + goto out; + + printf(" __weak __ksym;\n"); + } + + printf("#endif\n\n"); + +out: + ptr_array_free(&fastcalls); + ptr_array_free(&kfuncs); + return err; +} + static void __printf(2, 0) btf_dump_printf(void *ctx, const char *fmt, va_list args) { vfprintf(stdout, fmt, args); } +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + const int kind = btf_kind(t); + const int max_rank = 10; + + if (t->name_off) + has_name = true; + + switch (kind) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return has_name ? 1 : 0; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + return 2; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return has_name ? 3 : max_rank; + case BTF_KIND_FUNC_PROTO: + return has_name ? 
4 : max_rank; + case BTF_KIND_ARRAY: + if (has_name) + return btf_type_rank(btf, btf_array(t)->type, has_name); + return max_rank; + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + if (has_name) + return btf_type_rank(btf, t->type, has_name); + return max_rank; + default: + return max_rank; + } +} + +static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool from_ref) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + + switch (btf_kind(t)) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: { + int name_off = t->name_off; + + if (!from_ref && !name_off && btf_vlen(t)) + name_off = btf_kind(t) == BTF_KIND_ENUM64 ? + btf_enum64(t)->name_off : + btf_enum(t)->name_off; + + return btf__name_by_offset(btf, name_off); + } + case BTF_KIND_ARRAY: + return btf_type_sort_name(btf, btf_array(t)->type, true); + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + return btf_type_sort_name(btf, t->type, true); + default: + return btf__name_by_offset(btf, t->name_off); + } + return NULL; +} + +static __u64 hasher(__u64 hash, __u64 val) +{ + return hash * 31 + val; +} + +static __u64 btf_name_hasher(__u64 hash, const struct btf *btf, __u32 name_off) +{ + if (!name_off) + return hash; + + return hasher(hash, str_hash(btf__name_by_offset(btf, name_off))); +} + +static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 id, bool include_members) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + int i; + size_t hash = 0; + + hash = btf_name_hasher(hash, btf, t->name_off); + + switch (btf_kind(t)) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + for (i = 0; i < btf_vlen(t); i++) { + __u32 name_off = btf_is_enum(t) ? 
+ btf_enum(t)[i].name_off : + btf_enum64(t)[i].name_off; + + hash = btf_name_hasher(hash, btf, name_off); + } + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + if (!include_members) + break; + for (i = 0; i < btf_vlen(t); i++) { + const struct btf_member *m = btf_members(t) + i; + + hash = btf_name_hasher(hash, btf, m->name_off); + /* resolve field type's name and hash it as well */ + hash = hasher(hash, btf_type_disambig_hash(btf, m->type, false)); + } + break; + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + hash = hasher(hash, btf_type_disambig_hash(btf, t->type, include_members)); + break; + case BTF_KIND_ARRAY: { + struct btf_array *arr = btf_array(t); + + hash = hasher(hash, arr->nelems); + hash = hasher(hash, btf_type_disambig_hash(btf, arr->type, include_members)); + break; + } + default: + break; + } + return hash; +} + +static int btf_type_compare(const void *left, const void *right) +{ + const struct sort_datum *d1 = (const struct sort_datum *)left; + const struct sort_datum *d2 = (const struct sort_datum *)right; + int r; + + r = d1->type_rank - d2->type_rank; + r = r ?: strcmp(d1->sort_name, d2->sort_name); + r = r ?: strcmp(d1->own_name, d2->own_name); + if (r) + return r; + + if (d1->disambig_hash != d2->disambig_hash) + return d1->disambig_hash < d2->disambig_hash ? 
-1 : 1; + + return d1->index - d2->index; +} + +static struct sort_datum *sort_btf_c(const struct btf *btf) +{ + struct sort_datum *datums; + int n; + + n = btf__type_cnt(btf); + datums = malloc(sizeof(struct sort_datum) * n); + if (!datums) + return NULL; + + for (int i = 0; i < n; ++i) { + struct sort_datum *d = datums + i; + const struct btf_type *t = btf__type_by_id(btf, i); + + d->index = i; + d->type_rank = btf_type_rank(btf, i, false); + d->sort_name = btf_type_sort_name(btf, i, false); + d->own_name = btf__name_by_offset(btf, t->name_off); + d->disambig_hash = btf_type_disambig_hash(btf, i, true); + } + + qsort(datums, n, sizeof(struct sort_datum), btf_type_compare); + + return datums; +} + static int dump_btf_c(const struct btf *btf, - __u32 *root_type_ids, int root_type_cnt) + __u32 *root_type_ids, int root_type_cnt, bool sort_dump) { + struct sort_datum *datums = NULL; struct btf_dump *d; int err = 0, i; @@ -476,6 +785,19 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); printf("#endif\n\n"); + printf("#ifndef __ksym\n"); + printf("#define __ksym __attribute__((section(\".ksyms\")))\n"); + printf("#endif\n\n"); + printf("#ifndef __weak\n"); + printf("#define __weak __attribute__((weak))\n"); + printf("#endif\n\n"); + printf("#ifndef __bpf_fastcall\n"); + printf("#if __has_attribute(bpf_fastcall)\n"); + printf("#define __bpf_fastcall __attribute__((bpf_fastcall))\n"); + printf("#else\n"); + printf("#define __bpf_fastcall\n"); + printf("#endif\n"); + printf("#endif\n\n"); if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { @@ -486,11 +808,19 @@ static int dump_btf_c(const struct btf *btf, } else { int cnt = btf__type_cnt(btf); + if (sort_dump) + datums = sort_btf_c(btf); for (i = 1; i < cnt; i++) { - err = btf_dump__dump_type(d, i); + int idx = datums ? 
datums[i].index : i; + + err = btf_dump__dump_type(d, idx); if (err) goto done; } + + err = dump_btf_kfuncs(d, btf); + if (err) + goto done; } printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); @@ -500,6 +830,7 @@ static int dump_btf_c(const struct btf *btf, printf("#endif /* __VMLINUX_H__ */\n"); done: + free(datums); btf_dump__free(d); return err; } @@ -537,7 +868,7 @@ static bool btf_is_kernel_module(__u32 btf_id) len = sizeof(btf_info); btf_info.name = ptr_to_u64(btf_name); btf_info.name_len = sizeof(btf_name); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + err = bpf_btf_get_info_by_fd(btf_fd, &btf_info, &len); close(btf_fd); if (err) { p_err("can't get BTF (ID %u) object info: %s", btf_id, strerror(errno)); @@ -549,14 +880,16 @@ static bool btf_is_kernel_module(__u32 btf_id) static int do_dump(int argc, char **argv) { + bool dump_c = false, sort_dump_c = true; struct btf *btf = NULL, *base = NULL; - __u32 root_type_ids[2]; + __u32 root_type_ids[MAX_ROOT_IDS]; + bool have_id_filtering; int root_type_cnt = 0; - bool dump_c = false; __u32 btf_id = -1; const char *src; int fd = -1; int err = 0; + int i; if (!REQ_ARGS(2)) { usage(); @@ -572,7 +905,8 @@ static int do_dump(int argc, char **argv) return -1; } - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, + BPF_F_RDONLY); if (fd < 0) return -1; @@ -606,7 +940,7 @@ static int do_dump(int argc, char **argv) if (fd < 0) return -1; - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_prog_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get prog info: %s", strerror(errno)); goto done; @@ -644,6 +978,8 @@ static int do_dump(int argc, char **argv) goto done; } + have_id_filtering = !!root_type_cnt; + while (argc) { if (is_prefix(*argv, "format")) { NEXT_ARG(); @@ -663,6 +999,39 @@ static int do_dump(int argc, char **argv) goto done; } NEXT_ARG(); + } else if (is_prefix(*argv, "root_id")) { + __u32 root_id; + char *end; + + if 
(have_id_filtering) { + p_err("cannot use root_id with other type filtering"); + err = -EINVAL; + goto done; + } else if (root_type_cnt == MAX_ROOT_IDS) { + p_err("only %d root_id are supported", MAX_ROOT_IDS); + err = -E2BIG; + goto done; + } + + NEXT_ARG(); + root_id = strtoul(*argv, &end, 0); + if (*end) { + err = -1; + p_err("can't parse %s as root ID", *argv); + goto done; + } + for (i = 0; i < root_type_cnt; i++) { + if (root_type_ids[i] == root_id) { + err = -EINVAL; + p_err("duplicate root_id %u supplied", root_id); + goto done; + } + } + root_type_ids[root_type_cnt++] = root_id; + NEXT_ARG(); + } else if (is_prefix(*argv, "unsorted")) { + sort_dump_c = false; + NEXT_ARG(); } else { p_err("unrecognized option: '%s'", *argv); err = -EINVAL; @@ -685,13 +1054,24 @@ static int do_dump(int argc, char **argv) } } + /* Invalid root IDs causes half emitted boilerplate and then unclean + * exit. It's an ugly user experience, so handle common error here. + */ + for (i = 0; i < root_type_cnt; i++) { + if (root_type_ids[i] >= btf__type_cnt(btf)) { + err = -EINVAL; + p_err("invalid root ID: %u", root_type_ids[i]); + goto done; + } + } + if (dump_c) { if (json_output) { p_err("JSON output for C-syntax dump is not supported"); err = -ENOTSUP; goto done; } - err = dump_btf_c(btf, root_type_ids, root_type_cnt); + err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c); } else { err = dump_btf_raw(btf, root_type_ids, root_type_cnt); } @@ -739,10 +1119,13 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", }; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro); __u32 btf_id, id = 0; int err; int fd; + opts_ro.open_flags = BPF_F_RDONLY; + while (true) { switch (type) { case BPF_OBJ_PROG: @@ -753,7 +1136,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, break; default: err = -1; - p_err("unexpected object type: %d", type); + p_err("unexpected object type: %u", type); goto err_free; } if 
(err) { @@ -772,11 +1155,11 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, fd = bpf_prog_get_fd_by_id(id); break; case BPF_OBJ_MAP: - fd = bpf_map_get_fd_by_id(id); + fd = bpf_map_get_fd_by_id_opts(id, &opts_ro); break; default: err = -1; - p_err("unexpected object type: %d", type); + p_err("unexpected object type: %u", type); goto err_free; } if (fd < 0) { @@ -789,7 +1172,10 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, } memset(info, 0, *len); - err = bpf_obj_get_info_by_fd(fd, info, len); + if (type == BPF_OBJ_PROG) + err = bpf_prog_get_info_by_fd(fd, info, len); + else + err = bpf_map_get_info_by_fd(fd, info, len); close(fd); if (err) { p_err("can't get %s info: %s", names[type], @@ -806,7 +1192,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, break; default: err = -1; - p_err("unexpected object type: %d", type); + p_err("unexpected object type: %u", type); goto err_free; } if (!btf_id) @@ -872,12 +1258,12 @@ show_btf_plain(struct bpf_btf_info *info, int fd, n = 0; hashmap__for_each_key_entry(btf_prog_table, entry, info->id) { - printf("%s%lu", n++ == 0 ? " prog_ids " : ",", entry->value); + printf("%s%lu", n++ == 0 ? " prog_ids " : ",", (unsigned long)entry->value); } n = 0; hashmap__for_each_key_entry(btf_map_table, entry, info->id) { - printf("%s%lu", n++ == 0 ? " map_ids " : ",", entry->value); + printf("%s%lu", n++ == 0 ? 
" map_ids " : ",", (unsigned long)entry->value); } emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); @@ -931,7 +1317,7 @@ show_btf(int fd, struct hashmap *btf_prog_table, int err; memset(&info, 0, sizeof(info)); - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get BTF object info: %s", strerror(errno)); return -1; @@ -943,7 +1329,7 @@ show_btf(int fd, struct hashmap *btf_prog_table, info.name = ptr_to_u64(name); len = sizeof(info); - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get BTF object info: %s", strerror(errno)); return -1; @@ -1056,11 +1442,11 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s { show | list } [id BTF_ID]\n" - " %1$s %2$s dump BTF_SRC [format FORMAT]\n" + " %1$s %2$s dump BTF_SRC [format FORMAT] [root_id ROOT_ID]\n" " %1$s %2$s help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" - " FORMAT := { raw | c }\n" + " FORMAT := { raw | c [unsorted] }\n" " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS " |\n" diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index eda71fdfe95a..def297e879f4 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -38,7 +38,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, __u32 info_len = sizeof(info); const char *prog_name = NULL; struct btf *prog_btf = NULL; - struct bpf_func_info finfo; + struct bpf_func_info finfo = {}; __u32 finfo_rec_size; char prog_str[1024]; int err; @@ -57,7 +57,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, if (prog_fd < 0) goto print; - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (err) goto print; @@ -70,7 +70,7 @@ static int dump_prog_id_as_func_ptr(const struct 
btf_dumper *d, info.func_info_rec_size = finfo_rec_size; info.func_info = ptr_to_u64(&finfo); - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (err) goto print; @@ -127,7 +127,7 @@ static void btf_dumper_ptr(const struct btf_dumper *d, print_ptr_value: if (d->is_plain_text) - jsonw_printf(d->jw, "%p", (void *)value); + jsonw_printf(d->jw, "\"%p\"", (void *)value); else jsonw_printf(d->jw, "%lu", value); } @@ -590,7 +590,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_DATASEC: return btf_dumper_datasec(d, type_id, data); default: - jsonw_printf(d->jw, "(unsupported-kind"); + jsonw_printf(d->jw, "(unsupported-kind)"); return -EINVAL; } } @@ -653,7 +653,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, case BTF_KIND_ARRAY: array = (struct btf_array *)(t + 1); BTF_PRINT_TYPE(array->type); - BTF_PRINT_ARG("[%d]", array->nelems); + BTF_PRINT_ARG("[%u]", array->nelems); break; case BTF_KIND_PTR: BTF_PRINT_TYPE(t->type); @@ -821,3 +821,86 @@ void btf_dump_linfo_json(const struct btf *btf, BPF_LINE_INFO_LINE_COL(linfo->line_col)); } } + +static void dotlabel_puts(const char *s) +{ + for (; *s; ++s) { + switch (*s) { + case '\\': + case '"': + case '{': + case '}': + case '<': + case '>': + case '|': + case ' ': + putchar('\\'); + fallthrough; + default: + putchar(*s); + } + } +} + +static const char *shorten_path(const char *path) +{ + const unsigned int MAX_PATH_LEN = 32; + size_t len = strlen(path); + const char *shortpath; + + if (len <= MAX_PATH_LEN) + return path; + + /* Search for last '/' under the MAX_PATH_LEN limit */ + shortpath = strchr(path + len - MAX_PATH_LEN, '/'); + if (shortpath) { + if (shortpath < path + strlen("...")) + /* We removed a very short prefix, e.g. "/w", and we'll + * make the path longer by prefixing with the ellipsis. + * Not worth it, keep initial path. 
+ */ + return path; + return shortpath; + } + + /* File base name length is > MAX_PATH_LEN, search for last '/' */ + shortpath = strrchr(path, '/'); + if (shortpath) + return shortpath; + + return path; +} + +void btf_dump_linfo_dotlabel(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (!line || !strlen(line)) + return; + line = ltrim(line); + + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + const char *shortfile; + + /* More forgiving on file because linum option is + * expected to provide more info than the already + * available src line. + */ + if (!file) + shortfile = ""; + else + shortfile = shorten_path(file); + + printf("; [%s", shortfile > file ? "..." : ""); + dotlabel_puts(shortfile); + printf(" line:%u col:%u]\\l\\\n", + BPF_LINE_INFO_LINE_NUM(linfo->line_col), + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } + + printf("; "); + dotlabel_puts(line); + printf("\\l\\\n"); +} diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c index 1951219a9af7..e3785f9a697d 100644 --- a/tools/bpf/bpftool/cfg.c +++ b/tools/bpf/bpftool/cfg.c @@ -302,6 +302,7 @@ static bool func_add_bb_edges(struct func_node *func) insn = bb->tail; if (!is_jmp_insn(insn->code) || + BPF_OP(insn->code) == BPF_CALL || BPF_OP(insn->code) == BPF_EXIT) { e->dst = bb_next(bb); e->flags |= EDGE_FLAG_FALLTHROUGH; @@ -380,7 +381,9 @@ static void cfg_destroy(struct cfg *cfg) } } -static void draw_bb_node(struct func_node *func, struct bb_node *bb) +static void +draw_bb_node(struct func_node *func, struct bb_node *bb, struct dump_data *dd, + bool opcodes, bool linum) { const char *shape; @@ -398,13 +401,10 @@ static void draw_bb_node(struct func_node *func, struct bb_node *bb) printf("EXIT"); } else { unsigned int start_idx; - struct dump_data dd = {}; - - printf("{"); - kernel_syms_load(&dd); + printf("{\\\n"); start_idx = bb->head - func->start; - 
dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx); - kernel_syms_destroy(&dd); + dump_xlated_for_graph(dd, bb->head, bb->tail, start_idx, + opcodes, linum); printf("}"); } @@ -430,12 +430,14 @@ static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb) } } -static void func_output_bb_def(struct func_node *func) +static void +func_output_bb_def(struct func_node *func, struct dump_data *dd, + bool opcodes, bool linum) { struct bb_node *bb; list_for_each_entry(bb, &func->bbs, l) { - draw_bb_node(func, bb); + draw_bb_node(func, bb, dd, opcodes, linum); } } @@ -455,7 +457,8 @@ static void func_output_edges(struct func_node *func) func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX); } -static void cfg_dump(struct cfg *cfg) +static void +cfg_dump(struct cfg *cfg, struct dump_data *dd, bool opcodes, bool linum) { struct func_node *func; @@ -463,14 +466,15 @@ static void cfg_dump(struct cfg *cfg) list_for_each_entry(func, &cfg->funcs, l) { printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n", func->idx, func->idx); - func_output_bb_def(func); + func_output_bb_def(func, dd, opcodes, linum); func_output_edges(func); printf("}\n"); } printf("}\n"); } -void dump_xlated_cfg(void *buf, unsigned int len) +void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned int len, + bool opcodes, bool linum) { struct bpf_insn *insn = buf; struct cfg cfg; @@ -479,7 +483,7 @@ void dump_xlated_cfg(void *buf, unsigned int len) if (cfg_build(&cfg, insn, len)) return; - cfg_dump(&cfg); + cfg_dump(&cfg, dd, opcodes, linum); cfg_destroy(&cfg); } diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h index e144257ea6d2..b3793f4e1783 100644 --- a/tools/bpf/bpftool/cfg.h +++ b/tools/bpf/bpftool/cfg.h @@ -4,6 +4,9 @@ #ifndef __BPF_TOOL_CFG_H #define __BPF_TOOL_CFG_H -void dump_xlated_cfg(void *buf, unsigned int len); +#include "xlated_dumper.h" + +void dump_xlated_cfg(struct dump_data *dd, void *buf, unsigned 
int len, + bool opcodes, bool linum); #endif /* __BPF_TOOL_CFG_H */ diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index b46a998d8f8d..ec356deb27c9 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -2,6 +2,10 @@ // Copyright (C) 2017 Facebook // Author: Roman Gushchin <guro@fb.com> +#undef GCC_VERSION +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #define _XOPEN_SOURCE 500 #include <errno.h> #include <fcntl.h> @@ -19,6 +23,38 @@ #include "main.h" +static const int cgroup_attach_types[] = { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_INET_SOCK_RELEASE, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_INET4_GETPEERNAME, + BPF_CGROUP_INET6_GETPEERNAME, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_INET4_GETSOCKNAME, + BPF_CGROUP_INET6_GETSOCKNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UDP4_RECVMSG, + BPF_CGROUP_UDP6_RECVMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_SOCK_OPS, + BPF_CGROUP_DEVICE, + BPF_CGROUP_SYSCTL, + BPF_CGROUP_GETSOCKOPT, + BPF_CGROUP_SETSOCKOPT, + BPF_LSM_CGROUP +}; + #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" @@ -28,13 +64,15 @@ " cgroup_device | cgroup_inet4_bind |\n" \ " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \ " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \ - " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \ - " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \ - " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \ - " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \ - " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \ - " cgroup_getsockopt | cgroup_setsockopt |\n" \ - " cgroup_inet_sock_release }" + " cgroup_inet6_connect | cgroup_unix_connect |\n" 
\ + " cgroup_inet4_getpeername | cgroup_inet6_getpeername |\n" \ + " cgroup_unix_getpeername | cgroup_inet4_getsockname |\n" \ + " cgroup_inet6_getsockname | cgroup_unix_getsockname |\n" \ + " cgroup_udp4_sendmsg | cgroup_udp6_sendmsg |\n" \ + " cgroup_unix_sendmsg | cgroup_udp4_recvmsg |\n" \ + " cgroup_udp6_recvmsg | cgroup_unix_recvmsg |\n" \ + " cgroup_sysctl | cgroup_getsockopt |\n" \ + " cgroup_setsockopt | cgroup_inet_sock_release }" static unsigned int query_flags; static struct btf *btf_vmlinux; @@ -82,7 +120,7 @@ static void guess_vmlinux_btf_id(__u32 attach_btf_obj_id) if (fd < 0) return; - err = bpf_obj_get_info_by_fd(fd, &btf_info, &btf_len); + err = bpf_btf_get_info_by_fd(fd, &btf_info, &btf_len); if (err) goto out; @@ -108,7 +146,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, if (prog_fd < 0) return -1; - if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) { close(prog_fd); return -1; } @@ -157,7 +195,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, if (attach_btf_name) printf(" %-15s", attach_btf_name); else if (info.attach_btf_id) - printf(" attach_btf_obj_id=%d attach_btf_id=%d", + printf(" attach_btf_obj_id=%u attach_btf_id=%u", info.attach_btf_obj_id, info.attach_btf_id); printf("\n"); } @@ -181,11 +219,11 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) static int cgroup_has_attached_progs(int cgroup_fd) { - enum bpf_attach_type type; + unsigned int i = 0; bool no_prog = true; - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - int count = count_attached_bpf_progs(cgroup_fd, type); + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { + int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]); if (count < 0 && errno != EINVAL) return -1; @@ -284,11 +322,11 @@ static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type, static int do_show(int argc, char **argv) { - enum 
bpf_attach_type type; int has_attached_progs; const char *path; int cgroup_fd; int ret = -1; + unsigned int i; query_flags = 0; @@ -336,14 +374,14 @@ static int do_show(int argc, char **argv) "AttachFlags", "Name"); btf_vmlinux = libbpf_find_kernel_btf(); - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { /* * Not all attach types may be supported, so it's expected, * that some requests will fail. * If we were able to get the show for at least one * attach type, let's return 0. */ - if (show_bpf_progs(cgroup_fd, type, 0) == 0) + if (show_bpf_progs(cgroup_fd, cgroup_attach_types[i], 0) == 0) ret = 0; } @@ -366,9 +404,9 @@ exit: static int do_show_tree_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftw) { - enum bpf_attach_type type; int has_attached_progs; int cgroup_fd; + unsigned int i; if (typeflag != FTW_D) return 0; @@ -400,8 +438,8 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, } btf_vmlinux = libbpf_find_kernel_btf(); - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) - show_bpf_progs(cgroup_fd, type, ftw->level); + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) + show_bpf_progs(cgroup_fd, cgroup_attach_types[i], ftw->level); if (errno == EINVAL) /* Last attach type does not support query. 
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 620032042576..e8daf963ecef 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -4,6 +4,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include <assert.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> @@ -20,6 +21,7 @@ #include <sys/resource.h> #include <sys/stat.h> #include <sys/vfs.h> +#include <sys/utsname.h> #include <linux/filter.h> #include <linux/limits.h> @@ -30,6 +32,7 @@ #include <bpf/hashmap.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ #include <bpf/btf.h> +#include <zlib.h> #include "main.h" @@ -68,7 +71,7 @@ void p_info(const char *fmt, ...) va_end(ap); } -static bool is_bpffs(char *path) +static bool is_bpffs(const char *path) { struct statfs st_fs; @@ -193,7 +196,8 @@ int mount_tracefs(const char *target) return err; } -int open_obj_pinned(const char *path, bool quiet) +int open_obj_pinned(const char *path, bool quiet, + const struct bpf_obj_get_opts *opts) { char *pname; int fd = -1; @@ -205,7 +209,7 @@ int open_obj_pinned(const char *path, bool quiet) goto out_ret; } - fd = bpf_obj_get(pname); + fd = bpf_obj_get_opts(pname, opts); if (fd < 0) { if (!quiet) p_err("bpf obj get (%s): %s", pname, @@ -221,12 +225,13 @@ out_ret: return fd; } -int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) +int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type, + const struct bpf_obj_get_opts *opts) { enum bpf_obj_type type; int fd; - fd = open_obj_pinned(path, false); + fd = open_obj_pinned(path, false, opts); if (fd < 0) return -1; @@ -244,26 +249,101 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) return fd; } -int mount_bpffs_for_pin(const char *name) +int create_and_mount_bpffs_dir(const char *dir_name) { char err_str[ERR_MAX_LEN]; - char *file; + bool dir_exists; + int err = 0; + + if (is_bpffs(dir_name)) + return err; + + dir_exists = access(dir_name, F_OK) == 0; + + if 
(!dir_exists) { + char *temp_name; + char *parent_name; + + temp_name = strdup(dir_name); + if (!temp_name) { + p_err("mem alloc failed"); + return -1; + } + + parent_name = dirname(temp_name); + + if (is_bpffs(parent_name)) { + /* nothing to do if already mounted */ + free(temp_name); + return err; + } + + if (access(parent_name, F_OK) == -1) { + p_err("can't create dir '%s' to pin BPF object: parent dir '%s' doesn't exist", + dir_name, parent_name); + free(temp_name); + return -1; + } + + free(temp_name); + } + + if (block_mount) { + p_err("no BPF file system found, not mounting it due to --nomount option"); + return -1; + } + + if (!dir_exists) { + err = mkdir(dir_name, S_IRWXU); + if (err) { + p_err("failed to create dir '%s': %s", dir_name, strerror(errno)); + return err; + } + } + + err = mnt_fs(dir_name, "bpf", err_str, ERR_MAX_LEN); + if (err) { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount BPF file system on given dir '%s': %s", + dir_name, err_str); + + if (!dir_exists) + rmdir(dir_name); + } + + return err; +} + +int mount_bpffs_for_file(const char *file_name) +{ + char err_str[ERR_MAX_LEN]; + char *temp_name; char *dir; int err = 0; - file = malloc(strlen(name) + 1); - if (!file) { + if (access(file_name, F_OK) != -1) { + p_err("can't pin BPF object: path '%s' already exists", file_name); + return -1; + } + + temp_name = strdup(file_name); + if (!temp_name) { p_err("mem alloc failed"); return -1; } - strcpy(file, name); - dir = dirname(file); + dir = dirname(temp_name); if (is_bpffs(dir)) /* nothing to do if already mounted */ goto out_free; + if (access(dir, F_OK) == -1) { + p_err("can't pin BPF object: dir '%s' doesn't exist", dir); + err = -1; + goto out_free; + } + if (block_mount) { p_err("no BPF file system found, not mounting it due to --nomount option"); err = -1; @@ -273,12 +353,12 @@ int mount_bpffs_for_pin(const char *name) err = mnt_fs(dir, "bpf", err_str, ERR_MAX_LEN); if (err) { err_str[ERR_MAX_LEN - 1] = '\0'; - p_err("can't 
mount BPF file system to pin the object (%s): %s", - name, err_str); + p_err("can't mount BPF file system to pin the object '%s': %s", + file_name, err_str); } out_free: - free(file); + free(temp_name); return err; } @@ -286,7 +366,7 @@ int do_pin_fd(int fd, const char *name) { int err; - err = mount_bpffs_for_pin(name); + err = mount_bpffs_for_file(name); if (err) return err; @@ -335,7 +415,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, { const char *prog_name = prog_info->name; const struct btf_type *func_type; - const struct bpf_func_info finfo = {}; + struct bpf_func_info finfo = {}; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); struct btf *prog_btf = NULL; @@ -353,7 +433,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, info.func_info_rec_size = sizeof(finfo); info.func_info = ptr_to_u64(&finfo); - if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) + if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) goto copy_name; prog_btf = btf__load_from_kernel_by_id(info.btf_id); @@ -386,10 +466,11 @@ int get_fd_type(int fd) p_err("can't read link type: %s", strerror(errno)); return -1; } - if (n == sizeof(path)) { + if (n == sizeof(buf)) { p_err("can't read link type: path too long!"); return -1; } + buf[n] = '\0'; if (strstr(buf, "bpf-map")) return BPF_OBJ_MAP; @@ -479,7 +560,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (typeflag != FTW_F) goto out_ret; - fd = open_obj_pinned(fpath, true); + fd = open_obj_pinned(fpath, true, NULL); if (fd < 0) goto out_ret; @@ -488,7 +569,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, goto out_close; memset(&pinned_info, 0, sizeof(pinned_info)); - if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) + if (bpf_prog_get_info_by_fd(fd, &pinned_info, &len)) goto out_close; path = strdup(fpath); @@ -638,7 +719,7 @@ ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt) 
int vendor_id; if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { - p_err("Can't get net device name for ifindex %d: %s", ifindex, + p_err("Can't get net device name for ifindex %u: %s", ifindex, strerror(errno)); return NULL; } @@ -663,7 +744,7 @@ ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt) /* No NFP support in LLVM, we have no valid triple to return. */ default: p_err("Can't get arch name for device vendor id 0x%04x", - vendor_id); + (unsigned int)vendor_id); return NULL; } } @@ -756,7 +837,7 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) goto err_close_fds; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_prog_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get prog info (%u): %s", id, strerror(errno)); @@ -852,7 +933,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds) path = **argv; NEXT_ARGP(); - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG, NULL); if ((*fds)[0] < 0) return -1; return 1; @@ -889,7 +970,8 @@ exit_free: return fd; } -static int map_fd_by_name(char *name, int **fds) +static int map_fd_by_name(char *name, int **fds, + const struct bpf_get_fd_by_id_opts *opts) { unsigned int id = 0; int fd, nb_fds = 0; @@ -897,6 +979,7 @@ static int map_fd_by_name(char *name, int **fds) int err; while (true) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro); struct bpf_map_info info = {}; __u32 len = sizeof(info); @@ -909,14 +992,16 @@ static int map_fd_by_name(char *name, int **fds) return nb_fds; } - fd = bpf_map_get_fd_by_id(id); + /* Request a read-only fd to query the map info */ + opts_ro.open_flags = BPF_F_RDONLY; + fd = bpf_map_get_fd_by_id_opts(id, &opts_ro); if (fd < 0) { p_err("can't get map by id (%u): %s", id, strerror(errno)); goto err_close_fds; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_map_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get map info (%u): %s", id, 
strerror(errno)); @@ -928,6 +1013,19 @@ static int map_fd_by_name(char *name, int **fds) continue; } + /* Get an fd with the requested options, if they differ + * from the read-only options used to get the fd above. + */ + if (memcmp(opts, &opts_ro, sizeof(opts_ro))) { + close(fd); + fd = bpf_map_get_fd_by_id_opts(id, opts); + if (fd < 0) { + p_err("can't get map by id (%u): %s", id, + strerror(errno)); + goto err_close_fds; + } + } + if (nb_fds > 0) { tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); if (!tmp) { @@ -947,8 +1045,13 @@ err_close_fds: return -1; } -int map_parse_fds(int *argc, char ***argv, int **fds) +int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); + + assert((open_flags & ~BPF_F_RDONLY) == 0); + opts.open_flags = open_flags; + if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -962,7 +1065,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds) } NEXT_ARGP(); - (*fds)[0] = bpf_map_get_fd_by_id(id); + (*fds)[0] = bpf_map_get_fd_by_id_opts(id, &opts); if ((*fds)[0] < 0) { p_err("get map by id (%u): %s", id, strerror(errno)); return -1; @@ -980,16 +1083,18 @@ int map_parse_fds(int *argc, char ***argv, int **fds) } NEXT_ARGP(); - return map_fd_by_name(name, fds); + return map_fd_by_name(name, fds, &opts); } else if (is_prefix(**argv, "pinned")) { char *path; + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + get_opts.file_flags = open_flags; NEXT_ARGP(); path = **argv; NEXT_ARGP(); - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP, &get_opts); if ((*fds)[0] < 0) return -1; return 1; @@ -999,7 +1104,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds) return -1; } -int map_parse_fd(int *argc, char ***argv) +int map_parse_fd(int *argc, char ***argv, __u32 open_flags) { int *fds = NULL; int nb_fds, fd; @@ -1009,7 +1114,7 @@ int map_parse_fd(int *argc, char ***argv) p_err("mem alloc failed"); return -1; } - nb_fds = 
map_parse_fds(argc, argv, &fds); + nb_fds = map_parse_fds(argc, argv, &fds, open_flags); if (nb_fds != 1) { if (nb_fds > 1) { p_err("several maps match this handle"); @@ -1026,16 +1131,17 @@ exit_free: return fd; } -int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info, + __u32 *info_len, __u32 open_flags) { int err; int fd; - fd = map_parse_fd(argc, argv); + fd = map_parse_fd(argc, argv, open_flags); if (fd < 0) return -1; - err = bpf_obj_get_info_by_fd(fd, info, info_len); + err = bpf_map_get_info_by_fd(fd, info, info_len); if (err) { p_err("can't get map info: %s", strerror(errno)); close(fd); @@ -1090,3 +1196,108 @@ const char *bpf_attach_type_input_str(enum bpf_attach_type t) default: return libbpf_bpf_attach_type_str(t); } } + +int pathname_concat(char *buf, int buf_sz, const char *path, + const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); + if (len < 0) + return -EINVAL; + if (len >= buf_sz) + return -ENAMETOOLONG; + + return 0; +} + +static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n, + char **value) +{ + char *sep; + + while (gzgets(file, buf, n)) { + if (strncmp(buf, "CONFIG_", 7)) + continue; + + sep = strchr(buf, '='); + if (!sep) + continue; + + /* Trim ending '\n' */ + buf[strlen(buf) - 1] = '\0'; + + /* Split on '=' and ensure that a value is present. 
*/ + *sep = '\0'; + if (!sep[1]) + continue; + + *value = sep + 1; + return true; + } + + return false; +} + +int read_kernel_config(const struct kernel_config_option *requested_options, + size_t num_options, char **out_values, + const char *define_prefix) +{ + struct utsname utsn; + char path[PATH_MAX]; + gzFile file = NULL; + char buf[4096]; + char *value; + size_t i; + int ret = 0; + + if (!requested_options || !out_values || num_options == 0) + return -1; + + if (!uname(&utsn)) { + snprintf(path, sizeof(path), "/boot/config-%s", utsn.release); + + /* gzopen also accepts uncompressed files. */ + file = gzopen(path, "r"); + } + + if (!file) { + /* Some distributions build with CONFIG_IKCONFIG=y and put the + * config file at /proc/config.gz. + */ + file = gzopen("/proc/config.gz", "r"); + } + + if (!file) { + p_info("skipping kernel config, can't open file: %s", + strerror(errno)); + return -1; + } + + if (!gzgets(file, buf, sizeof(buf)) || !gzgets(file, buf, sizeof(buf))) { + p_info("skipping kernel config, can't read from file: %s", + strerror(errno)); + ret = -1; + goto end_parse; + } + + if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) { + p_info("skipping kernel config, can't find correct file"); + ret = -1; + goto end_parse; + } + + while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) { + for (i = 0; i < num_options; i++) { + if ((define_prefix && !requested_options[i].macro_dump) || + out_values[i] || strcmp(buf, requested_options[i].name)) + continue; + + out_values[i] = strdup(value); + } + } + +end_parse: + gzclose(file); + return ret; +} diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 36cf0f1517c9..0f6070a0c8e7 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -10,7 +10,6 @@ #ifdef USE_LIBCAP #include <sys/capability.h> #endif -#include <sys/utsname.h> #include <sys/vfs.h> #include <linux/filter.h> @@ -18,7 +17,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> 
-#include <zlib.h> #include "main.h" @@ -167,12 +165,12 @@ static int get_vendor_id(int ifindex) return strtol(buf, NULL, 0); } -static int read_procfs(const char *path) +static long read_procfs(const char *path) { char *endptr, *line = NULL; size_t len = 0; FILE *fd; - int res; + long res; fd = fopen(path, "r"); if (!fd) @@ -194,9 +192,9 @@ static int read_procfs(const char *path) static void probe_unprivileged_disabled(void) { - int res; + long res; - /* No support for C-style ouptut */ + /* No support for C-style output */ res = read_procfs("/proc/sys/kernel/unprivileged_bpf_disabled"); if (json_output) { @@ -216,16 +214,16 @@ static void probe_unprivileged_disabled(void) printf("Unable to retrieve required privileges for bpf() syscall\n"); break; default: - printf("bpf() syscall restriction has unknown value %d\n", res); + printf("bpf() syscall restriction has unknown value %ld\n", res); } } } static void probe_jit_enable(void) { - int res; + long res; - /* No support for C-style ouptut */ + /* No support for C-style output */ res = read_procfs("/proc/sys/net/core/bpf_jit_enable"); if (json_output) { @@ -245,7 +243,7 @@ static void probe_jit_enable(void) printf("Unable to retrieve JIT-compiler status\n"); break; default: - printf("JIT-compiler status has unknown value %d\n", + printf("JIT-compiler status has unknown value %ld\n", res); } } @@ -253,9 +251,9 @@ static void probe_jit_enable(void) static void probe_jit_harden(void) { - int res; + long res; - /* No support for C-style ouptut */ + /* No support for C-style output */ res = read_procfs("/proc/sys/net/core/bpf_jit_harden"); if (json_output) { @@ -275,7 +273,7 @@ static void probe_jit_harden(void) printf("Unable to retrieve JIT hardening status\n"); break; default: - printf("JIT hardening status has unknown value %d\n", + printf("JIT hardening status has unknown value %ld\n", res); } } @@ -283,9 +281,9 @@ static void probe_jit_harden(void) static void probe_jit_kallsyms(void) { - int res; + long res; - 
/* No support for C-style ouptut */ + /* No support for C-style output */ res = read_procfs("/proc/sys/net/core/bpf_jit_kallsyms"); if (json_output) { @@ -302,16 +300,16 @@ static void probe_jit_kallsyms(void) printf("Unable to retrieve JIT kallsyms export status\n"); break; default: - printf("JIT kallsyms exports status has unknown value %d\n", res); + printf("JIT kallsyms exports status has unknown value %ld\n", res); } } } static void probe_jit_limit(void) { - int res; + long res; - /* No support for C-style ouptut */ + /* No support for C-style output */ res = read_procfs("/proc/sys/net/core/bpf_jit_limit"); if (json_output) { @@ -322,45 +320,14 @@ static void probe_jit_limit(void) printf("Unable to retrieve global memory limit for JIT compiler for unprivileged users\n"); break; default: - printf("Global memory limit for JIT compiler for unprivileged users is %d bytes\n", res); + printf("Global memory limit for JIT compiler for unprivileged users is %ld bytes\n", res); } } } -static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n, - char **value) -{ - char *sep; - - while (gzgets(file, buf, n)) { - if (strncmp(buf, "CONFIG_", 7)) - continue; - - sep = strchr(buf, '='); - if (!sep) - continue; - - /* Trim ending '\n' */ - buf[strlen(buf) - 1] = '\0'; - - /* Split on '=' and ensure that a value is present. 
*/ - *sep = '\0'; - if (!sep[1]) - continue; - - *value = sep + 1; - return true; - } - - return false; -} - static void probe_kernel_image_config(const char *define_prefix) { - static const struct { - const char * const name; - bool macro_dump; - } options[] = { + struct kernel_config_option options[] = { /* Enable BPF */ { "CONFIG_BPF", }, /* Enable bpf() syscall */ @@ -426,10 +393,6 @@ static void probe_kernel_image_config(const char *define_prefix) { "CONFIG_BPF_STREAM_PARSER", }, /* xt_bpf module for passing BPF programs to netfilter */ { "CONFIG_NETFILTER_XT_MATCH_BPF", }, - /* bpfilter back-end for iptables */ - { "CONFIG_BPFILTER", }, - /* bpftilter module with "user mode helper" */ - { "CONFIG_BPFILTER_UMH", }, /* test_bpf module for BPF tests */ { "CONFIG_TEST_BPF", }, @@ -439,56 +402,11 @@ static void probe_kernel_image_config(const char *define_prefix) { "CONFIG_HZ", true, } }; char *values[ARRAY_SIZE(options)] = { }; - struct utsname utsn; - char path[PATH_MAX]; - gzFile file = NULL; - char buf[4096]; - char *value; size_t i; - if (!uname(&utsn)) { - snprintf(path, sizeof(path), "/boot/config-%s", utsn.release); - - /* gzopen also accepts uncompressed files. */ - file = gzopen(path, "r"); - } - - if (!file) { - /* Some distributions build with CONFIG_IKCONFIG=y and put the - * config file at /proc/config.gz. 
- */ - file = gzopen("/proc/config.gz", "r"); - } - if (!file) { - p_info("skipping kernel config, can't open file: %s", - strerror(errno)); - goto end_parse; - } - /* Sanity checks */ - if (!gzgets(file, buf, sizeof(buf)) || - !gzgets(file, buf, sizeof(buf))) { - p_info("skipping kernel config, can't read from file: %s", - strerror(errno)); - goto end_parse; - } - if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) { - p_info("skipping kernel config, can't find correct file"); - goto end_parse; - } - - while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) { - for (i = 0; i < ARRAY_SIZE(options); i++) { - if ((define_prefix && !options[i].macro_dump) || - values[i] || strcmp(buf, options[i].name)) - continue; - - values[i] = strdup(value); - } - } - -end_parse: - if (file) - gzclose(file); + if (read_kernel_config(options, ARRAY_SIZE(options), values, + define_prefix)) + return; for (i = 0; i < ARRAY_SIZE(options); i++) { if (define_prefix && !options[i].macro_dump) @@ -668,7 +586,8 @@ probe_helper_ifindex(enum bpf_func_id id, enum bpf_prog_type prog_type, probe_prog_load_ifindex(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex); - res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); + res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ") && + !grep(buf, "program of this type cannot use helper "); switch (get_vendor_id(ifindex)) { case 0x19ee: /* Netronome specific */ @@ -757,7 +676,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, case BPF_FUNC_probe_write_user: if (!full_mode) continue; - /* fallthrough */ + fallthrough; default: probe_res |= probe_helper_for_progtype(prog_type, supported_type, define_prefix, id, prog_type_str, @@ -888,6 +807,28 @@ probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) "V3_ISA_EXTENSION"); } +/* + * Probe for the v4 instruction set extension introduced in commit 1f9a1ea821ff + * ("bpf: Support new sign-extension load insns"). 
+ */ +static void +probe_v4_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[5] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 1, 1), + BPF_JMP32_A(1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v4_isa_extension", + "ISA extension v4", + "V4_ISA_EXTENSION"); +} + static void section_system_config(enum probe_component target, const char *define_prefix) { @@ -1032,6 +973,7 @@ static void section_misc(const char *define_prefix, __u32 ifindex) probe_bounded_loops(define_prefix, ifindex); probe_v2_isa_extension(define_prefix, ifindex); probe_v3_isa_extension(define_prefix, ifindex); + probe_v4_isa_extension(define_prefix, ifindex); print_end_section(); } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 2883660d6b67..993c7d9484a4 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -7,6 +7,7 @@ #include <ctype.h> #include <errno.h> #include <fcntl.h> +#include <libgen.h> #include <linux/err.h> #include <stdbool.h> #include <stdio.h> @@ -54,11 +55,27 @@ static bool str_has_suffix(const char *str, const char *suffix) return true; } +static const struct btf_type * +resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) +{ + const struct btf_type *t; + + t = skip_mods_and_typedefs(btf, id, NULL); + if (!btf_is_ptr(t)) + return NULL; + + t = skip_mods_and_typedefs(btf, t->type, res_id); + + return btf_is_func_proto(t) ? t : NULL; +} + static void get_obj_name(char *name, const char *file) { - /* Using basename() GNU version which doesn't modify arg. */ - strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1); - name[MAX_OBJ_NAME_LEN - 1] = '\0'; + char file_copy[PATH_MAX]; + + /* Using basename() POSIX version to be more portable. 
*/ + strncpy(file_copy, file, PATH_MAX - 1)[PATH_MAX - 1] = '\0'; + strncpy(name, basename(file_copy), MAX_OBJ_NAME_LEN - 1)[MAX_OBJ_NAME_LEN - 1] = '\0'; if (str_has_suffix(name, ".o")) name[strlen(name) - 2] = '\0'; sanitize_identifier(name); @@ -103,6 +120,12 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; int i, n; + /* recognize hard coded LLVM section name */ + if (strcmp(sec_name, ".addr_space.1") == 0) { + /* this is the name to use in skeleton */ + snprintf(buf, buf_sz, "arena"); + return true; + } for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) { const char *pfx = pfxs[i]; @@ -231,8 +254,15 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map return NULL; } -static bool is_internal_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) +static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz) { + size_t tmp_sz; + + if (bpf_map__type(map) == BPF_MAP_TYPE_ARENA && bpf_map__initial_value(map, &tmp_sz)) { + snprintf(buf, sz, "arena"); + return true; + } + if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) return false; @@ -257,7 +287,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -310,7 +340,7 @@ static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -356,7 +386,7 
@@ static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name */ needs_typeof = btf_is_array(var) || btf_is_ptr_to_func_proto(btf, var); if (needs_typeof) - printf("typeof("); + printf("__typeof__("); err = btf_dump__emit_type_decl(d, var_type_id, &opts); if (err) @@ -487,7 +517,7 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name) ", obj_name); bpf_object__for_each_map(map, obj) { - if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident))) + if (!is_mmapable_map(map, map_ident, sizeof(map_ident))) continue; sec = find_type_for_map(btf, map_ident); @@ -640,7 +670,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) continue; if (bpf_map__is_internal(map) && (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) - printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zd);\n", + printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zu);\n", ident, bpf_map_mmap_sz(map)); codegen("\ \n\ @@ -658,10 +688,17 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard) { DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); + struct bpf_load_and_run_opts sopts = {}; + char sig_buf[MAX_SIG_SIZE]; + __u8 prog_sha[SHA256_DIGEST_LENGTH]; struct bpf_map *map; + char ident[256]; int err = 0; + if (sign_progs) + opts.gen_hash = true; + err = bpf_object__gen_loader(obj, &opts); if (err) return err; @@ -671,6 +708,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h p_err("failed to load object file"); goto out; } + /* If there was no error during load then gen_loader_opts * are populated with the loader program. 
*/ @@ -703,22 +741,27 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h const void *mmap_data = NULL; size_t mmap_size = 0; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; codegen("\ \n\ - skel->%1$s = skel_prep_map_data((void *)\"\\ \n\ - ", ident); + { \n\ + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + "); mmap_data = bpf_map__initial_value(map, &mmap_size); print_hex(mmap_data, mmap_size); codegen("\ \n\ - \", %1$zd, %2$zd); \n\ - if (!skel->%3$s) \n\ - goto cleanup; \n\ - skel->maps.%3$s.initial_value = (__u64) (long) skel->%3$s;\n\ - ", bpf_map_mmap_sz(map), mmap_size, ident); + \"; \n\ + \n\ + skel->%1$s = skel_prep_map_data((void *)data, %2$zd,\n\ + sizeof(data) - 1);\n\ + if (!skel->%1$s) \n\ + goto cleanup; \n\ + skel->maps.%1$s.initial_value = (__u64) (long) skel->%1$s;\n\ + } \n\ + ", ident, bpf_map_mmap_sz(map)); } codegen("\ \n\ @@ -733,36 +776,78 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h { \n\ struct bpf_load_and_run_opts opts = {}; \n\ int err; \n\ - \n\ - opts.ctx = (struct bpf_loader_ctx *)skel; \n\ - opts.data_sz = %2$d; \n\ - opts.data = (void *)\"\\ \n\ + static const char opts_data[] __attribute__((__aligned__(8))) = \"\\\n\ ", - obj_name, opts.data_sz); + obj_name); print_hex(opts.data, opts.data_sz); codegen("\ \n\ \"; \n\ + static const char opts_insn[] __attribute__((__aligned__(8))) = \"\\\n\ "); - + print_hex(opts.insns, opts.insns_sz); codegen("\ \n\ - opts.insns_sz = %d; \n\ - opts.insns = (void *)\"\\ \n\ - ", - opts.insns_sz); - print_hex(opts.insns, opts.insns_sz); + \";\n"); + + if (sign_progs) { + sopts.insns = opts.insns; + sopts.insns_sz = opts.insns_sz; + sopts.excl_prog_hash = prog_sha; + sopts.excl_prog_hash_sz = sizeof(prog_sha); + sopts.signature = sig_buf; + sopts.signature_sz = MAX_SIG_SIZE; + + err = bpftool_prog_sign(&sopts); + if (err < 0) { + 
p_err("failed to sign program"); + goto out; + } + + codegen("\ + \n\ + static const char opts_sig[] __attribute__((__aligned__(8))) = \"\\\n\ + "); + print_hex((const void *)sig_buf, sopts.signature_sz); + codegen("\ + \n\ + \";\n"); + + codegen("\ + \n\ + static const char opts_excl_hash[] __attribute__((__aligned__(8))) = \"\\\n\ + "); + print_hex((const void *)prog_sha, sizeof(prog_sha)); + codegen("\ + \n\ + \";\n"); + + codegen("\ + \n\ + opts.signature = (void *)opts_sig; \n\ + opts.signature_sz = sizeof(opts_sig) - 1; \n\ + opts.excl_prog_hash = (void *)opts_excl_hash; \n\ + opts.excl_prog_hash_sz = sizeof(opts_excl_hash) - 1; \n\ + opts.keyring_id = skel->keyring_id; \n\ + "); + } + codegen("\ \n\ - \"; \n\ + opts.ctx = (struct bpf_loader_ctx *)skel; \n\ + opts.data_sz = sizeof(opts_data) - 1; \n\ + opts.data = (void *)opts_data; \n\ + opts.insns_sz = sizeof(opts_insn) - 1; \n\ + opts.insns = (void *)opts_insn; \n\ + \n\ err = bpf_load_and_run(&opts); \n\ if (err < 0) \n\ return err; \n\ - ", obj_name); + "); bpf_object__for_each_map(map, obj) { const char *mmap_flags; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG) @@ -815,28 +900,45 @@ out: } static void -codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) +codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool populate_links) { struct bpf_map *map; char ident[256]; - size_t i; + size_t i, map_sz; if (!map_cnt) return; + /* for backward compatibility with old libbpf versions that don't + * handle new BPF skeleton with new struct bpf_map_skeleton definition + * that includes link field, avoid specifying new increased size, + * unless we absolutely have to (i.e., if there are struct_ops maps + * present) + */ + map_sz = offsetof(struct bpf_map_skeleton, link); + if (populate_links) { + bpf_object__for_each_map(map, obj) { + if 
(bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + map_sz = sizeof(struct bpf_map_skeleton); + break; + } + } + } + codegen("\ \n\ - \n\ + \n\ /* maps */ \n\ s->map_cnt = %zu; \n\ - s->map_skel_sz = sizeof(*s->maps); \n\ - s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\ + s->map_skel_sz = %zu; \n\ + s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt,\n\ + sizeof(*s->maps) > %zu ? sizeof(*s->maps) : %zu);\n\ if (!s->maps) { \n\ err = -ENOMEM; \n\ goto err; \n\ } \n\ ", - map_cnt + map_cnt, map_sz, map_sz, map_sz ); i = 0; bpf_object__for_each_map(map, obj) { @@ -845,15 +947,22 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) codegen("\ \n\ - \n\ - s->maps[%zu].name = \"%s\"; \n\ - s->maps[%zu].map = &obj->maps.%s; \n\ + \n\ + map = (struct bpf_map_skeleton *)((char *)s->maps + %zu * s->map_skel_sz);\n\ + map->name = \"%s\"; \n\ + map->map = &obj->maps.%s; \n\ ", - i, bpf_map__name(map), i, ident); + i, bpf_map__name(map), ident); /* memory-mapped internal maps */ - if (mmaped && is_internal_mmapable_map(map, ident, sizeof(ident))) { - printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", - i, ident); + if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) { + printf("\tmap->mmaped = (void **)&obj->%s;\n", ident); + } + + if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + codegen("\ + \n\ + map->link = &obj->links.%s; \n\ + ", ident); } i++; } @@ -903,10 +1012,212 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li } } +static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident, + const struct btf_type *map_type, __u32 map_type_id) +{ + LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, .indent_level = 3); + const struct btf_type *member_type; + __u32 offset, next_offset = 0; + const struct btf_member *m; + struct btf_dump *d = NULL; + const char *member_name; + __u32 member_type_id; + int i, err = 0, n; + int size; + + d = 
btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); + if (!d) + return -errno; + + n = btf_vlen(map_type); + for (i = 0, m = btf_members(map_type); i < n; i++, m++) { + member_type = skip_mods_and_typedefs(btf, m->type, &member_type_id); + member_name = btf__name_by_offset(btf, m->name_off); + + offset = m->offset / 8; + if (next_offset < offset) + printf("\t\t\tchar __padding_%d[%u];\n", i, offset - next_offset); + + switch (btf_kind(member_type)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* scalar type */ + printf("\t\t\t"); + opts.field_name = member_name; + err = btf_dump__emit_type_decl(d, member_type_id, &opts); + if (err) { + p_err("Failed to emit type declaration for %s: %d", member_name, err); + goto out; + } + printf(";\n"); + + size = btf__resolve_size(btf, member_type_id); + if (size < 0) { + p_err("Failed to resolve size of %s: %d\n", member_name, size); + err = size; + goto out; + } + + next_offset = offset + size; + break; + + case BTF_KIND_PTR: + if (resolve_func_ptr(btf, m->type, NULL)) { + /* Function pointer */ + printf("\t\t\tstruct bpf_program *%s;\n", member_name); + + next_offset = offset + sizeof(void *); + break; + } + /* All pointer types are unsupported except for + * function pointers. + */ + fallthrough; + + default: + /* Unsupported types + * + * Types other than scalar types and function + * pointers are currently not supported in order to + * prevent conflicts in the generated code caused + * by multiple definitions. For instance, if the + * struct type FOO is used in a struct_ops map, + * bpftool has to generate definitions for FOO, + * which may result in conflicts if FOO is defined + * in different skeleton files. 
+ */ + size = btf__resolve_size(btf, member_type_id); + if (size < 0) { + p_err("Failed to resolve size of %s: %d\n", member_name, size); + err = size; + goto out; + } + printf("\t\t\tchar __unsupported_%d[%d];\n", i, size); + + next_offset = offset + size; + break; + } + } + + /* Cannot fail since it must be a struct type */ + size = btf__resolve_size(btf, map_type_id); + if (next_offset < (__u32)size) + printf("\t\t\tchar __padding_end[%u];\n", size - next_offset); + +out: + btf_dump__free(d); + + return err; +} + +/* Generate the pointer of the shadow type for a struct_ops map. + * + * This function adds a pointer of the shadow type for a struct_ops map. + * The members of a struct_ops map can be exported through a pointer to a + * shadow type. The user can access these members through the pointer. + * + * A shadow type includes not all members, only members of some types. + * They are scalar types and function pointers. The function pointers are + * translated to the pointer of the struct bpf_program. The scalar types + * are translated to the original type without any modifiers. + * + * Unsupported types will be translated to a char array to occupy the same + * space as the original field, being renamed as __unsupported_*. The user + * should treat these fields as opaque data. 
+ */ +static int gen_st_ops_shadow_type(const char *obj_name, struct btf *btf, const char *ident, + const struct bpf_map *map) +{ + const struct btf_type *map_type; + const char *type_name; + __u32 map_type_id; + int err; + + map_type_id = bpf_map__btf_value_type_id(map); + if (map_type_id == 0) + return -EINVAL; + map_type = btf__type_by_id(btf, map_type_id); + if (!map_type) + return -EINVAL; + + type_name = btf__name_by_offset(btf, map_type->name_off); + + printf("\t\tstruct %s__%s__%s {\n", obj_name, ident, type_name); + + err = walk_st_ops_shadow_vars(btf, ident, map_type, map_type_id); + if (err) + return err; + + printf("\t\t} *%s;\n", ident); + + return 0; +} + +static int gen_st_ops_shadow(const char *obj_name, struct btf *btf, struct bpf_object *obj) +{ + int err, st_ops_cnt = 0; + struct bpf_map *map; + char ident[256]; + + if (!btf) + return 0; + + /* Generate the pointers to shadow types of + * struct_ops maps. + */ + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + + if (st_ops_cnt == 0) /* first struct_ops map */ + printf("\tstruct {\n"); + st_ops_cnt++; + + err = gen_st_ops_shadow_type(obj_name, btf, ident, map); + if (err) + return err; + } + + if (st_ops_cnt) + printf("\t} struct_ops;\n"); + + return 0; +} + +/* Generate the code to initialize the pointers of shadow types. */ +static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj) +{ + struct bpf_map *map; + char ident[256]; + + if (!btf) + return; + + /* Initialize the pointers to_ops shadow types of + * struct_ops maps. 
+ */ + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + codegen("\ + \n\ + obj->struct_ops.%1$s = (__typeof__(obj->struct_ops.%1$s))\n\ + bpf_map__initial_value(obj->maps.%1$s, NULL);\n\ + \n\ + ", ident); + } +} + static int do_skeleton(int argc, char **argv) { char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; - size_t map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz; + size_t map_cnt = 0, prog_cnt = 0, attach_map_cnt = 0, file_sz, mmap_sz; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; @@ -981,7 +1292,7 @@ static int do_skeleton(int argc, char **argv) err = -errno; libbpf_strerror(err, err_buf, sizeof(err_buf)); p_err("failed to open BPF object file: %s", err_buf); - goto out; + goto out_obj; } bpf_object__for_each_map(map, obj) { @@ -990,6 +1301,10 @@ static int do_skeleton(int argc, char **argv) bpf_map__name(map)); continue; } + + if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) + attach_map_cnt++; + map_cnt++; } bpf_object__for_each_program(prog, obj) { @@ -1025,6 +1340,8 @@ static int do_skeleton(int argc, char **argv) #include <stdlib.h> \n\ #include <bpf/libbpf.h> \n\ \n\ + #define BPF_SKEL_SUPPORTS_MAP_AUTO_ATTACH 1 \n\ + \n\ struct %1$s { \n\ struct bpf_object_skeleton *skeleton; \n\ struct bpf_object *obj; \n\ @@ -1046,6 +1363,11 @@ static int do_skeleton(int argc, char **argv) printf("\t} maps;\n"); } + btf = bpf_object__btf(obj); + err = gen_st_ops_shadow(obj_name, btf, obj); + if (err) + goto out; + if (prog_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { @@ -1057,6 +1379,9 @@ static int do_skeleton(int argc, char **argv) bpf_program__name(prog)); } printf("\t} progs;\n"); + } + + if (prog_cnt + attach_map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { if (use_loader) @@ -1066,10 +1391,29 @@ static 
int do_skeleton(int argc, char **argv) printf("\t\tstruct bpf_link *%s;\n", bpf_program__name(prog)); } + + bpf_object__for_each_map(map, obj) { + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + + if (use_loader) + printf("\t\tint %s_fd;\n", ident); + else + printf("\t\tstruct bpf_link *%s;\n", ident); + } + printf("\t} links;\n"); } - btf = bpf_object__btf(obj); + if (sign_progs) { + codegen("\ + \n\ + __s32 keyring_id; \n\ + "); + } + if (btf) { err = codegen_datasecs(obj, obj_name); if (err) @@ -1127,6 +1471,12 @@ static int do_skeleton(int argc, char **argv) if (err) \n\ goto err_out; \n\ \n\ + ", obj_name); + + gen_st_ops_shadow_init(btf, obj); + + codegen("\ + \n\ return obj; \n\ err_out: \n\ %1$s__destroy(obj); \n\ @@ -1188,6 +1538,7 @@ static int do_skeleton(int argc, char **argv) %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ struct bpf_object_skeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ @@ -1203,13 +1554,13 @@ static int do_skeleton(int argc, char **argv) obj_name ); - codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/, true /*links*/); codegen_progs_skeleton(obj, prog_cnt, true /*populate_links*/); codegen("\ \n\ \n\ - s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ + s->data = %1$s__elf_bytes(&s->data_sz); \n\ \n\ obj->skeleton = s; \n\ return 0; \n\ @@ -1218,12 +1569,12 @@ static int do_skeleton(int argc, char **argv) return err; \n\ } \n\ \n\ - static inline const void *%2$s__elf_bytes(size_t *sz) \n\ + static inline const void *%1$s__elf_bytes(size_t *sz) \n\ { \n\ - *sz = %1$d; \n\ - return (const void *)\"\\ \n\ - " - , file_sz, obj_name); + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + ", + obj_name + ); /* embed contents of BPF object file */ print_hex(obj_data, file_sz); @@
-1231,6 +1582,9 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \"; \n\ + \n\ + *sz = sizeof(data) - 1; \n\ + return (const void *)data; \n\ } \n\ \n\ #ifdef __cplusplus \n\ @@ -1257,6 +1611,7 @@ static int do_skeleton(int argc, char **argv) err = 0; out: bpf_object__close(obj); +out_obj: if (obj_data) munmap(obj_data, mmap_sz); close(fd); @@ -1383,7 +1738,7 @@ static int do_subskeleton(int argc, char **argv) /* Also count all maps that have a name */ map_cnt++; - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; map_type_id = bpf_map__btf_value_type_id(map); @@ -1433,6 +1788,10 @@ static int do_subskeleton(int argc, char **argv) printf("\t} maps;\n"); } + err = gen_st_ops_shadow(obj_name, btf, obj); + if (err) + goto out; + if (prog_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { @@ -1471,6 +1830,7 @@ static int do_subskeleton(int argc, char **argv) { \n\ struct %1$s *obj; \n\ struct bpf_object_subskeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ @@ -1501,7 +1861,7 @@ static int do_subskeleton(int argc, char **argv) /* walk through each symbol and emit the runtime representation */ bpf_object__for_each_map(map, obj) { - if (!is_internal_mmapable_map(map, ident, sizeof(ident))) + if (!is_mmapable_map(map, ident, sizeof(ident))) continue; map_type_id = bpf_map__btf_value_type_id(map); @@ -1534,7 +1894,7 @@ static int do_subskeleton(int argc, char **argv) } } - codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/, false /*links*/); codegen_progs_skeleton(obj, prog_cnt, false /*links*/); codegen("\ @@ -1544,6 +1904,12 @@ static int do_subskeleton(int argc, char **argv) if (err) \n\ goto err; \n\ \n\ + "); + + gen_st_ops_shadow_init(btf, obj); + + codegen("\ + \n\ return obj; \n\ err: \n\ %1$s__destroy(obj); \n\ 
@@ -1624,7 +1990,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " " HELP_SPEC_OPTIONS " |\n" - " {-L|--use-loader} }\n" + " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ]}\n" "", bin_name, "gen"); @@ -1789,7 +2155,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi break; /* tells if some other type needs to be handled */ default: - p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id); + p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id); return -EINVAL; } @@ -1841,7 +2207,7 @@ static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_sp btf_type = btf__type_by_id(btf, type_id); break; default: - p_err("unsupported kind: %s (%d)", + p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), btf_type->type); return -EINVAL; } @@ -1940,7 +2306,7 @@ static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool } /* tells if some other type needs to be handled */ default: - p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id); + p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id); return -EINVAL; } @@ -2121,15 +2487,6 @@ out: return err; } -static int btfgen_remap_id(__u32 *type_id, void *ctx) -{ - unsigned int *ids = ctx; - - *type_id = ids[*type_id]; - - return 0; -} - /* Generate BTF from relocation information previously recorded */ static struct btf *btfgen_get_btf(struct btfgen_info *info) { @@ -2209,10 +2566,15 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info) /* second pass: fix up type ids */ for (i = 1; i < btf__type_cnt(btf_new); i++) { struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i); + struct btf_field_iter it; + __u32 *type_id; - err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids); + err = btf_field_iter_init(&it, btf_type, BTF_FIELD_ITER_IDS); if (err) goto err_out; + + while ((type_id = 
btf_field_iter_next(&it))) + *type_id = ids[*type_id]; } free(ids); diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 9a1d2365a297..df5f0d1e07e8 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -37,7 +37,7 @@ static int do_pin(int argc, char **argv) return -1; } - map_fd = map_parse_fd(&argc, &argv); + map_fd = map_parse_fd(&argc, &argv, BPF_F_RDONLY); if (map_fd < 0) return -1; @@ -76,7 +76,7 @@ static int do_pin(int argc, char **argv) goto close_obj; } - err = mount_bpffs_for_pin(path); + err = mount_bpffs_for_file(path); if (err) goto close_link; diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 7b8d9ec89ebd..8895b4e1f690 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -80,7 +80,8 @@ symbol_lookup_callback(__maybe_unused void *disasm_info, static int init_context(disasm_ctx_t *ctx, const char *arch, __maybe_unused const char *disassembler_options, - __maybe_unused unsigned char *image, __maybe_unused ssize_t len) + __maybe_unused unsigned char *image, __maybe_unused ssize_t len, + __maybe_unused __u64 func_ksym) { char *triple; @@ -109,12 +110,13 @@ static void destroy_context(disasm_ctx_t *ctx) } static int -disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) +disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc, + __u64 func_ksym) { char buf[256]; int count; - count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc, + count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, func_ksym + pc, buf, sizeof(buf)); if (json_output) printf_json(buf); @@ -136,8 +138,21 @@ int disasm_init(void) #ifdef HAVE_LIBBFD_SUPPORT #define DISASM_SPACER "\t" +struct disasm_info { + struct disassemble_info info; + __u64 func_ksym; +}; + +static void disasm_print_addr(bfd_vma addr, struct disassemble_info *info) +{ + struct disasm_info *dinfo = container_of(info, struct disasm_info, info); + + addr += 
dinfo->func_ksym; + generic_print_address(addr, info); +} + typedef struct { - struct disassemble_info *info; + struct disasm_info *info; disassembler_ftype disassemble; bfd *bfdf; } disasm_ctx_t; @@ -215,7 +230,7 @@ static int fprintf_json_styled(void *out, static int init_context(disasm_ctx_t *ctx, const char *arch, const char *disassembler_options, - unsigned char *image, ssize_t len) + unsigned char *image, ssize_t len, __u64 func_ksym) { struct disassemble_info *info; char tpath[PATH_MAX]; @@ -238,12 +253,13 @@ static int init_context(disasm_ctx_t *ctx, const char *arch, } bfdf = ctx->bfdf; - ctx->info = malloc(sizeof(struct disassemble_info)); + ctx->info = malloc(sizeof(struct disasm_info)); if (!ctx->info) { p_err("mem alloc failed"); goto err_close; } - info = ctx->info; + ctx->info->func_ksym = func_ksym; + info = &ctx->info->info; if (json_output) init_disassemble_info_compat(info, stdout, @@ -272,6 +288,7 @@ static int init_context(disasm_ctx_t *ctx, const char *arch, info->disassembler_options = disassembler_options; info->buffer = image; info->buffer_length = len; + info->print_address_func = disasm_print_addr; disassemble_init_for_target(info); @@ -304,9 +321,10 @@ static void destroy_context(disasm_ctx_t *ctx) static int disassemble_insn(disasm_ctx_t *ctx, __maybe_unused unsigned char *image, - __maybe_unused ssize_t len, int pc) + __maybe_unused ssize_t len, int pc, + __maybe_unused __u64 func_ksym) { - return ctx->disassemble(pc, ctx->info); + return ctx->disassemble(pc, &ctx->info->info); } int disasm_init(void) @@ -325,13 +343,14 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, { const struct bpf_line_info *linfo = NULL; unsigned int nr_skip = 0; - int count, i, pc = 0; + int count, i; + unsigned int pc = 0; disasm_ctx_t ctx; if (!len) return -1; - if (init_context(&ctx, arch, disassembler_options, image, len)) + if (init_context(&ctx, arch, disassembler_options, image, len, func_ksym)) return -1; if (json_output) @@ 
-360,7 +379,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, printf("%4x:" DISASM_SPACER, pc); } - count = disassemble_insn(&ctx, image, len, pc); + count = disassemble_insn(&ctx, image, len, pc, func_ksym); if (json_output) { /* Operand array, was started in fprintf_json. Before diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index 7fea83bedf48..be379613d118 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -75,14 +75,11 @@ static void jsonw_puts(json_writer_t *self, const char *str) fputs("\\b", self->out); break; case '\\': - fputs("\\n", self->out); + fputs("\\\\", self->out); break; case '"': fputs("\\\"", self->out); break; - case '\'': - fputs("\\\'", self->out); - break; default: putc(*str, self->out); } diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index 8ace65cdb92f..5aaffd3b837b 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -14,6 +14,7 @@ #include <stdbool.h> #include <stdint.h> #include <stdarg.h> +#include <stdio.h> #include <linux/compiler.h> /* Opaque class structure */ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 6f4cfe01cad4..bdcd717b0348 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -3,6 +3,9 @@ #include <errno.h> #include <linux/err.h> +#include <linux/netfilter.h> +#include <linux/netfilter_arp.h> +#include <linux/perf_event.h> #include <net/if.h> #include <stdio.h> #include <unistd.h> @@ -12,8 +15,78 @@ #include "json_writer.h" #include "main.h" +#include "xlated_dumper.h" + +#define PERF_HW_CACHE_LEN 128 static struct hashmap *link_table; +static struct dump_data dd; + +static const char *perf_type_name[PERF_TYPE_MAX] = { + [PERF_TYPE_HARDWARE] = "hardware", + [PERF_TYPE_SOFTWARE] = "software", + [PERF_TYPE_TRACEPOINT] = "tracepoint", + [PERF_TYPE_HW_CACHE] = "hw-cache", + [PERF_TYPE_RAW] = "raw", + [PERF_TYPE_BREAKPOINT] = 
"breakpoint", +}; + +const char *event_symbols_hw[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = "cpu-cycles", + [PERF_COUNT_HW_INSTRUCTIONS] = "instructions", + [PERF_COUNT_HW_CACHE_REFERENCES] = "cache-references", + [PERF_COUNT_HW_CACHE_MISSES] = "cache-misses", + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "branch-instructions", + [PERF_COUNT_HW_BRANCH_MISSES] = "branch-misses", + [PERF_COUNT_HW_BUS_CYCLES] = "bus-cycles", + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "stalled-cycles-frontend", + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "stalled-cycles-backend", + [PERF_COUNT_HW_REF_CPU_CYCLES] = "ref-cycles", +}; + +const char *event_symbols_sw[PERF_COUNT_SW_MAX] = { + [PERF_COUNT_SW_CPU_CLOCK] = "cpu-clock", + [PERF_COUNT_SW_TASK_CLOCK] = "task-clock", + [PERF_COUNT_SW_PAGE_FAULTS] = "page-faults", + [PERF_COUNT_SW_CONTEXT_SWITCHES] = "context-switches", + [PERF_COUNT_SW_CPU_MIGRATIONS] = "cpu-migrations", + [PERF_COUNT_SW_PAGE_FAULTS_MIN] = "minor-faults", + [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = "major-faults", + [PERF_COUNT_SW_ALIGNMENT_FAULTS] = "alignment-faults", + [PERF_COUNT_SW_EMULATION_FAULTS] = "emulation-faults", + [PERF_COUNT_SW_DUMMY] = "dummy", + [PERF_COUNT_SW_BPF_OUTPUT] = "bpf-output", + [PERF_COUNT_SW_CGROUP_SWITCHES] = "cgroup-switches", +}; + +const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] = { + [PERF_COUNT_HW_CACHE_L1D] = "L1-dcache", + [PERF_COUNT_HW_CACHE_L1I] = "L1-icache", + [PERF_COUNT_HW_CACHE_LL] = "LLC", + [PERF_COUNT_HW_CACHE_DTLB] = "dTLB", + [PERF_COUNT_HW_CACHE_ITLB] = "iTLB", + [PERF_COUNT_HW_CACHE_BPU] = "branch", + [PERF_COUNT_HW_CACHE_NODE] = "node", +}; + +const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] = { + [PERF_COUNT_HW_CACHE_OP_READ] = "load", + [PERF_COUNT_HW_CACHE_OP_WRITE] = "store", + [PERF_COUNT_HW_CACHE_OP_PREFETCH] = "prefetch", +}; + +const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [PERF_COUNT_HW_CACHE_RESULT_ACCESS] = "refs", + [PERF_COUNT_HW_CACHE_RESULT_MISS] = 
"misses", +}; + +#define perf_event_name(array, id) ({ \ + const char *event_str = NULL; \ + \ + if ((id) < ARRAY_SIZE(array)) \ + event_str = array[id]; \ + event_str; \ +}) static int link_parse_fd(int *argc, char ***argv) { @@ -34,7 +107,7 @@ static int link_parse_fd(int *argc, char ***argv) fd = bpf_link_get_fd_by_id(id); if (fd < 0) - p_err("failed to get link with ID %d: %s", id, strerror(errno)); + p_err("failed to get link with ID %u: %s", id, strerror(errno)); return fd; } else if (is_prefix(**argv, "pinned")) { char *path; @@ -44,7 +117,7 @@ static int link_parse_fd(int *argc, char ***argv) path = **argv; NEXT_ARGP(); - return open_obj_pinned_any(path, BPF_OBJ_LINK); + return open_obj_pinned_any(path, BPF_OBJ_LINK, NULL); } p_err("expected 'id' or 'pinned', got: '%s'?", **argv); @@ -77,6 +150,18 @@ static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr) jsonw_uint_field(wtr, "attach_type", attach_type); } +static void show_link_ifindex_json(__u32 ifindex, json_writer_t *wtr) +{ + char devname[IF_NAMESIZE] = "(unknown)"; + + if (ifindex) + if_indextoname(ifindex, devname); + else + snprintf(devname, sizeof(devname), "(detached)"); + jsonw_string_field(wtr, "devname", devname); + jsonw_uint_field(wtr, "ifindex", ifindex); +} + static bool is_iter_map_target(const char *target_name) { return strcmp(target_name, "bpf_map_elem") == 0 || @@ -135,6 +220,18 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) } } +void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr) +{ + jsonw_uint_field(json_wtr, "pf", + info->netfilter.pf); + jsonw_uint_field(json_wtr, "hook", + info->netfilter.hooknum); + jsonw_int_field(json_wtr, "prio", + info->netfilter.priority); + jsonw_uint_field(json_wtr, "flags", + info->netfilter.flags); +} + static int get_prog_info(int prog_id, struct bpf_prog_info *info) { __u32 len = sizeof(*info); @@ -145,13 +242,278 @@ static int get_prog_info(int prog_id, struct 
bpf_prog_info *info) return prog_fd; memset(info, 0, sizeof(*info)); - err = bpf_obj_get_info_by_fd(prog_fd, info, &len); + err = bpf_prog_get_info_by_fd(prog_fd, info, &len); if (err) p_err("can't get prog info: %s", strerror(errno)); close(prog_fd); return err; } +struct addr_cookie { + __u64 addr; + __u64 cookie; +}; + +static int cmp_addr_cookie(const void *A, const void *B) +{ + const struct addr_cookie *a = A, *b = B; + + if (a->addr == b->addr) + return 0; + return a->addr < b->addr ? -1 : 1; +} + +static struct addr_cookie * +get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count) +{ + struct addr_cookie *data; + __u32 i; + + data = calloc(count, sizeof(data[0])); + if (!data) { + p_err("mem alloc failed"); + return NULL; + } + for (i = 0; i < count; i++) { + data[i].addr = addrs[i]; + data[i].cookie = cookies[i]; + } + qsort(data, count, sizeof(data[0]), cmp_addr_cookie); + return data; +} + +static bool is_x86_ibt_enabled(void) +{ +#if defined(__x86_64__) + struct kernel_config_option options[] = { + { "CONFIG_X86_KERNEL_IBT", }, + }; + char *values[ARRAY_SIZE(options)] = { }; + bool ret; + + if (read_kernel_config(options, ARRAY_SIZE(options), values, NULL)) + return false; + + ret = !!values[0]; + free(values[0]); + return ret; +#else + return false; +#endif +} + +static bool +symbol_matches_target(__u64 sym_addr, __u64 target_addr, bool is_ibt_enabled) +{ + if (sym_addr == target_addr) + return true; + + /* + * On x86_64 architectures with CET (Control-flow Enforcement Technology), + * function entry points have a 4-byte 'endbr' instruction prefix. + * This causes kprobe hooks to target the address *after* 'endbr' + * (symbol address + 4), preserving the CET instruction. + * Here we check if the symbol address matches the hook target address + * minus 4, indicating a CET-enabled function entry point. 
+ */ + if (is_ibt_enabled && sym_addr == target_addr - 4) + return true; + + return false; +} + +static void +show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + struct addr_cookie *data; + __u32 i, j = 0; + bool is_ibt_enabled; + + jsonw_bool_field(json_wtr, "retprobe", + info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN); + jsonw_uint_field(json_wtr, "func_cnt", info->kprobe_multi.count); + jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed); + jsonw_name(json_wtr, "funcs"); + jsonw_start_array(json_wtr); + data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs), + u64_to_ptr(info->kprobe_multi.cookies), + info->kprobe_multi.count); + if (!data) + goto error; /* still close the "funcs" array opened above */ + + /* Load it once for all. */ + if (!dd.sym_count) + kernel_syms_load(&dd); + if (!dd.sym_count) + goto error; + + is_ibt_enabled = is_x86_ibt_enabled(); + for (i = 0; i < dd.sym_count; i++) { + if (!symbol_matches_target(dd.sym_mapping[i].address, + data[j].addr, is_ibt_enabled)) + continue; + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "addr", (unsigned long)data[j].addr); + jsonw_string_field(json_wtr, "func", dd.sym_mapping[i].name); + /* Print null if it is vmlinux */ + if (dd.sym_mapping[i].module[0] == '\0') { + jsonw_name(json_wtr, "module"); + jsonw_null(json_wtr); + } else { + jsonw_string_field(json_wtr, "module", dd.sym_mapping[i].module); + } + jsonw_uint_field(json_wtr, "cookie", data[j].cookie); + jsonw_end_object(json_wtr); + if (++j == info->kprobe_multi.count) + break; /* all probes reported; data[j] is now past the end */ + } +error: + jsonw_end_array(json_wtr); + free(data); +} + +static __u64 *u64_to_arr(__u64 val) +{ + return (__u64 *) u64_to_ptr(val); +} + +static void +show_uprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + __u32 i; + + jsonw_bool_field(json_wtr, "retprobe", + info->uprobe_multi.flags & BPF_F_UPROBE_MULTI_RETURN); + jsonw_string_field(json_wtr, "path", (char *) u64_to_ptr(info->uprobe_multi.path)); + jsonw_uint_field(json_wtr, "func_cnt",
info->uprobe_multi.count); + jsonw_int_field(json_wtr, "pid", (int) info->uprobe_multi.pid); + jsonw_name(json_wtr, "funcs"); + jsonw_start_array(json_wtr); + + for (i = 0; i < info->uprobe_multi.count; i++) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "offset", + u64_to_arr(info->uprobe_multi.offsets)[i]); + jsonw_uint_field(json_wtr, "ref_ctr_offset", + u64_to_arr(info->uprobe_multi.ref_ctr_offsets)[i]); + jsonw_uint_field(json_wtr, "cookie", + u64_to_arr(info->uprobe_multi.cookies)[i]); + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); +} + +static void +show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + jsonw_bool_field(wtr, "retprobe", info->perf_event.type == BPF_PERF_EVENT_KRETPROBE); + jsonw_uint_field(wtr, "addr", info->perf_event.kprobe.addr); + jsonw_string_field(wtr, "func", + u64_to_ptr(info->perf_event.kprobe.func_name)); + jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset); + jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed); + jsonw_uint_field(wtr, "cookie", info->perf_event.kprobe.cookie); +} + +static void +show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + jsonw_bool_field(wtr, "retprobe", info->perf_event.type == BPF_PERF_EVENT_URETPROBE); + jsonw_string_field(wtr, "file", + u64_to_ptr(info->perf_event.uprobe.file_name)); + jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset); + jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie); + jsonw_uint_field(wtr, "ref_ctr_offset", info->perf_event.uprobe.ref_ctr_offset); +} + +static void +show_perf_event_tracepoint_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + jsonw_string_field(wtr, "tracepoint", + u64_to_ptr(info->perf_event.tracepoint.tp_name)); + jsonw_uint_field(wtr, "cookie", info->perf_event.tracepoint.cookie); +} + +static char *perf_config_hw_cache_str(__u64 config) +{ + const char *hw_cache, *result, *op; + char *str = 
malloc(PERF_HW_CACHE_LEN); + + if (!str) { + p_err("mem alloc failed"); + return NULL; + } + + hw_cache = perf_event_name(evsel__hw_cache, config & 0xff); + if (hw_cache) + snprintf(str, PERF_HW_CACHE_LEN, "%s-", hw_cache); + else + snprintf(str, PERF_HW_CACHE_LEN, "%llu-", config & 0xff); + + op = perf_event_name(evsel__hw_cache_op, (config >> 8) & 0xff); + if (op) + snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str), + "%s-", op); + else + snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str), + "%llu-", (config >> 8) & 0xff); + + result = perf_event_name(evsel__hw_cache_result, config >> 16); + if (result) + snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str), + "%s", result); + else + snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str), + "%llu", config >> 16); + return str; +} + +static const char *perf_config_str(__u32 type, __u64 config) +{ + const char *perf_config; + + switch (type) { + case PERF_TYPE_HARDWARE: + perf_config = perf_event_name(event_symbols_hw, config); + break; + case PERF_TYPE_SOFTWARE: + perf_config = perf_event_name(event_symbols_sw, config); + break; + case PERF_TYPE_HW_CACHE: + perf_config = perf_config_hw_cache_str(config); + break; + default: + perf_config = NULL; + break; + } + return perf_config; +} + +static void +show_perf_event_event_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + __u64 config = info->perf_event.event.config; + __u32 type = info->perf_event.event.type; + const char *perf_type, *perf_config; + + perf_type = perf_event_name(perf_type_name, type); + if (perf_type) + jsonw_string_field(wtr, "event_type", perf_type); + else + jsonw_uint_field(wtr, "event_type", type); + + perf_config = perf_config_str(type, config); + if (perf_config) + jsonw_string_field(wtr, "event_config", perf_config); + else + jsonw_uint_field(wtr, "event_config", config); + + jsonw_uint_field(wtr, "cookie", info->perf_event.event.cookie); + + if (type == PERF_TYPE_HW_CACHE && perf_config) + free((void 
*)perf_config); +} + static int show_link_close_json(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; @@ -166,6 +528,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_RAW_TRACEPOINT: jsonw_string_field(json_wtr, "tp_name", u64_to_ptr(info->raw_tracepoint.tp_name)); + jsonw_uint_field(json_wtr, "cookie", info->raw_tracepoint.cookie); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); @@ -181,6 +544,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) show_link_attach_type_json(info->tracing.attach_type, json_wtr); + jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id); + jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id); + jsonw_uint_field(json_wtr, "cookie", info->tracing.cookie); break; case BPF_LINK_TYPE_CGROUP: jsonw_lluint_field(json_wtr, "cgroup_id", @@ -195,6 +561,54 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) info->netns.netns_ino); show_link_attach_type_json(info->netns.attach_type, json_wtr); break; + case BPF_LINK_TYPE_NETFILTER: + netfilter_dump_json(info, json_wtr); + break; + case BPF_LINK_TYPE_TCX: + show_link_ifindex_json(info->tcx.ifindex, json_wtr); + show_link_attach_type_json(info->tcx.attach_type, json_wtr); + break; + case BPF_LINK_TYPE_NETKIT: + show_link_ifindex_json(info->netkit.ifindex, json_wtr); + show_link_attach_type_json(info->netkit.attach_type, json_wtr); + break; + case BPF_LINK_TYPE_SOCKMAP: + jsonw_uint_field(json_wtr, "map_id", info->sockmap.map_id); + show_link_attach_type_json(info->sockmap.attach_type, json_wtr); + break; + case BPF_LINK_TYPE_XDP: + show_link_ifindex_json(info->xdp.ifindex, json_wtr); + break; + case BPF_LINK_TYPE_STRUCT_OPS: + jsonw_uint_field(json_wtr, "map_id", + info->struct_ops.map_id); + break; + case BPF_LINK_TYPE_KPROBE_MULTI: + show_kprobe_multi_json(info, json_wtr); + break; + case BPF_LINK_TYPE_UPROBE_MULTI: + 
show_uprobe_multi_json(info, json_wtr); + break; + case BPF_LINK_TYPE_PERF_EVENT: + switch (info->perf_event.type) { + case BPF_PERF_EVENT_EVENT: + show_perf_event_event_json(info, json_wtr); + break; + case BPF_PERF_EVENT_TRACEPOINT: + show_perf_event_tracepoint_json(info, json_wtr); + break; + case BPF_PERF_EVENT_KPROBE: + case BPF_PERF_EVENT_KRETPROBE: + show_perf_event_kprobe_json(info, json_wtr); + break; + case BPF_PERF_EVENT_UPROBE: + case BPF_PERF_EVENT_URETPROBE: + show_perf_event_uprobe_json(info, json_wtr); + break; + default: + break; + } + break; default: break; } @@ -227,7 +641,10 @@ static void show_link_header_plain(struct bpf_link_info *info) else printf("type %u ", info->type); - printf("prog %u ", info->prog_id); + if (info->type == BPF_LINK_TYPE_STRUCT_OPS) + printf("map %u ", info->struct_ops.map_id); + else + printf("prog %u ", info->prog_id); } static void show_link_attach_type_plain(__u32 attach_type) @@ -241,6 +658,22 @@ static void show_link_attach_type_plain(__u32 attach_type) printf("attach_type %u ", attach_type); } +static void show_link_ifindex_plain(__u32 ifindex) +{ + char devname[IF_NAMESIZE * 2] = "(unknown)"; + char tmpname[IF_NAMESIZE]; + char *ret = NULL; + + if (ifindex) + ret = if_indextoname(ifindex, tmpname); + else + snprintf(devname, sizeof(devname), "(detached)"); + if (ret) + snprintf(devname, sizeof(devname), "%s(%u)", + tmpname, ifindex); + printf("ifindex %s ", devname); +} + static void show_iter_plain(struct bpf_link_info *info) { const char *target_name = u64_to_ptr(info->iter.target_name); @@ -263,6 +696,225 @@ static void show_iter_plain(struct bpf_link_info *info) } } +static const char * const pf2name[] = { + [NFPROTO_INET] = "inet", + [NFPROTO_IPV4] = "ip", + [NFPROTO_ARP] = "arp", + [NFPROTO_NETDEV] = "netdev", + [NFPROTO_BRIDGE] = "bridge", + [NFPROTO_IPV6] = "ip6", +}; + +static const char * const inethook2name[] = { + [NF_INET_PRE_ROUTING] = "prerouting", + [NF_INET_LOCAL_IN] = "input", + 
[NF_INET_FORWARD] = "forward", + [NF_INET_LOCAL_OUT] = "output", + [NF_INET_POST_ROUTING] = "postrouting", +}; + +static const char * const arphook2name[] = { + [NF_ARP_IN] = "input", + [NF_ARP_OUT] = "output", +}; + +void netfilter_dump_plain(const struct bpf_link_info *info) +{ + const char *hookname = NULL, *pfname = NULL; + unsigned int hook = info->netfilter.hooknum; + unsigned int pf = info->netfilter.pf; + + if (pf < ARRAY_SIZE(pf2name)) + pfname = pf2name[pf]; + + switch (pf) { + case NFPROTO_BRIDGE: /* bridge shares numbers with enum nf_inet_hooks */ + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + if (hook < ARRAY_SIZE(inethook2name)) + hookname = inethook2name[hook]; + break; + case NFPROTO_ARP: + if (hook < ARRAY_SIZE(arphook2name)) + hookname = arphook2name[hook]; + default: + break; + } + + if (pfname) + printf("\n\t%s", pfname); + else + printf("\n\tpf: %u", pf); + + if (hookname) + printf(" %s", hookname); + else + printf(", hook %u,", hook); + + printf(" prio %d", info->netfilter.priority); + + if (info->netfilter.flags) + printf(" flags 0x%x", info->netfilter.flags); +} + +static void show_kprobe_multi_plain(struct bpf_link_info *info) +{ + struct addr_cookie *data; + __u32 i, j = 0; + bool is_ibt_enabled; + + if (!info->kprobe_multi.count) + return; + + if (info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN) + printf("\n\tkretprobe.multi "); + else + printf("\n\tkprobe.multi "); + printf("func_cnt %u ", info->kprobe_multi.count); + if (info->kprobe_multi.missed) + printf("missed %llu ", info->kprobe_multi.missed); + data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs), + u64_to_ptr(info->kprobe_multi.cookies), + info->kprobe_multi.count); + if (!data) + return; + + /* Load it once for all. 
*/ + if (!dd.sym_count) + kernel_syms_load(&dd); + if (!dd.sym_count) + goto error; + + is_ibt_enabled = is_x86_ibt_enabled(); + printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]"); + for (i = 0; i < dd.sym_count; i++) { + if (!symbol_matches_target(dd.sym_mapping[i].address, + data[j].addr, is_ibt_enabled)) + continue; + printf("\n\t%016lx %-16llx %s", + (unsigned long)data[j].addr, data[j].cookie, dd.sym_mapping[i].name); + if (dd.sym_mapping[i].module[0] != '\0') + printf(" [%s] ", dd.sym_mapping[i].module); + else + printf(" "); + + if (++j == info->kprobe_multi.count) + break; /* all probes reported; data[j] is now past the end */ + } +error: + free(data); +} + +static void show_uprobe_multi_plain(struct bpf_link_info *info) +{ + __u32 i; + + if (!info->uprobe_multi.count) + return; + + if (info->uprobe_multi.flags & BPF_F_UPROBE_MULTI_RETURN) + printf("\n\turetprobe.multi "); + else + printf("\n\tuprobe.multi "); + + printf("path %s ", (char *) u64_to_ptr(info->uprobe_multi.path)); + printf("func_cnt %u ", info->uprobe_multi.count); + + if (info->uprobe_multi.pid) + printf("pid %u ", info->uprobe_multi.pid); + + printf("\n\t%-16s %-16s %-16s", "offset", "ref_ctr_offset", "cookies"); + for (i = 0; i < info->uprobe_multi.count; i++) { + printf("\n\t0x%-16llx 0x%-16llx 0x%-16llx", + u64_to_arr(info->uprobe_multi.offsets)[i], + u64_to_arr(info->uprobe_multi.ref_ctr_offsets)[i], + u64_to_arr(info->uprobe_multi.cookies)[i]); + } +} + +static void show_perf_event_kprobe_plain(struct bpf_link_info *info) +{ + const char *buf; + + buf = u64_to_ptr(info->perf_event.kprobe.func_name); + if (buf[0] == '\0' && !info->perf_event.kprobe.addr) + return; + + if (info->perf_event.type == BPF_PERF_EVENT_KRETPROBE) + printf("\n\tkretprobe "); + else + printf("\n\tkprobe "); + if (info->perf_event.kprobe.addr) + printf("%llx ", info->perf_event.kprobe.addr); + printf("%s", buf); + if (info->perf_event.kprobe.offset) + printf("+%#x", info->perf_event.kprobe.offset); + if (info->perf_event.kprobe.missed) + printf(" missed
%llu", info->perf_event.kprobe.missed); + if (info->perf_event.kprobe.cookie) + printf(" cookie %llu", info->perf_event.kprobe.cookie); + printf(" "); +} + +static void show_perf_event_uprobe_plain(struct bpf_link_info *info) +{ + const char *buf; + + buf = u64_to_ptr(info->perf_event.uprobe.file_name); + if (buf[0] == '\0') + return; + + if (info->perf_event.type == BPF_PERF_EVENT_URETPROBE) + printf("\n\turetprobe "); + else + printf("\n\tuprobe "); + printf("%s+%#x ", buf, info->perf_event.uprobe.offset); + if (info->perf_event.uprobe.cookie) + printf("cookie %llu ", info->perf_event.uprobe.cookie); + if (info->perf_event.uprobe.ref_ctr_offset) + printf("ref_ctr_offset 0x%llx ", info->perf_event.uprobe.ref_ctr_offset); +} + +static void show_perf_event_tracepoint_plain(struct bpf_link_info *info) +{ + const char *buf; + + buf = u64_to_ptr(info->perf_event.tracepoint.tp_name); + if (buf[0] == '\0') + return; + + printf("\n\ttracepoint %s ", buf); + if (info->perf_event.tracepoint.cookie) + printf("cookie %llu ", info->perf_event.tracepoint.cookie); +} + +static void show_perf_event_event_plain(struct bpf_link_info *info) +{ + __u64 config = info->perf_event.event.config; + __u32 type = info->perf_event.event.type; + const char *perf_type, *perf_config; + + printf("\n\tevent "); + perf_type = perf_event_name(perf_type_name, type); + if (perf_type) + printf("%s:", perf_type); + else + printf("%u :", type); + + perf_config = perf_config_str(type, config); + if (perf_config) + printf("%s ", perf_config); + else + printf("%llu ", config); + + if (info->perf_event.event.cookie) + printf("cookie %llu ", info->perf_event.event.cookie); + + if (type == PERF_TYPE_HW_CACHE && perf_config) + free((void *)perf_config); +} + static int show_link_close_plain(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; @@ -275,6 +927,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_RAW_TRACEPOINT: printf("\n\ttp '%s' ", (const 
char *)u64_to_ptr(info->raw_tracepoint.tp_name)); + if (info->raw_tracepoint.cookie) + printf("cookie %llu ", info->raw_tracepoint.cookie); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); @@ -289,6 +943,12 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tprog_type %u ", prog_info.type); show_link_attach_type_plain(info->tracing.attach_type); + if (info->tracing.target_obj_id || info->tracing.target_btf_id) + printf("\n\ttarget_obj_id %u target_btf_id %u ", + info->tracing.target_obj_id, + info->tracing.target_btf_id); + if (info->tracing.cookie) + printf("\n\tcookie %llu ", info->tracing.cookie); break; case BPF_LINK_TYPE_CGROUP: printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id); @@ -301,6 +961,54 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tnetns_ino %u ", info->netns.netns_ino); show_link_attach_type_plain(info->netns.attach_type); break; + case BPF_LINK_TYPE_NETFILTER: + netfilter_dump_plain(info); + break; + case BPF_LINK_TYPE_TCX: + printf("\n\t"); + show_link_ifindex_plain(info->tcx.ifindex); + show_link_attach_type_plain(info->tcx.attach_type); + break; + case BPF_LINK_TYPE_NETKIT: + printf("\n\t"); + show_link_ifindex_plain(info->netkit.ifindex); + show_link_attach_type_plain(info->netkit.attach_type); + break; + case BPF_LINK_TYPE_SOCKMAP: + printf("\n\t"); + printf("map_id %u ", info->sockmap.map_id); + show_link_attach_type_plain(info->sockmap.attach_type); + break; + case BPF_LINK_TYPE_XDP: + printf("\n\t"); + show_link_ifindex_plain(info->xdp.ifindex); + break; + case BPF_LINK_TYPE_KPROBE_MULTI: + show_kprobe_multi_plain(info); + break; + case BPF_LINK_TYPE_UPROBE_MULTI: + show_uprobe_multi_plain(info); + break; + case BPF_LINK_TYPE_PERF_EVENT: + switch (info->perf_event.type) { + case BPF_PERF_EVENT_EVENT: + show_perf_event_event_plain(info); + break; + case BPF_PERF_EVENT_TRACEPOINT: + show_perf_event_tracepoint_plain(info); + 
break; + case BPF_PERF_EVENT_KPROBE: + case BPF_PERF_EVENT_KRETPROBE: + show_perf_event_kprobe_plain(info); + break; + case BPF_PERF_EVENT_UPROBE: + case BPF_PERF_EVENT_URETPROBE: + show_perf_event_uprobe_plain(info); + break; + default: + break; + } + break; default: break; } @@ -320,14 +1028,19 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) static int do_show_link(int fd) { + __u64 *ref_ctr_offsets = NULL, *offsets = NULL, *cookies = NULL; struct bpf_link_info info; __u32 len = sizeof(info); - char buf[256]; + char path_buf[PATH_MAX]; + __u64 *addrs = NULL; + char buf[PATH_MAX]; + int count; int err; memset(&info, 0, sizeof(info)); + buf[0] = '\0'; again: - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_link_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get link info: %s", strerror(errno)); @@ -336,22 +1049,110 @@ again: } if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT && !info.raw_tracepoint.tp_name) { - info.raw_tracepoint.tp_name = (unsigned long)&buf; + info.raw_tracepoint.tp_name = ptr_to_u64(&buf); info.raw_tracepoint.tp_name_len = sizeof(buf); goto again; } if (info.type == BPF_LINK_TYPE_ITER && !info.iter.target_name) { - info.iter.target_name = (unsigned long)&buf; + info.iter.target_name = ptr_to_u64(&buf); info.iter.target_name_len = sizeof(buf); goto again; } + if (info.type == BPF_LINK_TYPE_KPROBE_MULTI && + !info.kprobe_multi.addrs) { + count = info.kprobe_multi.count; + if (count) { + addrs = calloc(count, sizeof(__u64)); + if (!addrs) { + p_err("mem alloc failed"); + close(fd); + return -ENOMEM; + } + info.kprobe_multi.addrs = ptr_to_u64(addrs); + cookies = calloc(count, sizeof(__u64)); + if (!cookies) { + p_err("mem alloc failed"); + free(addrs); + close(fd); + return -ENOMEM; + } + info.kprobe_multi.cookies = ptr_to_u64(cookies); + goto again; + } + } + if (info.type == BPF_LINK_TYPE_UPROBE_MULTI && + !info.uprobe_multi.offsets) { + count = info.uprobe_multi.count; + if (count) { + offsets = 
calloc(count, sizeof(__u64)); + if (!offsets) { + p_err("mem alloc failed"); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.offsets = ptr_to_u64(offsets); + ref_ctr_offsets = calloc(count, sizeof(__u64)); + if (!ref_ctr_offsets) { + p_err("mem alloc failed"); + free(offsets); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets); + cookies = calloc(count, sizeof(__u64)); + if (!cookies) { + p_err("mem alloc failed"); + free(ref_ctr_offsets); + free(offsets); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.cookies = ptr_to_u64(cookies); + info.uprobe_multi.path = ptr_to_u64(path_buf); + info.uprobe_multi.path_size = sizeof(path_buf); + goto again; + } + } + if (info.type == BPF_LINK_TYPE_PERF_EVENT) { + switch (info.perf_event.type) { + case BPF_PERF_EVENT_TRACEPOINT: + if (!info.perf_event.tracepoint.tp_name) { + info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf); + info.perf_event.tracepoint.name_len = sizeof(buf); + goto again; + } + break; + case BPF_PERF_EVENT_KPROBE: + case BPF_PERF_EVENT_KRETPROBE: + if (!info.perf_event.kprobe.func_name) { + info.perf_event.kprobe.func_name = ptr_to_u64(&buf); + info.perf_event.kprobe.name_len = sizeof(buf); + goto again; + } + break; + case BPF_PERF_EVENT_UPROBE: + case BPF_PERF_EVENT_URETPROBE: + if (!info.perf_event.uprobe.file_name) { + info.perf_event.uprobe.file_name = ptr_to_u64(&buf); + info.perf_event.uprobe.name_len = sizeof(buf); + goto again; + } + break; + default: + break; + } + } if (json_output) show_link_close_json(fd, &info); else show_link_close_plain(fd, &info); + free(ref_ctr_offsets); + free(cookies); + free(offsets); + free(addrs); close(fd); return 0; } @@ -376,7 +1177,8 @@ static int do_show(int argc, char **argv) fd = link_parse_fd(&argc, &argv); if (fd < 0) return fd; - return do_show_link(fd); + do_show_link(fd); + goto out; } if (argc) @@ -415,6 +1217,9 @@ static int do_show(int argc, char **argv) if (show_pinned) 
delete_pinned_obj_table(link_table); +out: + if (dd.sym_count) + kernel_syms_destroy(&dd); return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 08d0ac543c67..a829a6a49037 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -33,6 +33,9 @@ bool relaxed_maps; bool use_loader; struct btf *base_btf; struct hashmap *refs_table; +bool sign_progs; +const char *private_key_path; +const char *cert_path; static void __noreturn clean_and_exit(int i) { @@ -61,7 +64,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n" + " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token }\n" " " HELP_SPEC_OPTIONS " |\n" " {-V|--version} }\n" "", @@ -87,6 +90,7 @@ static const struct cmd commands[] = { { "gen", do_gen }, { "struct_ops", do_struct_ops }, { "iter", do_iter }, + { "token", do_token }, { "version", do_version }, { 0 } }; @@ -152,7 +156,7 @@ static int do_version(int argc, char **argv) BPFTOOL_MINOR_VERSION, BPFTOOL_PATCH_VERSION); #endif jsonw_name(json_wtr, "libbpf_version"); - jsonw_printf(json_wtr, "\"%d.%d\"", + jsonw_printf(json_wtr, "\"%u.%u\"", libbpf_major_version(), libbpf_minor_version()); jsonw_name(json_wtr, "features"); @@ -370,7 +374,7 @@ static int do_batch(int argc, char **argv) while ((cp = strstr(buf, "\\\n")) != NULL) { if (!fgets(contline, sizeof(contline), fp) || strlen(contline) == 0) { - p_err("missing continuation line on command %d", + p_err("missing continuation line on command %u", lines); err = -1; goto err_close; @@ -381,7 +385,7 @@ static int do_batch(int argc, char **argv) *cp = '\0'; if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) { - p_err("command %d is too long", lines); + p_err("command %u is too long", lines); err = -1; goto err_close; } @@ -423,7 +427,7 @@ static int 
do_batch(int argc, char **argv) err = -1; } else { if (!json_output) - printf("processed %d commands\n", lines); + printf("processed %u commands\n", lines); } err_close: if (fp != stdin) @@ -447,6 +451,7 @@ int main(int argc, char **argv) { "nomount", no_argument, NULL, 'n' }, { "debug", no_argument, NULL, 'd' }, { "use-loader", no_argument, NULL, 'L' }, + { "sign", no_argument, NULL, 'S' }, { "base-btf", required_argument, NULL, 'B' }, { 0 } }; @@ -473,7 +478,7 @@ int main(int argc, char **argv) bin_name = "bpftool"; opterr = 0; - while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", + while ((opt = getopt_long(argc, argv, "VhpjfLmndSi:k:B:l", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -519,6 +524,16 @@ int main(int argc, char **argv) case 'L': use_loader = true; break; + case 'S': + sign_progs = true; + use_loader = true; + break; + case 'k': + private_key_path = optarg; + break; + case 'i': + cert_path = optarg; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -533,10 +548,20 @@ int main(int argc, char **argv) if (argc < 0) usage(); - if (version_requested) - return do_version(argc, argv); + if (sign_progs && (private_key_path == NULL || cert_path == NULL)) { + p_err("-i <identity_x509_cert> and -k <private_key> must be supplied with -S for signing"); + return -EINVAL; + } + + if (!sign_progs && (private_key_path != NULL || cert_path != NULL)) { + p_err("--sign (or -S) must be explicitly passed with -i <identity_x509_cert> and -k <private_key> to sign the programs"); + return -EINVAL; + } - ret = cmd_select(commands, argc, argv, do_help); + if (version_requested) + ret = do_version(argc, argv); + else + ret = cmd_select(commands, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index a84224b6a604..1130299cede0 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -6,15 +6,21 @@ /* BFD and kernel.h both define 
GCC_VERSION, differently */ #undef GCC_VERSION +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <stdbool.h> #include <stdio.h> +#include <errno.h> #include <stdlib.h> +#include <bpf/skel_internal.h> #include <linux/bpf.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <bpf/hashmap.h> #include <bpf/libbpf.h> +#include <bpf/bpf.h> #include "json_writer.h" @@ -51,6 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) }) #define ERR_MAX_LEN 1024 +#define MAX_SIG_SIZE 4096 #define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" @@ -84,6 +91,9 @@ extern bool relaxed_maps; extern bool use_loader; extern struct btf *base_btf; extern struct hashmap *refs_table; +extern bool sign_progs; +extern const char *private_key_path; +extern const char *cert_path; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -140,9 +150,12 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); -int open_obj_pinned(const char *path, bool quiet); -int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); -int mount_bpffs_for_pin(const char *name); +int open_obj_pinned(const char *path, bool quiet, + const struct bpf_obj_get_opts *opts); +int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type, + const struct bpf_obj_get_opts *opts); +int mount_bpffs_for_file(const char *file_name); +int create_and_mount_bpffs_dir(const char *dir_name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); @@ -162,13 +175,15 @@ int do_tracelog(int argc, char **arg) __weak; int do_feature(int argc, char **argv) __weak; int do_struct_ops(int argc, char **argv) __weak; int do_iter(int argc, char **argv) __weak; +int do_token(int argc, char **argv) __weak; int parse_u32_arg(int *argc, char ***argv, 
__u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); int prog_parse_fds(int *argc, char ***argv, int **fds); -int map_parse_fd(int *argc, char ***argv); -int map_parse_fds(int *argc, char ***argv, int **fds); -int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); +int map_parse_fd(int *argc, char ***argv, __u32 open_flags); +int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags); +int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info, + __u32 *info_len, __u32 open_flags); struct bpf_prog_linfo; #if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT) @@ -228,6 +243,8 @@ void btf_dump_linfo_plain(const struct btf *btf, const char *prefix, bool linum); void btf_dump_linfo_json(const struct btf *btf, const struct bpf_line_info *linfo, bool linum); +void btf_dump_linfo_dotlabel(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum); struct nlattr; struct ifinfomsg; @@ -261,4 +278,21 @@ static inline bool hashmap__empty(struct hashmap *map) return map ? 
hashmap__size(map) == 0 : true; } +int pathname_concat(char *buf, int buf_sz, const char *path, + const char *name); + +/* print netfilter bpf_link info */ +void netfilter_dump_plain(const struct bpf_link_info *info); +void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr); + +struct kernel_config_option { + const char *name; + bool macro_dump; +}; + +int read_kernel_config(const struct kernel_config_option *requested_options, + size_t num_options, char **out_values, + const char *define_prefix); +int bpftool_prog_sign(struct bpf_load_and_run_opts *opts); +__u32 register_session_key(const char *key_der_path); #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 88911d3aa2d9..7ebf7dbcfba4 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -139,6 +139,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, print_hex_data_json(key, info->key_size); jsonw_name(json_wtr, "value"); print_hex_data_json(value, info->value_size); + if (map_is_map_of_maps(info->type)) + jsonw_uint_field(json_wtr, "inner_map_id", + *(unsigned int *)value); if (btf) { struct btf_dumper d = { .btf = btf, @@ -259,8 +262,13 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, } if (info->value_size) { - printf("value:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, value, info->value_size, " "); + if (map_is_map_of_maps(info->type)) { + printf("inner_map_id:%c", break_names ? '\n' : ' '); + printf("%u ", *(unsigned int *)value); + } else { + printf("value:%c", break_names ? '\n' : ' '); + fprint_hex(stdout, value, info->value_size, " "); + } } printf("\n"); @@ -277,7 +285,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, } if (info->value_size) { for (i = 0; i < n; i++) { - printf("value (CPU %02d):%c", + printf("value (CPU %02u):%c", i, info->value_size > 16 ? 
'\n' : ' '); fprint_hex(stdout, value + i * step, info->value_size, " "); @@ -308,7 +316,7 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, } if (i != n) { - p_err("%s expected %d bytes got %d", name, n, i); + p_err("%s expected %u bytes got %u", name, n, i); return NULL; } @@ -329,9 +337,9 @@ static void fill_per_cpu_value(struct bpf_map_info *info, void *value) memcpy(value + i * step, value, info->value_size); } -static int parse_elem(char **argv, struct bpf_map_info *info, - void *key, void *value, __u32 key_size, __u32 value_size, - __u32 *flags, __u32 **value_fd) +static int parse_elem(char **argv, struct bpf_map_info *info, void *key, + void *value, __u32 key_size, __u32 value_size, + __u32 *flags, __u32 **value_fd, __u32 open_flags) { if (!*argv) { if (!key && !value) @@ -354,7 +362,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; return parse_elem(argv, info, NULL, value, key_size, value_size, - flags, value_fd); + flags, value_fd, open_flags); } else if (is_prefix(*argv, "value")) { int fd; @@ -380,7 +388,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; } - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, open_flags); if (fd < 0) return -1; @@ -416,7 +424,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, } return parse_elem(argv, info, key, NULL, key_size, value_size, - flags, NULL); + flags, NULL, open_flags); } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") || is_prefix(*argv, "exist")) { if (!flags) { @@ -432,7 +440,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, *flags = BPF_EXIST; return parse_elem(argv + 1, info, key, value, key_size, - value_size, NULL, value_fd); + value_size, NULL, value_fd, open_flags); } p_err("expected key or value, got: %s", *argv); @@ -454,7 +462,7 @@ static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr) jsonw_string_field(wtr, "name", 
info->name); jsonw_name(wtr, "flags"); - jsonw_printf(wtr, "%d", info->map_flags); + jsonw_printf(wtr, "%u", info->map_flags); } static int show_map_close_json(int fd, struct bpf_map_info *info) @@ -580,7 +588,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (prog_type_str) printf("owner_prog_type %s ", prog_type_str); else - printf("owner_prog_type %d ", prog_type); + printf("owner_prog_type %u ", prog_type); } if (owner_jited) printf("owner%s jited", @@ -607,7 +615,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf("\n\t"); if (info->btf_id) - printf("btf_id %d", info->btf_id); + printf("btf_id %u", info->btf_id); if (frozen) printf("%sfrozen", info->btf_id ? " " : ""); @@ -631,14 +639,14 @@ static int do_show_subset(int argc, char **argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(&argc, &argv, &fds); + nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY); if (nb_fds < 1) goto exit_free; if (json_output && nb_fds > 1) jsonw_start_array(json_wtr); /* root array */ for (i = 0; i < nb_fds; i++) { - err = bpf_obj_get_info_by_fd(fds[i], &info, &len); + err = bpf_map_get_info_by_fd(fds[i], &info, &len); if (err) { p_err("can't get map info: %s", strerror(errno)); @@ -664,12 +672,15 @@ exit_free: static int do_show(int argc, char **argv) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); struct bpf_map_info info = {}; __u32 len = sizeof(info); __u32 id = 0; int err; int fd; + opts.open_flags = BPF_F_RDONLY; + if (show_pinned) { map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); @@ -699,7 +710,7 @@ static int do_show(int argc, char **argv) break; } - fd = bpf_map_get_fd_by_id(id); + fd = bpf_map_get_fd_by_id_opts(id, &opts); if (fd < 0) { if (errno == ENOENT) continue; @@ -708,7 +719,7 @@ static int do_show(int argc, char **argv) break; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_map_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get map 
info: %s", strerror(errno)); close(fd); @@ -764,7 +775,7 @@ static int maps_have_btf(int *fds, int nb_fds) int err, i; for (i = 0; i < nb_fds; i++) { - err = bpf_obj_get_info_by_fd(fds[i], &info, &len); + err = bpf_map_get_info_by_fd(fds[i], &info, &len); if (err) { p_err("can't get map info: %s", strerror(errno)); return -1; @@ -901,7 +912,7 @@ static int do_dump(int argc, char **argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(&argc, &argv, &fds); + nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY); if (nb_fds < 1) goto exit_free; @@ -925,7 +936,7 @@ static int do_dump(int argc, char **argv) if (wtr && nb_fds > 1) jsonw_start_array(wtr); /* root array */ for (i = 0; i < nb_fds; i++) { - if (bpf_obj_get_info_by_fd(fds[i], &info, &len)) { + if (bpf_map_get_info_by_fd(fds[i], &info, &len)) { p_err("can't get map info: %s", strerror(errno)); break; } @@ -989,7 +1000,7 @@ static int do_update(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -998,7 +1009,7 @@ static int do_update(int argc, char **argv) goto exit_free; err = parse_elem(argv, &info, key, value, info.key_size, - info.value_size, &flags, &value_fd); + info.value_size, &flags, &value_fd, 0); if (err) goto exit_free; @@ -1068,7 +1079,7 @@ static int do_lookup(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, BPF_F_RDONLY); if (fd < 0) return -1; @@ -1076,7 +1087,8 @@ static int do_lookup(int argc, char **argv) if (err) goto exit_free; - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL, + BPF_F_RDONLY); if (err) goto exit_free; @@ -1119,7 +1131,7 @@ static int do_getnext(int argc, char **argv) if (argc < 2) usage(); - fd = 
map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, BPF_F_RDONLY); if (fd < 0) return -1; @@ -1132,8 +1144,8 @@ static int do_getnext(int argc, char **argv) } if (argc) { - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, - NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, + NULL, BPF_F_RDONLY); if (err) goto exit_free; } else { @@ -1190,7 +1202,7 @@ static int do_delete(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1201,7 +1213,8 @@ static int do_delete(int argc, char **argv) goto exit_free; } - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL, + 0); if (err) goto exit_free; @@ -1218,11 +1231,16 @@ exit_free: return err; } +static int map_parse_read_only_fd(int *argc, char ***argv) +{ + return map_parse_fd(argc, argv, BPF_F_RDONLY); +} + static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, map_parse_fd); + err = do_pin_any(argc, argv, map_parse_read_only_fd); if (!err && json_output) jsonw_null(json_wtr); return err; @@ -1262,6 +1280,10 @@ static int do_create(int argc, char **argv) } else if (is_prefix(*argv, "name")) { NEXT_ARG(); map_name = GET_ARG(); + if (strlen(map_name) > BPF_OBJ_NAME_LEN - 1) { + p_info("Warning: map name is longer than %u characters, it will be truncated.", + BPF_OBJ_NAME_LEN - 1); + } } else if (is_prefix(*argv, "key")) { if (parse_u32_arg(&argc, &argv, &key_size, "key size")) @@ -1279,6 +1301,11 @@ static int do_create(int argc, char **argv) "flags")) goto exit; } else if (is_prefix(*argv, "dev")) { + p_info("Warning: 'bpftool map create [...] 
dev <ifname>' syntax is deprecated.\n" + "Going further, please use 'offload_dev <ifname>' to request hardware offload for the map."); + goto offload_dev; + } else if (is_prefix(*argv, "offload_dev")) { +offload_dev: NEXT_ARG(); if (attr.map_ifindex) { @@ -1302,7 +1329,7 @@ static int do_create(int argc, char **argv) if (!REQ_ARGS(2)) usage(); inner_map_fd = map_parse_fd_and_info(&argc, &argv, - &info, &len); + &info, &len, BPF_F_RDONLY); if (inner_map_fd < 0) return -1; attr.inner_map_fd = inner_map_fd; @@ -1351,7 +1378,7 @@ static int do_pop_dequeue(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1390,7 +1417,7 @@ static int do_freeze(int argc, char **argv) if (!REQ_ARGS(2)) return -1; - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, 0); if (fd < 0) return -1; @@ -1423,7 +1450,7 @@ static int do_help(int argc, char **argv) "Usage: %1$s %2$s { show | list } [MAP]\n" " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n" " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" - " [inner_map MAP] [dev NAME]\n" + " [inner_map MAP] [offload_dev NAME]\n" " %1$s %2$s dump MAP\n" " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" " %1$s %2$s lookup MAP [key DATA]\n" @@ -1450,7 +1477,8 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n" + " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena |\n" + " insn_array }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 21d7d447e1f3..bcb767e2d673 100644 --- 
a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -91,15 +91,15 @@ print_bpf_output(void *private_data, int cpu, struct perf_event_header *event) jsonw_end_object(json_wtr); } else { if (e->header.type == PERF_RECORD_SAMPLE) { - printf("== @%lld.%09lld CPU: %d index: %d =====\n", + printf("== @%llu.%09llu CPU: %d index: %d =====\n", e->time / 1000000000ULL, e->time % 1000000000ULL, cpu, idx); fprint_hex(stdout, e->data, e->size, " "); printf("\n"); } else if (e->header.type == PERF_RECORD_LOST) { - printf("lost %lld events\n", lost->lost); + printf("lost %llu events\n", lost->lost); } else { - printf("unknown event type=%d size=%d\n", + printf("unknown event type=%u size=%u\n", e->header.type, e->header.size); } } @@ -128,7 +128,8 @@ int do_event_pipe(int argc, char **argv) int err, map_fd; map_info_len = sizeof(map_info); - map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len); + map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len, + 0); if (map_fd < 0) return -1; diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index c40e44c938ae..cfc6f944f7c3 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -67,6 +67,8 @@ enum net_attach_type { NET_ATTACH_TYPE_XDP_GENERIC, NET_ATTACH_TYPE_XDP_DRIVER, NET_ATTACH_TYPE_XDP_OFFLOAD, + NET_ATTACH_TYPE_TCX_INGRESS, + NET_ATTACH_TYPE_TCX_EGRESS, }; static const char * const attach_type_strings[] = { @@ -74,6 +76,15 @@ static const char * const attach_type_strings[] = { [NET_ATTACH_TYPE_XDP_GENERIC] = "xdpgeneric", [NET_ATTACH_TYPE_XDP_DRIVER] = "xdpdrv", [NET_ATTACH_TYPE_XDP_OFFLOAD] = "xdpoffload", + [NET_ATTACH_TYPE_TCX_INGRESS] = "tcx_ingress", + [NET_ATTACH_TYPE_TCX_EGRESS] = "tcx_egress", +}; + +static const char * const attach_loc_strings[] = { + [BPF_TCX_INGRESS] = "tcx/ingress", + [BPF_TCX_EGRESS] = "tcx/egress", + [BPF_NETKIT_PRIMARY] = "netkit/primary", + [BPF_NETKIT_PEER] = "netkit/peer", }; const size_t net_attach_type_size = 
ARRAY_SIZE(attach_type_strings); @@ -355,17 +366,18 @@ static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; struct ifinfomsg *ifinfo = msg; + struct ip_devname_ifindex *tmp; if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index) return 0; if (netinfo->used_len == netinfo->array_len) { - netinfo->devices = realloc(netinfo->devices, - (netinfo->array_len + 16) * - sizeof(struct ip_devname_ifindex)); - if (!netinfo->devices) + tmp = realloc(netinfo->devices, + (netinfo->array_len + 16) * sizeof(struct ip_devname_ifindex)); + if (!tmp) return -ENOMEM; + netinfo->devices = tmp; netinfo->array_len += 16; } netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index; @@ -384,6 +396,7 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_tcinfo_t *tcinfo = cookie; struct tcmsg *info = msg; + struct tc_kind_handle *tmp; if (tcinfo->is_qdisc) { /* skip clsact qdisc */ @@ -395,11 +408,12 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) } if (tcinfo->used_len == tcinfo->array_len) { - tcinfo->handle_array = realloc(tcinfo->handle_array, + tmp = realloc(tcinfo->handle_array, (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); - if (!tcinfo->handle_array) + if (!tmp) return -ENOMEM; + tcinfo->handle_array = tmp; tcinfo->array_len += 16; } tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; @@ -422,8 +436,92 @@ static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb) filter_info->devname, filter_info->ifindex); } -static int show_dev_tc_bpf(int sock, unsigned int nl_pid, - struct ip_devname_ifindex *dev) +static int __show_dev_tc_bpf_name(__u32 id, char *name, size_t len) +{ + struct bpf_prog_info info = {}; + __u32 ilen = sizeof(info); + int fd, ret; + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) + return fd; + ret = bpf_obj_get_info_by_fd(fd, &info, &ilen); + if (ret < 0) + goto out; 
+ ret = -ENOENT; + if (info.name[0]) { + get_prog_full_name(&info, fd, name, len); + ret = 0; + } +out: + close(fd); + return ret; +} + +static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev, + const enum bpf_attach_type loc) +{ + __u32 prog_flags[64] = {}, link_flags[64] = {}, i, j; + __u32 prog_ids[64] = {}, link_ids[64] = {}; + LIBBPF_OPTS(bpf_prog_query_opts, optq); + char prog_name[MAX_PROG_FULL_NAME]; + int ret; + + optq.prog_ids = prog_ids; + optq.prog_attach_flags = prog_flags; + optq.link_ids = link_ids; + optq.link_attach_flags = link_flags; + optq.count = ARRAY_SIZE(prog_ids); + + ret = bpf_prog_query_opts(dev->ifindex, loc, &optq); + if (ret) + return; + for (i = 0; i < optq.count; i++) { + NET_START_OBJECT; + NET_DUMP_STR("devname", "%s", dev->devname); + NET_DUMP_UINT("ifindex", "(%u)", (unsigned int)dev->ifindex); + NET_DUMP_STR("kind", " %s", attach_loc_strings[loc]); + ret = __show_dev_tc_bpf_name(prog_ids[i], prog_name, + sizeof(prog_name)); + if (!ret) + NET_DUMP_STR("name", " %s", prog_name); + NET_DUMP_UINT("prog_id", " prog_id %u ", prog_ids[i]); + if (prog_flags[i] || json_output) { + NET_START_ARRAY("prog_flags", "%s "); + for (j = 0; prog_flags[i] && j < 32; j++) { + if (!(prog_flags[i] & (1U << j))) + continue; + NET_DUMP_UINT_ONLY(1U << j); + } + NET_END_ARRAY(""); + } + if (link_ids[i] || json_output) { + NET_DUMP_UINT("link_id", "link_id %u ", link_ids[i]); + if (link_flags[i] || json_output) { + NET_START_ARRAY("link_flags", "%s "); + for (j = 0; link_flags[i] && j < 32; j++) { + if (!(link_flags[i] & (1U << j))) + continue; + NET_DUMP_UINT_ONLY(1U << j); + } + NET_END_ARRAY(""); + } + } + NET_END_OBJECT_FINAL; + } +} + +static void show_dev_tc_bpf(struct ip_devname_ifindex *dev) +{ + __show_dev_tc_bpf(dev, BPF_TCX_INGRESS); + __show_dev_tc_bpf(dev, BPF_TCX_EGRESS); + + __show_dev_tc_bpf(dev, BPF_NETKIT_PRIMARY); + __show_dev_tc_bpf(dev, BPF_NETKIT_PEER); +} + +static int show_dev_tc_bpf_classic(int sock, unsigned int 
nl_pid, + struct ip_devname_ifindex *dev) { struct bpf_filter_t filter_info; struct bpf_tcinfo_t tcinfo; @@ -556,6 +654,32 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type, return bpf_xdp_attach(ifindex, progfd, flags, NULL); } +static int get_tcx_type(enum net_attach_type attach_type) +{ + switch (attach_type) { + case NET_ATTACH_TYPE_TCX_INGRESS: + return BPF_TCX_INGRESS; + case NET_ATTACH_TYPE_TCX_EGRESS: + return BPF_TCX_EGRESS; + default: + return -1; + } +} + +static int do_attach_tcx(int progfd, enum net_attach_type attach_type, int ifindex) +{ + int type = get_tcx_type(attach_type); + + return bpf_prog_attach(progfd, ifindex, type, 0); +} + +static int do_detach_tcx(int targetfd, enum net_attach_type attach_type) +{ + int type = get_tcx_type(attach_type); + + return bpf_prog_detach(targetfd, type); +} + static int do_attach(int argc, char **argv) { enum net_attach_type attach_type; @@ -593,10 +717,23 @@ static int do_attach(int argc, char **argv) } } + switch (attach_type) { /* attach xdp prog */ - if (is_prefix("xdp", attach_type_strings[attach_type])) - err = do_attach_detach_xdp(progfd, attach_type, ifindex, - overwrite); + case NET_ATTACH_TYPE_XDP: + case NET_ATTACH_TYPE_XDP_GENERIC: + case NET_ATTACH_TYPE_XDP_DRIVER: + case NET_ATTACH_TYPE_XDP_OFFLOAD: + err = do_attach_detach_xdp(progfd, attach_type, ifindex, overwrite); + break; + /* attach tcx prog */ + case NET_ATTACH_TYPE_TCX_INGRESS: + case NET_ATTACH_TYPE_TCX_EGRESS: + err = do_attach_tcx(progfd, attach_type, ifindex); + break; + default: + break; + } + if (err) { p_err("interface %s attach failed: %s", attach_type_strings[attach_type], strerror(-err)); @@ -630,10 +767,23 @@ static int do_detach(int argc, char **argv) if (ifindex < 1) return -EINVAL; + switch (attach_type) { /* detach xdp prog */ - progfd = -1; - if (is_prefix("xdp", attach_type_strings[attach_type])) + case NET_ATTACH_TYPE_XDP: + case NET_ATTACH_TYPE_XDP_GENERIC: + case NET_ATTACH_TYPE_XDP_DRIVER: 
+ case NET_ATTACH_TYPE_XDP_OFFLOAD: + progfd = -1; err = do_attach_detach_xdp(progfd, attach_type, ifindex, NULL); + break; + /* detach tcx prog */ + case NET_ATTACH_TYPE_TCX_INGRESS: + case NET_ATTACH_TYPE_TCX_EGRESS: + err = do_detach_tcx(ifindex, attach_type); + break; + default: + break; + } if (err < 0) { p_err("interface %s detach failed: %s", @@ -647,6 +797,111 @@ static int do_detach(int argc, char **argv) return 0; } +static int netfilter_link_compar(const void *a, const void *b) +{ + const struct bpf_link_info *nfa = a; + const struct bpf_link_info *nfb = b; + int delta; + + delta = nfa->netfilter.pf - nfb->netfilter.pf; + if (delta) + return delta; + + delta = nfa->netfilter.hooknum - nfb->netfilter.hooknum; + if (delta) + return delta; + + if (nfa->netfilter.priority < nfb->netfilter.priority) + return -1; + if (nfa->netfilter.priority > nfb->netfilter.priority) + return 1; + + return nfa->netfilter.flags - nfb->netfilter.flags; +} + +static void show_link_netfilter(void) +{ + unsigned int nf_link_len = 0, nf_link_count = 0; + struct bpf_link_info *nf_link_info = NULL; + __u32 id = 0; + + while (true) { + struct bpf_link_info info; + int fd, err; + __u32 len; + + err = bpf_link_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + p_err("can't get next link: %s (id %u)", strerror(errno), id); + break; + } + + fd = bpf_link_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get link by id (%u): %s", id, strerror(errno)); + continue; + } + + memset(&info, 0, sizeof(info)); + len = sizeof(info); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + + close(fd); + + if (err) { + p_err("can't get link info for fd %d: %s", fd, strerror(errno)); + continue; + } + + if (info.type != BPF_LINK_TYPE_NETFILTER) + continue; + + if (nf_link_count >= nf_link_len) { + static const unsigned int max_link_count = INT_MAX / sizeof(info); + struct bpf_link_info *expand; + + if (nf_link_count > max_link_count) { + p_err("cannot handle more than %u links\n", 
max_link_count); + break; + } + + nf_link_len += 16; + + expand = realloc(nf_link_info, nf_link_len * sizeof(info)); + if (!expand) { + p_err("realloc: %s", strerror(errno)); + break; + } + + nf_link_info = expand; + } + + nf_link_info[nf_link_count] = info; + nf_link_count++; + } + + if (!nf_link_info) + return; + + qsort(nf_link_info, nf_link_count, sizeof(*nf_link_info), netfilter_link_compar); + + for (id = 0; id < nf_link_count; id++) { + NET_START_OBJECT; + if (json_output) + netfilter_dump_json(&nf_link_info[id], json_wtr); + else + netfilter_dump_plain(&nf_link_info[id]); + + NET_DUMP_UINT("id", " prog_id %u", nf_link_info[id].prog_id); + NET_END_OBJECT; + } + + free(nf_link_info); +} + static int do_show(int argc, char **argv) { struct bpf_attach_info attach_info = {}; @@ -688,8 +943,9 @@ static int do_show(int argc, char **argv) if (!ret) { NET_START_ARRAY("tc", "%s:\n"); for (i = 0; i < dev_array.used_len; i++) { - ret = show_dev_tc_bpf(sock, nl_pid, - &dev_array.devices[i]); + show_dev_tc_bpf(&dev_array.devices[i]); + ret = show_dev_tc_bpf_classic(sock, nl_pid, + &dev_array.devices[i]); if (ret) break; } @@ -701,6 +957,10 @@ static int do_show(int argc, char **argv) NET_DUMP_UINT("id", "id %u", attach_info.flow_dissector_id); NET_END_ARRAY("\n"); + NET_START_ARRAY("netfilter", "%s:\n"); + show_link_netfilter(); + NET_END_ARRAY("\n"); + NET_END_OBJECT; if (json_output) jsonw_end_array(json_wtr); @@ -730,10 +990,12 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" - " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n" + " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload | tcx_ingress\n" + " | tcx_egress }\n" " " HELP_SPEC_OPTIONS " }\n" "\n" - "Note: Only xdp and tc attachments are supported now.\n" + "Note: Only xdp, tcx, tc, netkit, flow_dissector and netfilter attachments\n" + " are currently supported.\n" " For progs attached to cgroups, use \"bpftool cgroup\"\n" " to dump program 
attachments. For program types\n" " sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n" diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c index 5f65140b003b..0a3c7e96c797 100644 --- a/tools/bpf/bpftool/netlink_dumper.c +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -45,7 +45,7 @@ static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex, NET_START_OBJECT; if (name) NET_DUMP_STR("devname", "%s", name); - NET_DUMP_UINT("ifindex", "(%d)", ifindex); + NET_DUMP_UINT("ifindex", "(%u)", ifindex); if (mode == XDP_ATTACHED_MULTI) { if (json_output) { @@ -74,7 +74,7 @@ int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb) if (!tb[IFLA_XDP]) return 0; - return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index, + return do_xdp_dump_one(tb[IFLA_XDP], (unsigned int)ifinfo->ifi_index, libbpf_nla_getattr_str(tb[IFLA_IFNAME])); } @@ -168,7 +168,7 @@ int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind, NET_START_OBJECT; if (devname[0] != '\0') NET_DUMP_STR("devname", "%s", devname); - NET_DUMP_UINT("ifindex", "(%u)", ifindex); + NET_DUMP_UINT("ifindex", "(%u)", (unsigned int)ifindex); NET_DUMP_STR("kind", " %s", kind); ret = do_bpf_filter_dump(tb[TCA_OPTIONS]); NET_END_OBJECT_FINAL; diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h index 774af6c62ef5..96318106fb49 100644 --- a/tools/bpf/bpftool/netlink_dumper.h +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -76,6 +76,14 @@ fprintf(stdout, fmt_str, val); \ } +#define NET_DUMP_UINT_ONLY(str) \ +{ \ + if (json_output) \ + jsonw_uint(json_wtr, str); \ + else \ + fprintf(stdout, "%u ", str); \ +} + #define NET_DUMP_STR(name, fmt_str, str) \ { \ if (json_output) \ diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index 91743445e4c7..80de2874dabe 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -236,7 +236,7 @@ static int do_help(int argc, char **argv) { fprintf(stderr, "Usage: %1$s %2$s { show | 
list }\n" - " %1$s %2$s help }\n" + " %1$s %2$s help\n" "\n" " " HELP_SPEC_OPTIONS " }\n" "", diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 00c77edb6331..23f488cf1740 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -54,6 +54,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[refs->ref_cnt]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt++; return; @@ -77,6 +78,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[0]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt = 1; refs->has_bpf_cookie = e->has_bpf_cookie; refs->bpf_cookie = e->bpf_cookie; @@ -101,7 +103,6 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) char buf[4096 / sizeof(*e) * sizeof(*e)]; struct pid_iter_bpf *skel; int err, ret, fd = -1, i; - libbpf_print_fn_t default_print; *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); if (IS_ERR(*map)) { @@ -118,12 +119,18 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) skel->rodata->obj_type = type; - /* we don't want output polluted with libbpf errors if bpf_iter is not - * supported - */ - default_print = libbpf_set_print(libbpf_print_none); - err = pid_iter_bpf__load(skel); - libbpf_set_print(default_print); + if (!verifier_logs) { + libbpf_print_fn_t default_print; + + /* Unless debug information is on, we don't want the output to + * be polluted with libbpf errors if bpf_iter is not supported. 
+ */ + default_print = libbpf_set_print(libbpf_print_none); + err = pid_iter_bpf__load(skel); + libbpf_set_print(default_print); + } else { + err = pid_iter_bpf__load(skel); + } if (err) { /* too bad, kernel doesn't support BPF iterators yet */ err = 0; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cfc9fdc1e863..6daf19809ca4 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -23,6 +23,7 @@ #include <linux/err.h> #include <linux/perf_event.h> #include <linux/sizes.h> +#include <linux/keyctl.h> #include <bpf/bpf.h> #include <bpf/btf.h> @@ -198,7 +199,7 @@ static void show_prog_maps(int fd, __u32 num_maps) info.nr_map_ids = num_maps; info.map_ids = ptr_to_u64(map_ids); - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_prog_get_info_by_fd(fd, &info, &len); if (err || !info.nr_map_ids) return; @@ -231,7 +232,7 @@ static void *find_metadata(int prog_fd, struct bpf_map_info *map_info) memset(&prog_info, 0, sizeof(prog_info)); prog_info_len = sizeof(prog_info); - ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); if (ret) return NULL; @@ -248,7 +249,7 @@ static void *find_metadata(int prog_fd, struct bpf_map_info *map_info) prog_info.map_ids = ptr_to_u64(map_ids); prog_info_len = sizeof(prog_info); - ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); if (ret) goto free_map_ids; @@ -259,7 +260,7 @@ static void *find_metadata(int prog_fd, struct bpf_map_info *map_info) memset(map_info, 0, sizeof(*map_info)); map_info_len = sizeof(*map_info); - ret = bpf_obj_get_info_by_fd(map_fd, map_info, &map_info_len); + ret = bpf_map_get_info_by_fd(map_fd, map_info, &map_info_len); if (ret < 0) { close(map_fd); goto free_map_ids; @@ -442,7 +443,7 @@ static void print_prog_header_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, 
"recursion_misses", info->recursion_misses); } -static void print_prog_json(struct bpf_prog_info *info, int fd) +static void print_prog_json(struct bpf_prog_info *info, int fd, bool orphaned) { char *memlock; @@ -461,6 +462,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, "uid", info->created_by_uid); } + jsonw_bool_field(json_wtr, "orphaned", orphaned); jsonw_uint_field(json_wtr, "bytes_xlated", info->xlated_prog_len); if (info->jited_prog_len) { @@ -520,14 +522,14 @@ static void print_prog_header_plain(struct bpf_prog_info *info, int fd) print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); printf("%s", info->gpl_compatible ? " gpl" : ""); if (info->run_time_ns) - printf(" run_time_ns %lld run_cnt %lld", + printf(" run_time_ns %llu run_cnt %llu", info->run_time_ns, info->run_cnt); if (info->recursion_misses) - printf(" recursion_misses %lld", info->recursion_misses); + printf(" recursion_misses %llu", info->recursion_misses); printf("\n"); } -static void print_prog_plain(struct bpf_prog_info *info, int fd) +static void print_prog_plain(struct bpf_prog_info *info, int fd, bool orphaned) { char *memlock; @@ -554,6 +556,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf(" memlock %sB", memlock); free(memlock); + if (orphaned) + printf(" orphaned"); + if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); @@ -565,7 +570,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) } if (info->btf_id) - printf("\n\tbtf_id %d", info->btf_id); + printf("\n\tbtf_id %u", info->btf_id); emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); @@ -580,16 +585,16 @@ static int show_prog(int fd) __u32 len = sizeof(info); int err; - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { + err = bpf_prog_get_info_by_fd(fd, &info, &len); + if (err && err != -ENODEV) { p_err("can't get prog info: %s", strerror(errno)); return -1; } if (json_output) - print_prog_json(&info, fd); + 
print_prog_json(&info, fd, err == -ENODEV); else - print_prog_plain(&info, fd); + print_prog_plain(&info, fd, err == -ENODEV); return 0; } @@ -710,7 +715,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (mode == DUMP_JITED) { if (info->jited_prog_len == 0 || !info->jited_prog_insns) { - p_info("no instructions returned"); + p_err("error retrieving jit dump: no instructions returned or kernel.kptr_restrict set?"); return -1; } buf = u64_to_ptr(info->jited_prog_insns); @@ -818,11 +823,18 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, printf("%s:\n", sym_name); } - if (disasm_print_insn(img, lens[i], opcodes, - name, disasm_opt, btf, - prog_linfo, ksyms[i], i, - linum)) - goto exit_free; + if (ksyms) { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum)) + goto exit_free; + } else { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + NULL, 0, 0, false)) + goto exit_free; + } img += lens[i]; @@ -840,11 +852,6 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, false)) goto exit_free; } - } else if (visual) { - if (json_output) - jsonw_null(json_wtr); - else - dump_xlated_cfg(buf, member_len); } else { kernel_syms_load(&dd); dd.nr_jited_ksyms = info->nr_jited_ksyms; @@ -855,11 +862,11 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, dd.prog_linfo = prog_linfo; if (json_output) - dump_xlated_json(&dd, buf, member_len, opcodes, - linum); + dump_xlated_json(&dd, buf, member_len, opcodes, linum); + else if (visual) + dump_xlated_cfg(&dd, buf, member_len, opcodes, linum); else - dump_xlated_plain(&dd, buf, member_len, opcodes, - linum); + dump_xlated_plain(&dd, buf, member_len, opcodes, linum); kernel_syms_destroy(&dd); } @@ -910,37 +917,46 @@ static int do_dump(int argc, char **argv) if (nb_fds < 1) goto exit_free; - if (is_prefix(*argv, "file")) { - NEXT_ARG(); - if (!argc) { - p_err("expected file path"); - goto exit_close; - } - if (nb_fds 
> 1) { - p_err("several programs matched"); - goto exit_close; - } + while (argc) { + if (is_prefix(*argv, "file")) { + NEXT_ARG(); + if (!argc) { + p_err("expected file path"); + goto exit_close; + } + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } - filepath = *argv; - NEXT_ARG(); - } else if (is_prefix(*argv, "opcodes")) { - opcodes = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "visual")) { - if (nb_fds > 1) { - p_err("several programs matched"); + filepath = *argv; + NEXT_ARG(); + } else if (is_prefix(*argv, "opcodes")) { + opcodes = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "visual")) { + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } + + visual = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "linum")) { + linum = true; + NEXT_ARG(); + } else { + usage(); goto exit_close; } - - visual = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "linum")) { - linum = true; - NEXT_ARG(); } - if (argc) { - usage(); + if (filepath && (opcodes || visual || linum)) { + p_err("'file' is not compatible with 'opcodes', 'visual', or 'linum'"); + goto exit_close; + } + if (json_output && visual) { + p_err("'visual' is not compatible with JSON output"); goto exit_close; } @@ -949,7 +965,7 @@ static int do_dump(int argc, char **argv) for (i = 0; i < nb_fds; i++) { memset(&info, 0, sizeof(info)); - err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); + err = bpf_prog_get_info_by_fd(fds[i], &info, &info_len); if (err) { p_err("can't get prog info: %s", strerror(errno)); break; @@ -961,7 +977,7 @@ static int do_dump(int argc, char **argv) break; } - err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); + err = bpf_prog_get_info_by_fd(fds[i], &info, &info_len); if (err) { p_err("can't get prog info: %s", strerror(errno)); break; @@ -1047,7 +1063,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd, if (!REQ_ARGS(2)) return -EINVAL; - *mapfd = map_parse_fd(&argc, &argv); + *mapfd 
= map_parse_fd(&argc, &argv, 0); if (*mapfd < 0) return *mapfd; @@ -1098,6 +1114,52 @@ static int do_detach(int argc, char **argv) return 0; } +enum prog_tracelog_mode { + TRACE_STDOUT, + TRACE_STDERR, +}; + +static int +prog_tracelog_stream(int prog_fd, enum prog_tracelog_mode mode) +{ + FILE *file = mode == TRACE_STDOUT ? stdout : stderr; + int stream_id = mode == TRACE_STDOUT ? 1 : 2; + char buf[512]; + int ret; + + ret = 0; + do { + ret = bpf_prog_stream_read(prog_fd, stream_id, buf, sizeof(buf), NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, file); + } while (ret > 0); + + fflush(file); + return ret ? -1 : 0; +} + +static int do_tracelog_any(int argc, char **argv) +{ + enum prog_tracelog_mode mode; + int fd; + + if (argc == 0) + return do_tracelog(argc, argv); + if (!is_prefix(*argv, "stdout") && !is_prefix(*argv, "stderr")) + usage(); + mode = is_prefix(*argv, "stdout") ? TRACE_STDOUT : TRACE_STDERR; + NEXT_ARG(); + + if (!REQ_ARGS(2)) + return -1; + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + return prog_tracelog_stream(fd, mode); +} + static int check_single_stdin(char *file_data_in, char *file_ctx_in) { if (file_data_in && file_ctx_in && @@ -1149,7 +1211,7 @@ static int get_run_data(const char *fname, void **data_ptr, unsigned int *size) } if (nb_read > buf_size - block_size) { if (buf_size == UINT32_MAX) { - p_err("data_in/ctx_in is too long (max: %d)", + p_err("data_in/ctx_in is too long (max: %u)", UINT32_MAX); goto err_free; } @@ -1472,19 +1534,6 @@ auto_attach_program(struct bpf_program *prog, const char *path) return err; } -static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) -{ - int len; - - len = snprintf(buf, buf_sz, "%s/%s", path, name); - if (len < 0) - return -EINVAL; - if ((size_t)len >= buf_sz) - return -ENAMETOOLONG; - - return 0; -} - static int auto_attach_programs(struct bpf_object *obj, const char *path) { @@ -1526,12 +1575,13 @@ static int load_with_options(int argc, 
char **argv, bool first_prog_only) struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; const char *pinmaps = NULL; + __u32 xdpmeta_ifindex = 0; + __u32 offload_ifindex = 0; bool auto_attach = false; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; unsigned int i, j; - __u32 ifindex = 0; const char *file; int idx, err; @@ -1605,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } NEXT_ARG(); - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, 0); if (fd < 0) goto err_free_reuse_maps; @@ -1623,17 +1673,46 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) map_replace[old_map_fds].fd = fd; old_map_fds++; } else if (is_prefix(*argv, "dev")) { + p_info("Warning: 'bpftool prog load [...] dev <ifname>' syntax is deprecated.\n" + "Going further, please use 'offload_dev <ifname>' to offload program to device.\n" + "For applications using XDP hints only, use 'xdpmeta_dev <ifname>'."); + goto offload_dev; + } else if (is_prefix(*argv, "offload_dev")) { +offload_dev: + NEXT_ARG(); + + if (offload_ifindex) { + p_err("offload_dev already specified"); + goto err_free_reuse_maps; + } else if (xdpmeta_ifindex) { + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); + goto err_free_reuse_maps; + } + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + offload_ifindex = if_nametoindex(*argv); + if (!offload_ifindex) { + p_err("unrecognized netdevice '%s': %s", + *argv, strerror(errno)); + goto err_free_reuse_maps; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "xdpmeta_dev")) { NEXT_ARG(); - if (ifindex) { - p_err("offload device already specified"); + if (xdpmeta_ifindex) { + p_err("xdpmeta_dev already specified"); + goto err_free_reuse_maps; + } else if (offload_ifindex) { + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); goto err_free_reuse_maps; } if (!REQ_ARGS(1)) goto err_free_reuse_maps; - ifindex = if_nametoindex(*argv); - if (!ifindex) { + 
xdpmeta_ifindex = if_nametoindex(*argv); + if (!xdpmeta_ifindex) { p_err("unrecognized netdevice '%s': %s", *argv, strerror(errno)); goto err_free_reuse_maps; @@ -1649,8 +1728,17 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } else if (is_prefix(*argv, "autoattach")) { auto_attach = true; NEXT_ARG(); + } else if (is_prefix(*argv, "kernel_btf")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + open_opts.btf_custom_path = GET_ARG(); } else { - p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", + p_err("expected no more arguments, " + "'type', 'map', 'offload_dev', 'xdpmeta_dev', 'pinmaps', " + "'autoattach', or 'kernel_btf', got: '%s'?", *argv); goto err_free_reuse_maps; } @@ -1680,8 +1768,14 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - bpf_program__set_ifindex(pos, ifindex); - bpf_program__set_type(pos, prog_type); + if (prog_type == BPF_PROG_TYPE_XDP && xdpmeta_ifindex) { + bpf_program__set_flags(pos, BPF_F_XDP_DEV_BOUND_ONLY); + bpf_program__set_ifindex(pos, xdpmeta_ifindex); + } else { + bpf_program__set_ifindex(pos, offload_ifindex); + } + if (bpf_program__type(pos) != prog_type) + bpf_program__set_type(pos, prog_type); bpf_program__set_expected_attach_type(pos, expected_attach_type); } @@ -1717,7 +1811,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) idx = 0; bpf_object__for_each_map(map, obj) { if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) - bpf_map__set_ifindex(map, ifindex); + bpf_map__set_ifindex(map, offload_ifindex); if (j < old_map_fds && idx == map_replace[j].idx) { err = bpf_map__reuse_fd(map, map_replace[j++].fd); @@ -1747,7 +1841,10 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = mount_bpffs_for_pin(pinfile); + if (first_prog_only) + err = mount_bpffs_for_file(pinfile); + else + err = create_and_mount_bpffs_dir(pinfile); if 
(err) goto err_close_obj; @@ -1779,6 +1876,10 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } if (pinmaps) { + err = create_and_mount_bpffs_dir(pinmaps); + if (err) + goto err_unpin; + err = bpf_object__pin_maps(obj, pinmaps); if (err) { p_err("failed to pin all maps"); @@ -1830,6 +1931,8 @@ static int try_loader(struct gen_loader_opts *gen) { struct bpf_load_and_run_opts opts = {}; struct bpf_loader_ctx *ctx; + char sig_buf[MAX_SIG_SIZE]; + __u8 prog_sha[SHA256_DIGEST_LENGTH]; int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc), sizeof(struct bpf_prog_desc)); int log_buf_sz = (1u << 24) - 1; @@ -1853,6 +1956,26 @@ static int try_loader(struct gen_loader_opts *gen) opts.insns = gen->insns; opts.insns_sz = gen->insns_sz; fds_before = count_open_fds(); + + if (sign_progs) { + opts.excl_prog_hash = prog_sha; + opts.excl_prog_hash_sz = sizeof(prog_sha); + opts.signature = sig_buf; + opts.signature_sz = MAX_SIG_SIZE; + opts.keyring_id = KEY_SPEC_SESSION_KEYRING; + + err = bpftool_prog_sign(&opts); + if (err < 0) { + p_err("failed to sign program"); + goto out; + } + + err = register_session_key(cert_path); + if (err < 0) { + p_err("failed to add session key"); + goto out; + } + } err = bpf_load_and_run(&opts); fd_delta = count_open_fds() - fds_before; if (err < 0 || verifier_logs) { @@ -1861,6 +1984,7 @@ static int try_loader(struct gen_loader_opts *gen) fprintf(stderr, "loader prog leaked %d FDs\n", fd_delta); } +out: free(log_buf); return err; } @@ -1883,10 +2007,14 @@ static int do_loader(int argc, char **argv) obj = bpf_object__open_file(file, &open_opts); if (!obj) { + err = -1; p_err("failed to open object file"); goto err_close_obj; } + if (sign_progs) + gen.gen_hash = true; + err = bpf_object__gen_loader(obj, &gen); if (err) goto err_close_obj; @@ -2047,7 +2175,7 @@ static int profile_parse_metrics(int argc, char **argv) NEXT_ARG(); } if (selected_cnt > MAX_NUM_PROFILE_METRICS) { - p_err("too many (%d) metrics, please 
specify no more than %d metrics at at time", + p_err("too many (%d) metrics, please specify no more than %d metrics at a time", selected_cnt, MAX_NUM_PROFILE_METRICS); return -1; } @@ -2161,7 +2289,7 @@ static void profile_print_readings(void) static char *profile_target_name(int tgt_fd) { - struct bpf_func_info func_info; + struct bpf_func_info func_info = {}; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); const struct btf_type *t; @@ -2170,9 +2298,9 @@ static char *profile_target_name(int tgt_fd) char *name = NULL; int err; - err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(tgt_fd, &info, &info_len); if (err) { - p_err("failed to bpf_obj_get_info_by_fd for prog FD %d", tgt_fd); + p_err("failed to get info for prog FD %d", tgt_fd); goto out; } @@ -2183,7 +2311,7 @@ static char *profile_target_name(int tgt_fd) func_info_rec_size = info.func_info_rec_size; if (info.nr_func_info == 0) { - p_err("bpf_obj_get_info_by_fd for prog FD %d found 0 func_info", tgt_fd); + p_err("found 0 func_info for prog FD %d", tgt_fd); goto out; } @@ -2192,7 +2320,7 @@ static char *profile_target_name(int tgt_fd) info.func_info_rec_size = func_info_rec_size; info.func_info = ptr_to_u64(&func_info); - err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(tgt_fd, &info, &info_len); if (err) { p_err("failed to get func_info for prog FD %d", tgt_fd); goto out; @@ -2206,7 +2334,7 @@ static char *profile_target_name(int tgt_fd) t = btf__type_by_id(btf, func_info.type_id); if (!t) { - p_err("btf %d doesn't have type %d", + p_err("btf %u doesn't have type %u", info.btf_id, func_info.type_id); goto out; } @@ -2233,13 +2361,41 @@ static void profile_close_perf_events(struct profiler_bpf *obj) profile_perf_event_cnt = 0; } +static int profile_open_perf_event(int mid, int cpu, int map_fd) +{ + int pmu_fd; + + pmu_fd = syscall(__NR_perf_event_open, &metrics[mid].attr, + -1 /*pid*/, cpu, -1 /*group_fd*/, 0); + if 
(pmu_fd < 0) { + if (errno == ENODEV) { + p_info("cpu %d may be offline, skip %s profiling.", + cpu, metrics[mid].name); + profile_perf_event_cnt++; + return 0; + } + return -1; + } + + if (bpf_map_update_elem(map_fd, + &profile_perf_event_cnt, + &pmu_fd, BPF_ANY) || + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + close(pmu_fd); + return -1; + } + + profile_perf_events[profile_perf_event_cnt++] = pmu_fd; + return 0; +} + static int profile_open_perf_events(struct profiler_bpf *obj) { unsigned int cpu, m; - int map_fd, pmu_fd; + int map_fd; profile_perf_events = calloc( - sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int)); if (!profile_perf_events) { p_err("failed to allocate memory for perf_event array: %s", strerror(errno)); @@ -2255,17 +2411,11 @@ static int profile_open_perf_events(struct profiler_bpf *obj) if (!metrics[m].selected) continue; for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { - pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr, - -1/*pid*/, cpu, -1/*group_fd*/, 0); - if (pmu_fd < 0 || - bpf_map_update_elem(map_fd, &profile_perf_event_cnt, - &pmu_fd, BPF_ANY) || - ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { - p_err("failed to create event %s on cpu %d", + if (profile_open_perf_event(m, cpu, map_fd)) { + p_err("failed to create event %s on cpu %u", metrics[m].name, cpu); return -1; } - profile_perf_events[profile_perf_event_cnt++] = pmu_fd; } } return 0; @@ -2398,14 +2548,15 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s { show | list } [PROG]\n" - " %1$s %2$s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n" - " %1$s %2$s dump jited PROG [{ file FILE | opcodes | linum }]\n" + " %1$s %2$s dump xlated PROG [{ file FILE | [opcodes] [linum] [visual] }]\n" + " %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n" " %1$s %2$s pin PROG FILE\n" " %1$s %2$s { load | loadall } OBJ PATH \\\n" - " [type TYPE] [dev NAME] \\\n" 
+ " [type TYPE] [{ offload_dev | xdpmeta_dev } NAME] \\\n" " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" " [autoattach]\n" + " [kernel_btf BTF_FILE]\n" " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s run PROG \\\n" @@ -2415,6 +2566,7 @@ static int do_help(int argc, char **argv) " [repeat N]\n" " %1$s %2$s profile PROG [duration DURATION] METRICs\n" " %1$s %2$s tracelog\n" + " %1$s %2$s tracelog { stdout | stderr } PROG\n" " %1$s %2$s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -2426,9 +2578,10 @@ static int do_help(int argc, char **argv) " sk_reuseport | flow_dissector | cgroup/sysctl |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" - " cgroup/getpeername4 | cgroup/getpeername6 |\n" - " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" - " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" + " cgroup/connect_unix | cgroup/getpeername4 | cgroup/getpeername6 |\n" + " cgroup/getpeername_unix | cgroup/getsockname4 | cgroup/getsockname6 |\n" + " cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n" + " cgroup/sendmsg_unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" @@ -2436,7 +2589,7 @@ static int do_help(int argc, char **argv) " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n" - " {-L|--use-loader} }\n" + " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ] \n" "", bin_name, argv[-2]); @@ -2453,7 +2606,7 @@ static const struct cmd cmds[] = { { "loadall", do_loadall }, { "attach", do_attach }, { "detach", do_detach }, - { 
"tracelog", do_tracelog }, + { "tracelog", do_tracelog_any }, { "run", do_run }, { "profile", do_profile }, { 0 } diff --git a/tools/bpf/bpftool/sign.c b/tools/bpf/bpftool/sign.c new file mode 100644 index 000000000000..f9b742f4bb10 --- /dev/null +++ b/tools/bpf/bpftool/sign.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* + * Copyright (C) 2025 Google LLC. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> +#include <getopt.h> +#include <err.h> +#include <openssl/opensslv.h> +#include <openssl/bio.h> +#include <openssl/evp.h> +#include <openssl/pem.h> +#include <openssl/err.h> +#include <openssl/cms.h> +#include <linux/keyctl.h> +#include <errno.h> + +#include <bpf/skel_internal.h> + +#include "main.h" + +#define OPEN_SSL_ERR_BUF_LEN 256 + +/* Use deprecated in 3.0 ERR_get_error_line_data for openssl < 3 */ +#if !defined(OPENSSL_VERSION_MAJOR) || (OPENSSL_VERSION_MAJOR < 3) +#define ERR_get_error_all(file, line, func, data, flags) \ + ERR_get_error_line_data(file, line, data, flags) +#endif + +static void display_openssl_errors(int l) +{ + char buf[OPEN_SSL_ERR_BUF_LEN]; + const char *file; + const char *data; + unsigned long e; + int flags; + int line; + + while ((e = ERR_get_error_all(&file, &line, NULL, &data, &flags))) { + ERR_error_string_n(e, buf, sizeof(buf)); + if (data && (flags & ERR_TXT_STRING)) { + p_err("OpenSSL %s: %s:%d: %s", buf, file, line, data); + } else { + p_err("OpenSSL %s: %s:%d", buf, file, line); + } + } +} + +#define DISPLAY_OSSL_ERR(cond) \ + do { \ + bool __cond = (cond); \ + if (__cond && ERR_peek_error()) \ + display_openssl_errors(__LINE__);\ + } while (0) + +static EVP_PKEY *read_private_key(const char *pkey_path) +{ + EVP_PKEY *private_key = NULL; + BIO *b; + + b = BIO_new_file(pkey_path, "rb"); + private_key = PEM_read_bio_PrivateKey(b, NULL, NULL, NULL); + BIO_free(b); + 
DISPLAY_OSSL_ERR(!private_key); + return private_key; +} + +static X509 *read_x509(const char *x509_name) +{ + unsigned char buf[2]; + X509 *x509 = NULL; + BIO *b; + int n; + + b = BIO_new_file(x509_name, "rb"); + if (!b) + goto cleanup; + + /* Look at the first two bytes of the file to determine the encoding */ + n = BIO_read(b, buf, 2); + if (n != 2) + goto cleanup; + + if (BIO_reset(b) != 0) + goto cleanup; + + if (buf[0] == 0x30 && buf[1] >= 0x81 && buf[1] <= 0x84) + /* Assume raw DER encoded X.509 */ + x509 = d2i_X509_bio(b, NULL); + else + /* Assume PEM encoded X.509 */ + x509 = PEM_read_bio_X509(b, NULL, NULL, NULL); + +cleanup: + BIO_free(b); + DISPLAY_OSSL_ERR(!x509); + return x509; +} + +__u32 register_session_key(const char *key_der_path) +{ + unsigned char *der_buf = NULL; + X509 *x509 = NULL; + int key_id = -1; + int der_len; + + if (!key_der_path) + return key_id; + x509 = read_x509(key_der_path); + if (!x509) + goto cleanup; + der_len = i2d_X509(x509, &der_buf); + if (der_len < 0) + goto cleanup; + key_id = syscall(__NR_add_key, "asymmetric", key_der_path, der_buf, + (size_t)der_len, KEY_SPEC_SESSION_KEYRING); +cleanup: + X509_free(x509); + OPENSSL_free(der_buf); + DISPLAY_OSSL_ERR(key_id == -1); + return key_id; +} + +int bpftool_prog_sign(struct bpf_load_and_run_opts *opts) +{ + BIO *bd_in = NULL, *bd_out = NULL; + EVP_PKEY *private_key = NULL; + CMS_ContentInfo *cms = NULL; + long actual_sig_len = 0; + X509 *x509 = NULL; + int err = 0; + + bd_in = BIO_new_mem_buf(opts->insns, opts->insns_sz); + if (!bd_in) { + err = -ENOMEM; + goto cleanup; + } + + private_key = read_private_key(private_key_path); + if (!private_key) { + err = -EINVAL; + goto cleanup; + } + + x509 = read_x509(cert_path); + if (!x509) { + err = -EINVAL; + goto cleanup; + } + + cms = CMS_sign(NULL, NULL, NULL, NULL, + CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED | + CMS_STREAM); + if (!cms) { + err = -EINVAL; + goto cleanup; + } + + if (!CMS_add1_signer(cms, x509, 
private_key, EVP_sha256(), + CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP | + CMS_USE_KEYID | CMS_NOATTR)) { + err = -EINVAL; + goto cleanup; + } + + if (CMS_final(cms, bd_in, NULL, CMS_NOCERTS | CMS_BINARY) != 1) { + err = -EIO; + goto cleanup; + } + + EVP_Digest(opts->insns, opts->insns_sz, opts->excl_prog_hash, + &opts->excl_prog_hash_sz, EVP_sha256(), NULL); + + bd_out = BIO_new(BIO_s_mem()); + if (!bd_out) { + err = -ENOMEM; + goto cleanup; + } + + if (!i2d_CMS_bio_stream(bd_out, cms, NULL, 0)) { + err = -EIO; + goto cleanup; + } + + actual_sig_len = BIO_get_mem_data(bd_out, NULL); + if (actual_sig_len <= 0) { + err = -EIO; + goto cleanup; + } + + if ((size_t)actual_sig_len > opts->signature_sz) { + err = -ENOSPC; + goto cleanup; + } + + if (BIO_read(bd_out, opts->signature, actual_sig_len) != actual_sig_len) { + err = -EIO; + goto cleanup; + } + + opts->signature_sz = actual_sig_len; +cleanup: + BIO_free(bd_out); + CMS_ContentInfo_free(cms); + X509_free(x509); + EVP_PKEY_free(private_key); + BIO_free(bd_in); + DISPLAY_OSSL_ERR(err < 0); + return err; +} diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index eb05ea53afb1..948dde25034e 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -15,7 +15,21 @@ enum bpf_obj_type { BPF_OBJ_BTF, }; +struct bpf_perf_link___local { + struct bpf_link link; + struct file *perf_file; +} __attribute__((preserve_access_index)); + +struct perf_event___local { + u64 bpf_cookie; +} __attribute__((preserve_access_index)); + +enum bpf_link_type___local { + BPF_LINK_TYPE_PERF_EVENT___local = 7, +}; + extern const void bpf_link_fops __ksym; +extern const void bpf_link_fops_poll __ksym __weak; extern const void bpf_map_fops __ksym; extern const void bpf_prog_fops __ksym; extern const void btf_fops __ksym; @@ -41,10 +55,10 @@ static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type) /* could be used only with 
BPF_LINK_TYPE_PERF_EVENT links */ static __u64 get_bpf_cookie(struct bpf_link *link) { - struct bpf_perf_link *perf_link; - struct perf_event *event; + struct bpf_perf_link___local *perf_link; + struct perf_event___local *event; - perf_link = container_of(link, struct bpf_perf_link, link); + perf_link = container_of(link, struct bpf_perf_link___local, link); event = BPF_CORE_READ(perf_link, perf_file, private_data); return BPF_CORE_READ(event, bpf_cookie); } @@ -71,7 +85,11 @@ int iter(struct bpf_iter__task_file *ctx) fops = &btf_fops; break; case BPF_OBJ_LINK: - fops = &bpf_link_fops; + if (&bpf_link_fops_poll && + file->f_op == &bpf_link_fops_poll) + fops = &bpf_link_fops_poll; + else + fops = &bpf_link_fops; break; default: return 0; @@ -84,10 +102,13 @@ int iter(struct bpf_iter__task_file *ctx) e.pid = task->tgid; e.id = get_obj_id(file->private_data, obj_type); - if (obj_type == BPF_OBJ_LINK) { + if (obj_type == BPF_OBJ_LINK && + bpf_core_enum_value_exists(enum bpf_link_type___local, + BPF_LINK_TYPE_PERF_EVENT___local)) { struct bpf_link *link = (struct bpf_link *) file->private_data; - if (BPF_CORE_READ(link, type) == BPF_LINK_TYPE_PERF_EVENT) { + if (BPF_CORE_READ(link, type) == bpf_core_enum_value(enum bpf_link_type___local, + BPF_LINK_TYPE_PERF_EVENT___local)) { e.has_bpf_cookie = true; e.bpf_cookie = get_bpf_cookie(link); } diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index ce5b65e07ab1..f48c783cb9f7 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -4,6 +4,12 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +struct bpf_perf_event_value___local { + __u64 counter; + __u64 enabled; + __u64 running; +} __attribute__((preserve_access_index)); + /* map of perf event fds, num_cpu * num_metric entries */ struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -15,14 +21,14 @@ struct { struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 
__uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(value_size, sizeof(struct bpf_perf_event_value___local)); } fentry_readings SEC(".maps"); /* accumulated readings */ struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(value_size, sizeof(struct bpf_perf_event_value___local)); } accum_readings SEC(".maps"); /* sample counts, one per cpu */ @@ -34,17 +40,17 @@ struct { const volatile __u32 num_cpu = 1; const volatile __u32 num_metric = 1; -#define MAX_NUM_MATRICS 4 +#define MAX_NUM_METRICS 4 SEC("fentry/XXX") int BPF_PROG(fentry_XXX) { - struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local *ptrs[MAX_NUM_METRICS]; u32 key = bpf_get_smp_processor_id(); u32 i; /* look up before reading, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { u32 flag = i; ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag); @@ -52,11 +58,11 @@ int BPF_PROG(fentry_XXX) return 0; } - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { - struct bpf_perf_event_value reading; + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { + struct bpf_perf_event_value___local reading; int err; - err = bpf_perf_event_read_value(&events, key, &reading, + err = bpf_perf_event_read_value(&events, key, (void *)&reading, sizeof(reading)); if (err) return 0; @@ -68,14 +74,14 @@ int BPF_PROG(fentry_XXX) } static inline void -fexit_update_maps(u32 id, struct bpf_perf_event_value *after) +fexit_update_maps(u32 id, struct bpf_perf_event_value___local *after) { - struct bpf_perf_event_value *before, diff; + struct bpf_perf_event_value___local *before, diff; before = bpf_map_lookup_elem(&fentry_readings, &id); /* only account samples with a valid fentry_reading */ if (before && before->counter) { - struct bpf_perf_event_value *accum; + 
struct bpf_perf_event_value___local *accum; diff.counter = after->counter - before->counter; diff.enabled = after->enabled - before->enabled; @@ -93,23 +99,24 @@ fexit_update_maps(u32 id, struct bpf_perf_event_value *after) SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { - struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local readings[MAX_NUM_METRICS]; u32 cpu = bpf_get_smp_processor_id(); u32 i, zero = 0; int err; u64 *count; /* read all events before updating the maps, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { err = bpf_perf_event_read_value(&events, cpu + i * num_cpu, - readings + i, sizeof(*readings)); + (void *)(readings + i), + sizeof(*readings)); if (err) return 0; } count = bpf_map_lookup_elem(&counts, &zero); if (count) { *count += 1; - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) fexit_update_maps(i, &readings[i]); } return 0; diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 903b80ff4e9a..aa43dead249c 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -151,7 +151,7 @@ static int get_next_struct_ops_map(const char *name, int *res_fd, return -1; } - err = bpf_obj_get_info_by_fd(fd, info, &info_len); + err = bpf_map_get_info_by_fd(fd, info, &info_len); if (err) { p_err("can't get map info: %s", strerror(errno)); close(fd); @@ -262,7 +262,7 @@ static struct res do_one_id(const char *id_str, work_func func, void *data, goto done; } - if (bpf_obj_get_info_by_fd(fd, info, &info_len)) { + if (bpf_map_get_info_by_fd(fd, info, &info_len)) { p_err("can't get map info: %s", strerror(errno)); res.nr_errs++; goto done; @@ -276,6 +276,9 @@ static struct res do_one_id(const char *id_str, work_func func, void *data, res.nr_maps++; + if (wtr) + jsonw_start_array(wtr); + if (func(fd, info, data, wtr)) 
res.nr_errs++; else if (!wtr && json_output) @@ -288,6 +291,9 @@ static struct res do_one_id(const char *id_str, work_func func, void *data, */ jsonw_null(json_wtr); + if (wtr) + jsonw_end_array(wtr); + done: free(info); close(fd); @@ -475,21 +481,44 @@ static int do_unregister(int argc, char **argv) return cmd_retval(&res, true); } +static int pin_link(struct bpf_link *link, const char *pindir, + const char *name) +{ + char pinfile[PATH_MAX]; + int err; + + err = pathname_concat(pinfile, sizeof(pinfile), pindir, name); + if (err) + return -1; + + return bpf_link__pin(link, pinfile); +} + static int do_register(int argc, char **argv) { LIBBPF_OPTS(bpf_object_open_opts, open_opts); + __u32 link_info_len = sizeof(struct bpf_link_info); + struct bpf_link_info link_info = {}; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); int nr_errs = 0, nr_maps = 0; + const char *linkdir = NULL; struct bpf_object *obj; struct bpf_link *link; struct bpf_map *map; const char *file; - if (argc != 1) + if (argc != 1 && argc != 2) usage(); file = GET_ARG(); + if (argc == 1) + linkdir = GET_ARG(); + + if (linkdir && create_and_mount_bpffs_dir(linkdir)) { + p_err("can't mount bpffs for pinning"); + return -1; + } if (verifier_logs) /* log_level1 + log_level2 + stats, but not stable UAPI */ @@ -519,21 +548,44 @@ static int do_register(int argc, char **argv) } nr_maps++; - bpf_link__disconnect(link); - bpf_link__destroy(link); - - if (!bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, - &info_len)) - p_info("Registered %s %s id %u", - get_kern_struct_ops_name(&info), - bpf_map__name(map), - info.id); - else + if (bpf_map_get_info_by_fd(bpf_map__fd(map), &info, + &info_len)) { /* Not p_err. The struct_ops was attached * successfully. 
*/ p_info("Registered %s but can't find id: %s", bpf_map__name(map), strerror(errno)); + goto clean_link; + } + if (!(bpf_map__map_flags(map) & BPF_F_LINK)) { + p_info("Registered %s %s id %u", + get_kern_struct_ops_name(&info), + info.name, + info.id); + goto clean_link; + } + if (bpf_link_get_info_by_fd(bpf_link__fd(link), + &link_info, + &link_info_len)) { + p_err("Registered %s but can't find link id: %s", + bpf_map__name(map), strerror(errno)); + nr_errs++; + goto clean_link; + } + if (linkdir && pin_link(link, linkdir, info.name)) { + p_err("can't pin link %u for %s: %s", + link_info.id, info.name, + strerror(errno)); + nr_errs++; + goto clean_link; + } + p_info("Registered %s %s map id %u link id %u", + get_kern_struct_ops_name(&info), + info.name, info.id, link_info.id); + +clean_link: + bpf_link__disconnect(link); + bpf_link__destroy(link); } bpf_object__close(obj); @@ -562,7 +614,7 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s { show | list } [STRUCT_OPS_MAP]\n" " %1$s %2$s dump [STRUCT_OPS_MAP]\n" - " %1$s %2$s register OBJ\n" + " %1$s %2$s register OBJ [LINK_DIR]\n" " %1$s %2$s unregister STRUCT_OPS_MAP\n" " %1$s %2$s help\n" "\n" diff --git a/tools/bpf/bpftool/token.c b/tools/bpf/bpftool/token.c new file mode 100644 index 000000000000..c08f34b9d51b --- /dev/null +++ b/tools/bpf/bpftool/token.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2025 Didi Technology Co., Tao Chen */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <mntent.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "json_writer.h" +#include "main.h" + +#define MOUNTS_FILE "/proc/mounts" + +static struct { + const char *header; + const char *key; +} sets[] = { + {"allowed_cmds", "delegate_cmds"}, + {"allowed_maps", "delegate_maps"}, + 
{"allowed_progs", "delegate_progs"}, + {"allowed_attachs", "delegate_attachs"}, +}; + +static bool has_delegate_options(const char *mnt_ops) +{ + return strstr(mnt_ops, "delegate_cmds") || + strstr(mnt_ops, "delegate_maps") || + strstr(mnt_ops, "delegate_progs") || + strstr(mnt_ops, "delegate_attachs"); +} + +static char *get_delegate_value(char *opts, const char *key) +{ + char *token, *rest, *ret = NULL; + + if (!opts) + return NULL; + + for (token = strtok_r(opts, ",", &rest); token; + token = strtok_r(NULL, ",", &rest)) { + if (strncmp(token, key, strlen(key)) == 0 && + token[strlen(key)] == '=') { + ret = token + strlen(key) + 1; + break; + } + } + + return ret; +} + +static void print_items_per_line(char *input, int items_per_line) +{ + char *str, *rest; + int cnt = 0; + + if (!input) + return; + + for (str = strtok_r(input, ":", &rest); str; + str = strtok_r(NULL, ":", &rest)) { + if (cnt % items_per_line == 0) + printf("\n\t "); + + printf("%-20s", str); + cnt++; + } +} + +#define ITEMS_PER_LINE 4 +static void show_token_info_plain(struct mntent *mntent) +{ + size_t i; + + printf("token_info %s", mntent->mnt_dir); + + for (i = 0; i < ARRAY_SIZE(sets); i++) { + char *opts, *value; + + printf("\n\t%s:", sets[i].header); + opts = strdup(mntent->mnt_opts); + value = get_delegate_value(opts, sets[i].key); + print_items_per_line(value, ITEMS_PER_LINE); + free(opts); + } + + printf("\n"); +} + +static void split_json_array_str(char *input) +{ + char *str, *rest; + + if (!input) { + jsonw_start_array(json_wtr); + jsonw_end_array(json_wtr); + return; + } + + jsonw_start_array(json_wtr); + for (str = strtok_r(input, ":", &rest); str; + str = strtok_r(NULL, ":", &rest)) { + jsonw_string(json_wtr, str); + } + jsonw_end_array(json_wtr); +} + +static void show_token_info_json(struct mntent *mntent) +{ + size_t i; + + jsonw_start_object(json_wtr); + jsonw_string_field(json_wtr, "token_info", mntent->mnt_dir); + + for (i = 0; i < ARRAY_SIZE(sets); i++) { + char *opts, 
*value; + + jsonw_name(json_wtr, sets[i].header); + opts = strdup(mntent->mnt_opts); + value = get_delegate_value(opts, sets[i].key); + split_json_array_str(value); + free(opts); + } + + jsonw_end_object(json_wtr); +} + +static int __show_token_info(struct mntent *mntent) +{ + if (json_output) + show_token_info_json(mntent); + else + show_token_info_plain(mntent); + + return 0; +} + +static int show_token_info(void) +{ + FILE *fp; + struct mntent *ent; + + fp = setmntent(MOUNTS_FILE, "r"); + if (!fp) { + p_err("Failed to open: %s", MOUNTS_FILE); + return -1; + } + + if (json_output) + jsonw_start_array(json_wtr); + + while ((ent = getmntent(fp)) != NULL) { + if (strncmp(ent->mnt_type, "bpf", 3) == 0) { + if (has_delegate_options(ent->mnt_opts)) + __show_token_info(ent); + } + } + + if (json_output) + jsonw_end_array(json_wtr); + + endmntent(fp); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + if (argc) + return BAD_ARG(); + + return show_token_info(); +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %1$s %2$s { show | list }\n" + " %1$s %2$s help\n" + " " HELP_SPEC_OPTIONS " }\n" + "\n" + "", + bin_name, argv[-2]); + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "help", do_help }, + { 0 } +}; + +int do_token(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c index bf1f02212797..573a8d99f009 100644 --- a/tools/bpf/bpftool/tracelog.c +++ b/tools/bpf/bpftool/tracelog.c @@ -57,10 +57,8 @@ find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt) static bool get_tracefs_pipe(char *mnt) { static const char * const known_mnts[] = { - "/sys/kernel/debug/tracing", "/sys/kernel/tracing", - "/tracing", - "/trace", + "/sys/kernel/debug/tracing", }; const char *pipe_name = "/trace_pipe"; const char 
*fstype = "tracefs"; @@ -78,7 +76,7 @@ static bool get_tracefs_pipe(char *mnt) return false; /* Allow room for NULL terminating byte and pipe file name */ - snprintf(format, sizeof(format), "%%*s %%%zds %%99s %%*s %%*d %%*d\\n", + snprintf(format, sizeof(format), "%%*s %%%zus %%99s %%*s %%*d %%*d\\n", PATH_MAX - strlen(pipe_name) - 1); while (fscanf(fp, format, mnt, type) == 2) if (strcmp(type, fstype) == 0) { @@ -95,12 +93,7 @@ static bool get_tracefs_pipe(char *mnt) return false; p_info("could not find tracefs, attempting to mount it now"); - /* Most of the time, tracefs is automatically mounted by debugfs at - * /sys/kernel/debug/tracing when we try to access it. If we could not - * find it, it is likely that debugfs is not mounted. Let's give one - * attempt at mounting just tracefs at /sys/kernel/tracing. - */ - strcpy(mnt, known_mnts[1]); + strcpy(mnt, known_mnts[0]); if (mount_tracefs(mnt)) return false; diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 6fe3134ae45d..5e7cb8b36fef 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -46,7 +46,11 @@ out: } dd->sym_mapping = tmp; sym = &dd->sym_mapping[dd->sym_count]; - if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2) + + /* module is optional */ + sym->module[0] = '\0'; + /* trim the square brackets around the module name */ + if (sscanf(buff, "%p %*c %s [%[^]]s", &address, sym->name, sym->module) < 2) continue; sym->address = (unsigned long)address; if (!strcmp(sym->name, "__bpf_call_base")) { @@ -195,13 +199,13 @@ static const char *print_imm(void *private_data, if (insn->src_reg == BPF_PSEUDO_MAP_FD) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), - "map[id:%u]", insn->imm); + "map[id:%d]", insn->imm); else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), - "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm); + "map[id:%d][0]+%d", insn->imm, (insn + 1)->imm); else if 
(insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), - "map[idx:%u]+%u", insn->imm, (insn + 1)->imm); + "map[idx:%d]+%d", insn->imm, (insn + 1)->imm); else if (insn->src_reg == BPF_PSEUDO_FUNC) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "subprog[%+d]", insn->imm); @@ -345,7 +349,7 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); - printf("% 4d: ", i); + printf("%4u: ", i); print_bpf_insn(&cbs, insn + i, true); if (opcodes) { @@ -361,7 +365,8 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, } void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end, - unsigned int start_idx) + unsigned int start_idx, + bool opcodes, bool linum) { const struct bpf_insn_cbs cbs = { .cb_print = print_insn_for_graph, @@ -369,14 +374,61 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end, .cb_imm = print_imm, .private_data = dd, }; + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; + const struct bpf_line_info *last_linfo = NULL; + struct bpf_func_info *record = dd->func_info; struct bpf_insn *insn_start = buf_start; struct bpf_insn *insn_end = buf_end; struct bpf_insn *cur = insn_start; + struct btf *btf = dd->btf; + bool double_insn = false; + char func_sig[1024]; for (; cur <= insn_end; cur++) { - printf("% 4d: ", (int)(cur - insn_start + start_idx)); + unsigned int insn_off; + + if (double_insn) { + double_insn = false; + continue; + } + double_insn = cur->code == (BPF_LD | BPF_IMM | BPF_DW); + + insn_off = (unsigned int)(cur - insn_start + start_idx); + if (btf && record) { + if (record->insn_off == insn_off) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') + printf("; %s:\\l\\\n", func_sig); + record = (void *)record + dd->finfo_rec_size; + } + } + + if (prog_linfo) { + const struct bpf_line_info 
*linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, insn_off, 0); + if (linfo && linfo != last_linfo) { + btf_dump_linfo_dotlabel(btf, linfo, linum); + last_linfo = linfo; + } + } + + printf("%u: ", insn_off); print_bpf_insn(&cbs, cur, true); + + if (opcodes) { + printf("\\ \\ \\ \\ "); + fprint_hex(stdout, cur, 8, " "); + if (double_insn && cur <= insn_end - 1) { + printf(" "); + fprint_hex(stdout, cur + 1, 8, " "); + } + printf("\\l\\\n"); + } + if (cur != insn_end) - printf(" | "); + printf("| "); } } diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index 54847e174273..db3ba0671501 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -5,12 +5,14 @@ #define __BPF_TOOL_XLATED_DUMPER_H #define SYM_MAX_NAME 256 +#define MODULE_MAX_NAME 64 struct bpf_prog_linfo; struct kernel_sym { unsigned long address; char name[SYM_MAX_NAME]; + char module[MODULE_MAX_NAME]; }; struct dump_data { @@ -34,6 +36,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, bool opcodes, bool linum); void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end, - unsigned int start_index); + unsigned int start_index, + bool opcodes, bool linum); #endif diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore index 16913fffc985..52d5e9721d92 100644 --- a/tools/bpf/resolve_btfids/.gitignore +++ b/tools/bpf/resolve_btfids/.gitignore @@ -1,3 +1,4 @@ /fixdep /resolve_btfids /libbpf/ +/libsubcmd/ diff --git a/tools/bpf/resolve_btfids/Build b/tools/bpf/resolve_btfids/Build index ae82da03f9bf..077de3829c72 100644 --- a/tools/bpf/resolve_btfids/Build +++ b/tools/bpf/resolve_btfids/Build @@ -1,3 +1,5 @@ +hostprogs := resolve_btfids + resolve_btfids-y += main.o resolve_btfids-y += rbtree.o resolve_btfids-y += zalloc.o @@ -7,4 +9,4 @@ resolve_btfids-y += str_error_r.o $(OUTPUT)%.o: ../../lib/%.c 
FORCE $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) + $(call if_changed_dep,host_cc_o_c) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 19a3112e271a..ce1b556dfa90 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -5,10 +5,8 @@ include ../../scripts/Makefile.arch srctree := $(abspath $(CURDIR)/../../../) ifeq ($(V),1) - Q = msg = else - Q = @ ifeq ($(silent),1) msg = else @@ -17,15 +15,15 @@ else MAKEFLAGS=--no-print-directory endif -# always use the host compiler -AR = $(HOSTAR) -CC = $(HOSTCC) -LD = $(HOSTLD) -ARCH = $(HOSTARCH) +# Overrides for the prepare step libraries. +HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + RM ?= rm +HOSTCC ?= gcc +HOSTLD ?= ld +HOSTAR ?= ar CROSS_COMPILE = -CFLAGS := $(KBUILD_HOSTCFLAGS) -LDFLAGS := $(KBUILD_HOSTLDFLAGS) OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ @@ -35,51 +33,64 @@ SUBCMD_SRC := $(srctree)/tools/lib/subcmd/ BPFOBJ := $(OUTPUT)/libbpf/libbpf.a LIBBPF_OUT := $(abspath $(dir $(BPFOBJ)))/ SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a +SUBCMD_OUT := $(abspath $(dir $(SUBCMDOBJ)))/ LIBBPF_DESTDIR := $(LIBBPF_OUT) LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)include +SUBCMD_DESTDIR := $(SUBCMD_OUT) +SUBCMD_INCLUDE := $(SUBCMD_DESTDIR)include + BINARY := $(OUTPUT)/resolve_btfids BINARY_IN := $(BINARY)-in.o all: $(BINARY) +prepare: $(BPFOBJ) $(SUBCMDOBJ) + $(OUTPUT) $(OUTPUT)/libsubcmd $(LIBBPF_OUT): $(call msg,MKDIR,,$@) $(Q)mkdir -p $(@) $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd - $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(SUBCMD_OUT) \ + DESTDIR=$(SUBCMD_DESTDIR) $(HOST_OVERRIDES) prefix= subdir= \ + $(abspath $@) install_headers $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) 
OUTPUT=$(LIBBPF_OUT) \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \ + DESTDIR=$(LIBBPF_DESTDIR) $(HOST_OVERRIDES) prefix= subdir= \ $(abspath $@) install_headers -CFLAGS += -g \ +LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) +LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) + +HOSTCFLAGS_resolve_btfids += -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ -I$(LIBBPF_INCLUDE) \ - -I$(SUBCMD_SRC) + -I$(SUBCMD_INCLUDE) \ + $(LIBELF_FLAGS) -LIBS = -lelf -lz +LIBS = $(LIBELF_LIBS) -lz -export srctree OUTPUT CFLAGS Q +export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR include $(srctree)/tools/build/Makefile.include -$(BINARY_IN): $(BPFOBJ) fixdep FORCE | $(OUTPUT) +$(BINARY_IN): fixdep FORCE prepare | $(OUTPUT) $(Q)$(MAKE) $(build)=resolve_btfids $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) $(call msg,LINK,$@) - $(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) + $(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) clean_objects := $(wildcard $(OUTPUT)/*.o \ $(OUTPUT)/.*.o.cmd \ $(OUTPUT)/.*.o.d \ $(LIBBPF_OUT) \ $(LIBBPF_DESTDIR) \ - $(OUTPUT)/libsubcmd \ + $(SUBCMD_OUT) \ + $(SUBCMD_DESTDIR) \ $(OUTPUT)/resolve_btfids) ifneq ($(clean_objects),) @@ -96,4 +107,4 @@ tags: FORCE: -.PHONY: all FORCE clean tags +.PHONY: all FORCE clean tags prepare diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 80cd7843c677..d47191c6e55e 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* - * resolve_btfids scans Elf object for .BTF_ids section and resolves + * resolve_btfids scans ELF object for .BTF_ids section and resolves * its symbols with BTF ID values. 
* * Each symbol points to 4 bytes data and is expected to have @@ -70,15 +70,16 @@ #include <sys/stat.h> #include <fcntl.h> #include <errno.h> +#include <linux/btf_ids.h> #include <linux/rbtree.h> #include <linux/zalloc.h> #include <linux/err.h> #include <bpf/btf.h> #include <bpf/libbpf.h> -#include <parse-options.h> +#include <subcmd/parse-options.h> #define BTF_IDS_SECTION ".BTF_ids" -#define BTF_ID "__BTF_ID__" +#define BTF_ID_PREFIX "__BTF_ID__" #define BTF_STRUCT "struct" #define BTF_UNION "union" @@ -89,6 +90,14 @@ #define ADDR_CNT 100 +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATANATIVE ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATANATIVE ELFDATA2MSB +#else +# error "Unknown machine endianness!" +#endif + struct btf_id { struct rb_node rb_node; char *name; @@ -116,6 +125,7 @@ struct object { int idlist_shndx; size_t strtabidx; unsigned long idlist_addr; + int encoding; } efile; struct rb_root sets; @@ -131,6 +141,7 @@ struct object { }; static int verbose; +static int warnings; static int eprintf(int level, int var, const char *fmt, ...) { @@ -161,7 +172,7 @@ static int eprintf(int level, int var, const char *fmt, ...) static bool is_btf_id(const char *name) { - return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1); + return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1); } static struct btf_id *btf_id__find(struct rb_root *root, const char *name) @@ -319,6 +330,7 @@ static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; size_t shdrstrndx; + GElf_Ehdr ehdr; int idx = 0; Elf *elf; int fd; @@ -350,6 +362,13 @@ static int elf_collect(struct object *obj) return -1; } + if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) { + pr_err("FAILED cannot get ELF header: %s\n", + elf_errmsg(-1)); + return -1; + } + obj->efile.encoding = ehdr.e_ident[EI_DATA]; + /* * Scan all the elf sections and look for save data * from .BTF_ids section and symbols. 
@@ -391,6 +410,14 @@ static int elf_collect(struct object *obj) obj->efile.idlist = data; obj->efile.idlist_shndx = idx; obj->efile.idlist_addr = sh.sh_addr; + } else if (!strcmp(name, BTF_BASE_ELF_SEC)) { + /* If a .BTF.base section is found, do not resolve + * BTF ids relative to vmlinux; resolve relative + * to the .BTF.base section instead. btf__parse_split() + * will take care of this once the base BTF it is + * passed is NULL. + */ + obj->base_btf_path = NULL; } if (compressed_section_fix(elf, scn, &sh)) @@ -441,7 +468,7 @@ static int symbols_collect(struct object *obj) * __BTF_ID__TYPE__vfs_truncate__0 * prefix = ^ */ - prefix = name + sizeof(BTF_ID) - 1; + prefix = name + sizeof(BTF_ID_PREFIX) - 1; /* struct */ if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) { @@ -578,6 +605,7 @@ static int symbols_resolve(struct object *obj) if (id->id) { pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", str, id->id, type_id, id->id); + warnings++; } else { id->id = type_id; (*nr)--; @@ -599,8 +627,10 @@ static int id_patch(struct object *obj, struct btf_id *id) int i; /* For set, set8, id->id may be 0 */ - if (!id->id && !id->is_set && !id->is_set8) + if (!id->id && !id->is_set && !id->is_set8) { pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); + warnings++; + } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; @@ -649,19 +679,18 @@ static int cmp_id(const void *pa, const void *pb) static int sets_patch(struct object *obj) { Elf_Data *data = obj->efile.idlist; - int *ptr = data->d_buf; struct rb_node *next; next = rb_first(&obj->sets); while (next) { - unsigned long addr, idx; + struct btf_id_set8 *set8 = NULL; + struct btf_id_set *set = NULL; + unsigned long addr, off; struct btf_id *id; - int *base; - int cnt; id = rb_entry(next, struct btf_id, rb_node); addr = id->addr[0]; - idx = addr - obj->efile.idlist_addr; + off = addr - obj->efile.idlist_addr; /* sets are unique */ if (id->addr_cnt != 1) { @@ -670,14 
+699,39 @@ static int sets_patch(struct object *obj) return -1; } - idx = idx / sizeof(int); - base = &ptr[idx] + (id->is_set8 ? 2 : 1); - cnt = ptr[idx]; + if (id->is_set) { + set = data->d_buf + off; + qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id); + } else { + set8 = data->d_buf + off; + /* + * Make sure id is at the beginning of the pairs + * struct, otherwise the below qsort would not work. + */ + BUILD_BUG_ON((u32 *)set8->pairs != &set8->pairs[0].id); + qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); - pr_debug("sorting addr %5lu: cnt %6d [%s]\n", - (idx + 1) * sizeof(int), cnt, id->name); + /* + * When ELF endianness does not match endianness of the + * host, libelf will do the translation when updating + * the ELF. This, however, corrupts SET8 flags which are + * already in the target endianness. So, let's bswap + * them to the host endianness and libelf will then + * correctly translate everything. + */ + if (obj->efile.encoding != ELFDATANATIVE) { + int i; + + set8->flags = bswap_32(set8->flags); + for (i = 0; i < set8->cnt; i++) { + set8->pairs[i].flags = + bswap_32(set8->pairs[i].flags); + } + } + } - qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); + pr_debug("sorting addr %5lu: cnt %6d [%s]\n", + off, id->is_set ? 
set->cnt : set8->cnt, id->name); next = rb_next(next); } @@ -686,7 +740,7 @@ static int sets_patch(struct object *obj) static int symbols_patch(struct object *obj) { - int err; + off_t err; if (__symbols_patch(obj, &obj->structs) || __symbols_patch(obj, &obj->unions) || @@ -732,6 +786,7 @@ int main(int argc, const char **argv) .funcs = RB_ROOT, .sets = RB_ROOT, }; + bool fatal_warnings = false; struct option btfid_options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show errors, etc)"), @@ -739,6 +794,8 @@ int main(int argc, const char **argv) "BTF data"), OPT_STRING('b', "btf_base", &obj.base_btf_path, "file", "path of file providing base BTF"), + OPT_BOOLEAN(0, "fatal_warnings", &fatal_warnings, + "turn warnings into errors"), OPT_END() }; int err = -1; @@ -773,7 +830,8 @@ int main(int argc, const char **argv) if (symbols_patch(&obj)) goto out; - err = 0; + if (!(fatal_warnings && warnings)) + err = 0; out: if (obj.efile.elf) { elf_end(obj.efile.elf); diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore deleted file mode 100644 index ffdb70230c8b..000000000000 --- a/tools/bpf/runqslower/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -/.output diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile deleted file mode 100644 index 8b3d87b82b7a..000000000000 --- a/tools/bpf/runqslower/Makefile +++ /dev/null @@ -1,90 +0,0 @@ -# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -include ../../scripts/Makefile.include - -OUTPUT ?= $(abspath .output)/ - -BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ -DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool -BPFTOOL ?= $(DEFAULT_BPFTOOL) -LIBBPF_SRC := $(abspath ../../lib/bpf) -BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ -BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a -BPF_DESTDIR := $(BPFOBJ_OUTPUT) -BPF_INCLUDE := $(BPF_DESTDIR)/include -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) -CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS) - -# Try 
to detect best kernel BTF source -KERNEL_REL := $(shell uname -r) -VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ - $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ - ../../../vmlinux /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(KERNEL_REL) -VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ - $(wildcard $(VMLINUX_BTF_PATHS)))) - -ifeq ($(V),1) -Q = -else -Q = @ -MAKEFLAGS += --no-print-directory -submake_extras := feature_display=0 -endif - -.DELETE_ON_ERROR: - -.PHONY: all clean runqslower libbpf_hdrs -all: runqslower - -runqslower: $(OUTPUT)/runqslower - -clean: - $(call QUIET_CLEAN, runqslower) - $(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT) - $(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d - $(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h - $(Q)$(RM) $(OUTPUT)runqslower - $(Q)$(RM) -r .output - -libbpf_hdrs: $(BPFOBJ) - -$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) - $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ - -$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ - $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs - -$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs - -$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) - $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ - -$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) - $(QUIET_GEN)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ - -c $(filter %.c,$^) -o $@ && \ - $(LLVM_STRIP) -g $@ - -$(OUTPUT)/%.o: %.c | $(OUTPUT) - $(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ - -$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT): - $(QUIET_MKDIR)mkdir -p $@ - -$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) -ifeq ($(VMLINUX_H),) - $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \ - echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \ - "specify its location." 
>&2; \ - exit 1;\ - fi - $(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ -else - $(Q)cp "$(VMLINUX_H)" $@ -endif - -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \ - DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers - -$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c deleted file mode 100644 index 9a5c1f008fe6..000000000000 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2019 Facebook -#include "vmlinux.h" -#include <bpf/bpf_helpers.h> -#include "runqslower.h" - -#define TASK_RUNNING 0 -#define BPF_F_CURRENT_CPU 0xffffffffULL - -const volatile __u64 min_us = 0; -const volatile pid_t targ_pid = 0; - -struct { - __uint(type, BPF_MAP_TYPE_TASK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, u64); -} start SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(u32)); -} events SEC(".maps"); - -/* record enqueue timestamp */ -__always_inline -static int trace_enqueue(struct task_struct *t) -{ - u32 pid = t->pid; - u64 *ptr; - - if (!pid || (targ_pid && targ_pid != pid)) - return 0; - - ptr = bpf_task_storage_get(&start, t, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!ptr) - return 0; - - *ptr = bpf_ktime_get_ns(); - return 0; -} - -SEC("tp_btf/sched_wakeup") -int handle__sched_wakeup(u64 *ctx) -{ - /* TP_PROTO(struct task_struct *p) */ - struct task_struct *p = (void *)ctx[0]; - - return trace_enqueue(p); -} - -SEC("tp_btf/sched_wakeup_new") -int handle__sched_wakeup_new(u64 *ctx) -{ - /* TP_PROTO(struct task_struct *p) */ - struct task_struct *p = (void *)ctx[0]; - - 
return trace_enqueue(p); -} - -SEC("tp_btf/sched_switch") -int handle__sched_switch(u64 *ctx) -{ - /* TP_PROTO(bool preempt, struct task_struct *prev, - * struct task_struct *next) - */ - struct task_struct *prev = (struct task_struct *)ctx[1]; - struct task_struct *next = (struct task_struct *)ctx[2]; - struct runq_event event = {}; - u64 *tsp, delta_us; - long state; - u32 pid; - - /* ivcsw: treat like an enqueue event and store timestamp */ - if (prev->__state == TASK_RUNNING) - trace_enqueue(prev); - - pid = next->pid; - - /* For pid mismatch, save a bpf_task_storage_get */ - if (!pid || (targ_pid && targ_pid != pid)) - return 0; - - /* fetch timestamp and calculate delta */ - tsp = bpf_task_storage_get(&start, next, 0, 0); - if (!tsp) - return 0; /* missed enqueue */ - - delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; - if (min_us && delta_us <= min_us) - return 0; - - event.pid = pid; - event.delta_us = delta_us; - bpf_get_current_comm(&event.task, sizeof(event.task)); - - /* output */ - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, - &event, sizeof(event)); - - bpf_task_storage_delete(&start, next); - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c deleted file mode 100644 index 83c5993a139a..000000000000 --- a/tools/bpf/runqslower/runqslower.c +++ /dev/null @@ -1,171 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -// Copyright (c) 2019 Facebook -#include <argp.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> -#include <bpf/libbpf.h> -#include <bpf/bpf.h> -#include "runqslower.h" -#include "runqslower.skel.h" - -struct env { - pid_t pid; - __u64 min_us; - bool verbose; -} env = { - .min_us = 10000, -}; - -const char *argp_program_version = "runqslower 0.1"; -const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; -const char argp_program_doc[] = -"runqslower Trace long process scheduling delays.\n" -" 
For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n" -"\n" -"This script traces high scheduling delays between tasks being\n" -"ready to run and them running on CPU after that.\n" -"\n" -"USAGE: runqslower [-p PID] [min_us]\n" -"\n" -"EXAMPLES:\n" -" runqslower # trace run queue latency higher than 10000 us (default)\n" -" runqslower 1000 # trace run queue latency higher than 1000 us\n" -" runqslower -p 123 # trace pid 123 only\n"; - -static const struct argp_option opts[] = { - { "pid", 'p', "PID", 0, "Process PID to trace"}, - { "verbose", 'v', NULL, 0, "Verbose debug output" }, - {}, -}; - -static error_t parse_arg(int key, char *arg, struct argp_state *state) -{ - static int pos_args; - int pid; - long long min_us; - - switch (key) { - case 'v': - env.verbose = true; - break; - case 'p': - errno = 0; - pid = strtol(arg, NULL, 10); - if (errno || pid <= 0) { - fprintf(stderr, "Invalid PID: %s\n", arg); - argp_usage(state); - } - env.pid = pid; - break; - case ARGP_KEY_ARG: - if (pos_args++) { - fprintf(stderr, - "Unrecognized positional argument: %s\n", arg); - argp_usage(state); - } - errno = 0; - min_us = strtoll(arg, NULL, 10); - if (errno || min_us <= 0) { - fprintf(stderr, "Invalid delay (in us): %s\n", arg); - argp_usage(state); - } - env.min_us = min_us; - break; - default: - return ARGP_ERR_UNKNOWN; - } - return 0; -} - -int libbpf_print_fn(enum libbpf_print_level level, - const char *format, va_list args) -{ - if (level == LIBBPF_DEBUG && !env.verbose) - return 0; - return vfprintf(stderr, format, args); -} - -void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) -{ - const struct runq_event *e = data; - struct tm *tm; - char ts[32]; - time_t t; - - time(&t); - tm = localtime(&t); - strftime(ts, sizeof(ts), "%H:%M:%S", tm); - printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us); -} - -void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) -{ - printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); -} - -int main(int argc, 
char **argv) -{ - static const struct argp argp = { - .options = opts, - .parser = parse_arg, - .doc = argp_program_doc, - }; - struct perf_buffer *pb = NULL; - struct runqslower_bpf *obj; - int err; - - err = argp_parse(&argp, argc, argv, 0, NULL, NULL); - if (err) - return err; - - libbpf_set_print(libbpf_print_fn); - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - obj = runqslower_bpf__open(); - if (!obj) { - fprintf(stderr, "failed to open and/or load BPF object\n"); - return 1; - } - - /* initialize global data (filtering options) */ - obj->rodata->targ_pid = env.pid; - obj->rodata->min_us = env.min_us; - - err = runqslower_bpf__load(obj); - if (err) { - fprintf(stderr, "failed to load BPF object: %d\n", err); - goto cleanup; - } - - err = runqslower_bpf__attach(obj); - if (err) { - fprintf(stderr, "failed to attach BPF programs\n"); - goto cleanup; - } - - printf("Tracing run queue latency higher than %llu us\n", env.min_us); - printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); - - pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, - handle_event, handle_lost_events, NULL, NULL); - err = libbpf_get_error(pb); - if (err) { - pb = NULL; - fprintf(stderr, "failed to open perf buffer: %d\n", err); - goto cleanup; - } - - while ((err = perf_buffer__poll(pb, 100)) >= 0) - ; - printf("Error polling perf buffer: %d\n", err); - -cleanup: - perf_buffer__free(pb); - runqslower_bpf__destroy(obj); - - return err != 0; -} diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h deleted file mode 100644 index 4f70f07200c2..000000000000 --- a/tools/bpf/runqslower/runqslower.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __RUNQSLOWER_H -#define __RUNQSLOWER_H - -#define TASK_COMM_LEN 16 - -struct runq_event { - char task[TASK_COMM_LEN]; - __u64 delta_us; - pid_t pid; -}; - -#endif /* __RUNQSLOWER_H */ |
