diff options
Diffstat (limited to 'samples')
205 files changed, 8415 insertions, 6651 deletions
diff --git a/samples/Kconfig b/samples/Kconfig index 0d81c00289ee..5bc7c9e5a59e 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -23,11 +23,11 @@ config SAMPLE_TRACE_CUSTOM_EVENTS This builds the custom trace event example module. config SAMPLE_TRACE_PRINTK - tristate "Build trace_printk module - tests various trace_printk formats" + tristate "Build trace_printk module - tests various trace_printk formats" depends on EVENT_TRACING && m help - This builds a module that calls trace_printk() and can be used to - test various trace_printk() calls from a module. + This builds a module that calls trace_printk() and can be used to + test various trace_printk() calls from a module. config SAMPLE_FTRACE_DIRECT tristate "Build register_ftrace_direct() example" @@ -38,7 +38,7 @@ config SAMPLE_FTRACE_DIRECT that hooks to wake_up_process and prints the parameters. config SAMPLE_FTRACE_DIRECT_MULTI - tristate "Build register_ftrace_direct_multi() example" + tristate "Build register_ftrace_direct() on multiple ips example" depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m depends on HAVE_SAMPLE_FTRACE_DIRECT_MULTI help @@ -46,12 +46,19 @@ config SAMPLE_FTRACE_DIRECT_MULTI that hooks to wake_up_process and schedule, and prints the function addresses. +config SAMPLE_FTRACE_OPS + tristate "Build custom ftrace ops example" + depends on FUNCTION_TRACER + help + This builds an ftrace ops example that hooks two functions and + measures the time taken to invoke one function a number of times. + config SAMPLE_TRACE_ARRAY - tristate "Build sample module for kernel access to Ftrace instancess" + tristate "Build sample module for kernel access to Ftrace instances" depends on EVENT_TRACING && m help - This builds a module that demonstrates the use of various APIs to - access Ftrace instances from within the kernel. + This builds a module that demonstrates the use of various APIs to + access Ftrace instances from within the kernel. config SAMPLE_KOBJECT tristate "Build kobject examples" @@ -177,6 +184,17 @@ config SAMPLE_TIMER bool "Timer sample" depends on CC_CAN_LINK && HEADERS_INSTALL +config SAMPLE_TSM_MR + tristate "TSM measurement sample" + select TSM_MEASUREMENTS + select VIRT_DRIVERS + help + Build a sample module that emulates MRs (Measurement Registers) and + exposes them to user mode applications through the TSM sysfs + interface (/sys/class/misc/tsm_mr_sample/emulated_mr/). + + The module name will be tsm-mr-sample when built as a module. + config SAMPLE_UHID bool "UHID sample" depends on CC_CAN_LINK && HEADERS_INSTALL @@ -184,32 +202,33 @@ config SAMPLE_UHID Build UHID sample program. config SAMPLE_VFIO_MDEV_MTTY - tristate "Build VFIO mtty example mediated device sample code -- loadable modules only" - depends on VFIO_MDEV && m + tristate "Build VFIO mtty example mediated device sample code" + depends on VFIO + select VFIO_MDEV help Build a virtual tty sample driver for use as a VFIO mediated device config SAMPLE_VFIO_MDEV_MDPY - tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" - depends on VFIO_MDEV && m + tristate "Build VFIO mdpy example mediated device sample code" + depends on VFIO + select VFIO_MDEV help Build a virtual display sample driver for use as a VFIO mediated device. It is a simple framebuffer and supports the region display interface (VFIO_GFX_PLANE_TYPE_REGION). config SAMPLE_VFIO_MDEV_MDPY_FB - tristate "Build VFIO mdpy example guest fbdev driver -- loadable module only" - depends on FB && m - select FB_CFB_FILLRECT - select FB_CFB_COPYAREA - select FB_CFB_IMAGEBLIT + tristate "Build VFIO mdpy example guest fbdev driver" + depends on FB + select FB_IOMEM_HELPERS help Guest fbdev driver for the virtual display sample driver. config SAMPLE_VFIO_MDEV_MBOCHS - tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" - depends on VFIO_MDEV && m + tristate "Build VFIO mbochs example mediated device sample code" + depends on VFIO + select VFIO_MDEV select DMA_SHARED_BUFFER help Build a virtual display sample driver for use as a VFIO @@ -243,6 +262,13 @@ config SAMPLE_INTEL_MEI help Build a sample program to work with mei device. +config SAMPLE_TPS6594_PFSM + bool "Build example program working with TPS6594 PFSM driver" + depends on HEADERS_INSTALL + depends on CC_CAN_LINK + help + Build a sample program to work with PFSM devices. + config SAMPLE_WATCHDOG bool "watchdog sample" depends on CC_CAN_LINK @@ -263,8 +289,41 @@ config SAMPLE_CORESIGHT_SYSCFG This demonstrates how a user may create their own CoreSight configurations and easily load them into the system at runtime. +config SAMPLE_KMEMLEAK + tristate "Simple test for the kernel memory leak detector" + depends on DEBUG_KMEMLEAK && m + help + Build a sample program which have explicitly leaks memory to test + kmemleak. + +config SAMPLE_CGROUP + bool "Build cgroup sample code" + depends on CGROUPS && CC_CAN_LINK && HEADERS_INSTALL + help + Build samples that demonstrate the usage of the cgroup API. + +config SAMPLE_CHECK_EXEC + bool "Exec secure bits examples" + depends on CC_CAN_LINK && HEADERS_INSTALL + help + Build a tool to easily configure SECBIT_EXEC_RESTRICT_FILE and + SECBIT_EXEC_DENY_INTERACTIVE, and a simple script interpreter to + demonstrate how they should be used with execveat(2) + + AT_EXECVE_CHECK. + +config SAMPLE_HUNG_TASK + tristate "Hung task detector test code" + depends on DETECT_HUNG_TASK && DEBUG_FS + help + Build a module that provides debugfs files (e.g., mutex, semaphore, + rw_semaphore_read, rw_semaphore_write) under <debugfs>/hung_task. + Reading these files with multiple processes triggers hung task + detection by holding locks for a long time (256 seconds). + source "samples/rust/Kconfig" +source "samples/damon/Kconfig" + endif # SAMPLES config HAVE_SAMPLE_FTRACE_DIRECT diff --git a/samples/Makefile b/samples/Makefile index 9832ef3f8fcb..07641e177bd8 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,6 +3,8 @@ subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs +subdir-$(CONFIG_SAMPLE_CHECK_EXEC) += check-exec +subdir-$(CONFIG_SAMPLE_CGROUP) += cgroup obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/ obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/ obj-$(CONFIG_SAMPLE_FANOTIFY_ERROR) += fanotify/ @@ -24,15 +26,22 @@ obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_events/ obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/ +obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace/ obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/ subdir-$(CONFIG_SAMPLE_UHID) += uhid obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/ obj-y += vfio-mdev/ subdir-$(CONFIG_SAMPLE_VFS) += vfs obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/ +obj-$(CONFIG_SAMPLE_TPS6594_PFSM) += pfsm/ subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue -obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/ +obj-$(CONFIG_SAMPLE_KMEMLEAK) += kmemleak/ obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight/ obj-$(CONFIG_SAMPLE_FPROBE) += fprobe/ obj-$(CONFIG_SAMPLES_RUST) += rust/ +obj-$(CONFIG_SAMPLE_DAMON_WSSE) += damon/ +obj-$(CONFIG_SAMPLE_DAMON_PRCL) += damon/ +obj-$(CONFIG_SAMPLE_DAMON_MTIER) += damon/ +obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task/ +obj-$(CONFIG_SAMPLE_TSM_MR) += tsm-mr/ diff --git a/samples/acrn/vm-sample.c b/samples/acrn/vm-sample.c index 7abd68b20153..c61e0f91456e 100644 --- a/samples/acrn/vm-sample.c +++ b/samples/acrn/vm-sample.c @@ -3,7 +3,7 @@ * A sample program to run a User VM on the ACRN hypervisor * * This sample runs in a Service VM, which is a privileged VM of ACRN. - * CONFIG_ACRN_HSM need to be enabled in the Service VM. + * CONFIG_ACRN_HSM needs to be enabled in the Service VM. * * Guest VM code in guest16.s will be executed after the VM launched. * @@ -13,7 +13,6 @@ #include <stdint.h> #include <stdlib.h> #include <string.h> -#include <malloc.h> #include <fcntl.h> #include <unistd.h> #include <signal.h> @@ -54,9 +53,9 @@ int main(int argc, char **argv) argc = argc; argv = argv; - guest_memory = memalign(4096, GUEST_MEMORY_SIZE); - if (!guest_memory) { - printf("No enough memory!\n"); + ret = posix_memalign(&guest_memory, 4096, GUEST_MEMORY_SIZE); + if (ret < 0) { + printf("Not enough memory!\n"); return -1; } hsm_fd = open("/dev/acrn_hsm", O_RDWR|O_CLOEXEC); diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0e7bfdbff80a..0002cd359fb1 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -37,22 +37,10 @@ tracex4 tracex5 tracex6 tracex7 -xdp1 -xdp2 xdp_adjust_tail xdp_fwd -xdp_monitor -xdp_redirect -xdp_redirect_cpu -xdp_redirect_map -xdp_redirect_map_multi xdp_router_ipv4 -xdp_rxq_info -xdp_sample_pkts xdp_tx_iptunnel -xdpsock -xdpsock_ctrl_proc -xsk_fwd testfile.img hbm_out.log iperf.* diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 727da3c5879b..95a4fa1f1e44 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src)) +BPF_SAMPLES_PATH ?= $(abspath $(src)) TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools pound := \# @@ -13,27 +13,16 @@ tprogs-y += sockex1 tprogs-y += sockex2 tprogs-y += sockex3 tprogs-y += tracex1 -tprogs-y += tracex2 tprogs-y += tracex3 tprogs-y += tracex4 tprogs-y += tracex5 tprogs-y += tracex6 -tprogs-y += tracex7 -tprogs-y += test_probe_write_user tprogs-y += trace_output tprogs-y += lathist tprogs-y += offwaketime tprogs-y += spintest tprogs-y += map_perf_test -tprogs-y += test_overhead -tprogs-y += test_cgrp2_array_pin -tprogs-y += test_cgrp2_attach -tprogs-y += test_cgrp2_sock -tprogs-y += test_cgrp2_sock2 -tprogs-y += xdp1 -tprogs-y += xdp2 tprogs-y += xdp_router_ipv4 -tprogs-y += test_current_task_under_cgroup tprogs-y += trace_event tprogs-y += sampleip tprogs-y += tc_l2_redirect @@ -41,22 +30,14 @@ tprogs-y += lwt_len_hist tprogs-y += xdp_tx_iptunnel tprogs-y += test_map_in_map tprogs-y += per_socket_stats_example -tprogs-y += xdp_rxq_info tprogs-y += syscall_tp tprogs-y += cpustat tprogs-y += xdp_adjust_tail tprogs-y += xdp_fwd tprogs-y += task_fd_query -tprogs-y += xdp_sample_pkts tprogs-y += ibumad tprogs-y += hbm -tprogs-y += xdp_redirect_cpu -tprogs-y += xdp_redirect_map_multi -tprogs-y += xdp_redirect_map -tprogs-y += xdp_redirect -tprogs-y += xdp_monitor - # Libbpf dependencies LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf LIBBPF_OUTPUT = $(abspath $(BPF_SAMPLES_PATH))/libbpf @@ -73,28 +54,16 @@ sockex1-objs := sockex1_user.o sockex2-objs := sockex2_user.o sockex3-objs := sockex3_user.o tracex1-objs := tracex1_user.o $(TRACE_HELPERS) -tracex2-objs := tracex2_user.o tracex3-objs := tracex3_user.o tracex4-objs := tracex4_user.o tracex5-objs := tracex5_user.o $(TRACE_HELPERS) tracex6-objs := tracex6_user.o -tracex7-objs := tracex7_user.o -test_probe_write_user-objs := test_probe_write_user_user.o trace_output-objs := trace_output_user.o lathist-objs := lathist_user.o offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) spintest-objs := spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := map_perf_test_user.o test_overhead-objs := test_overhead_user.o -test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o -test_cgrp2_attach-objs := test_cgrp2_attach.o -test_cgrp2_sock-objs := test_cgrp2_sock.o -test_cgrp2_sock2-objs := test_cgrp2_sock2.o -xdp1-objs := xdp1_user.o -# reuse xdp1 source intentionally -xdp2-objs := xdp1_user.o -test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ - test_current_task_under_cgroup_user.o trace_event-objs := trace_event_user.o $(TRACE_HELPERS) sampleip-objs := sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := tc_l2_redirect_user.o @@ -102,21 +71,14 @@ lwt_len_hist-objs := lwt_len_hist_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := syscall_tp_user.o cpustat-objs := cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) -xdp_sample_pkts-objs := xdp_sample_pkts_user.o ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) -xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE) -xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE) -xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE) -xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) -xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) # Tell kbuild to always build the programs @@ -124,35 +86,24 @@ always-y := $(tprogs-y) always-y += sockex1_kern.o always-y += sockex2_kern.o always-y += sockex3_kern.o -always-y += tracex1_kern.o -always-y += tracex2_kern.o -always-y += tracex3_kern.o -always-y += tracex4_kern.o -always-y += tracex5_kern.o -always-y += tracex6_kern.o -always-y += tracex7_kern.o -always-y += sock_flags_kern.o -always-y += test_probe_write_user_kern.o -always-y += trace_output_kern.o +always-y += tracex1.bpf.o +always-y += tracex3.bpf.o +always-y += tracex4.bpf.o +always-y += tracex5.bpf.o +always-y += tracex6.bpf.o +always-y += trace_output.bpf.o always-y += tcbpf1_kern.o always-y += tc_l2_redirect_kern.o always-y += lathist_kern.o -always-y += offwaketime_kern.o -always-y += spintest_kern.o -always-y += map_perf_test_kern.o -always-y += test_overhead_tp_kern.o -always-y += test_overhead_raw_tp_kern.o -always-y += test_overhead_kprobe_kern.o +always-y += offwaketime.bpf.o +always-y += spintest.bpf.o +always-y += map_perf_test.bpf.o always-y += parse_varlen.o parse_simple.o parse_ldabs.o -always-y += test_cgrp2_tc_kern.o -always-y += xdp1_kern.o -always-y += xdp2_kern.o -always-y += test_current_task_under_cgroup_kern.o always-y += trace_event_kern.o always-y += sampleip_kern.o -always-y += lwt_len_hist_kern.o +always-y += lwt_len_hist.bpf.o always-y += xdp_tx_iptunnel_kern.o -always-y += test_map_in_map_kern.o +always-y += test_map_in_map.bpf.o always-y += tcp_synrto_kern.o always-y += tcp_rwnd_kern.o always-y += tcp_bufs_kern.o @@ -162,18 +113,19 @@ always-y += tcp_clamp_kern.o always-y += tcp_basertt_kern.o always-y += tcp_tos_reflect_kern.o always-y += tcp_dumpstats_kern.o -always-y += xdp_rxq_info_kern.o always-y += xdp2skb_meta_kern.o always-y += syscall_tp_kern.o always-y += cpustat_kern.o always-y += xdp_adjust_tail_kern.o always-y += xdp_fwd_kern.o always-y += task_fd_query_kern.o -always-y += xdp_sample_pkts_kern.o always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o +COMMON_CFLAGS = $(TPROGS_USER_CFLAGS) +TPROGS_LDFLAGS = $(TPROGS_USER_LDFLAGS) + ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux # headers when arm instruction set identification is requested. @@ -190,33 +142,36 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic endif endif -TPROGS_CFLAGS += -Wall -O2 -TPROGS_CFLAGS += -Wmissing-prototypes -TPROGS_CFLAGS += -Wstrict-prototypes +ifeq ($(ARCH), x86) +BPF_EXTRA_CFLAGS += -fcf-protection +endif +COMMON_CFLAGS += -Wall -O2 +COMMON_CFLAGS += -Wmissing-prototypes +COMMON_CFLAGS += -Wstrict-prototypes +COMMON_CFLAGS += $(call try-run,\ + printf "int main() { return 0; }" |\ + $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,) + +TPROGS_CFLAGS += $(COMMON_CFLAGS) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include TPROGS_CFLAGS += -I$(srctree)/tools/perf +TPROGS_CFLAGS += -I$(srctree)/tools/lib TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 ifdef SYSROOT -TPROGS_CFLAGS += --sysroot=$(SYSROOT) +COMMON_CFLAGS += --sysroot=$(SYSROOT) TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib endif TPROGS_LDLIBS += $(LIBBPF) -lelf -lz -TPROGLDLIBS_xdp_monitor += -lm -TPROGLDLIBS_xdp_redirect += -lm -TPROGLDLIBS_xdp_redirect_cpu += -lm -TPROGLDLIBS_xdp_redirect_map += -lm -TPROGLDLIBS_xdp_redirect_map_multi += -lm TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread TPROGLDLIBS_tracex4 += -lrt TPROGLDLIBS_trace_output += -lrt TPROGLDLIBS_map_perf_test += -lrt -TPROGLDLIBS_test_overhead += -lrt # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang @@ -248,7 +203,7 @@ BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ - $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + $(CLANG) --target=bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ /bin/rm -f ./llvm_btf_verify.o) @@ -275,15 +230,16 @@ clean: $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ - LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \ + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \ + LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ $@ install_headers BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool -BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool -$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) +DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) +$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \ OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap @@ -326,14 +282,9 @@ $(obj)/$(TRACE_HELPERS) $(obj)/$(CGROUP_HELPERS) $(obj)/$(XDP_SAMPLE): | libbpf_ .PHONY: libbpf_hdrs -$(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h -$(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h -$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h -$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h -$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h $(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h -$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h +$(obj)/tracex5.bpf.o: $(obj)/syscall_nrs.h $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h $(obj)/hbm.o: $(src)/hbm.h $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h @@ -346,21 +297,24 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \ -I$(LIBBPF_INCLUDE) \ -I$(src)/../../tools/testing/selftests/bpf -$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) +$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) $(TPROGS_USER_CFLAGS) $(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h +# Override includes for trace_helpers.o because __must_check won't be defined +# in our include path. +$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check= -include $(BPF_SAMPLES_PATH)/Makefile.target VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ - $(abspath ./vmlinux) + $(abspath $(objtree)/vmlinux) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) ifeq ($(VMLINUX_H),) ifeq ($(VMLINUX_BTF),) $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ - build the kernel or set VMLINUX_BTF or VMLINUX_H variable) + build the kernel or set VMLINUX_BTF like "VMLINUX_BTF=/sys/kernel/btf/vmlinux" or VMLINUX_H variable) endif $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else @@ -370,7 +324,7 @@ endif clean-files += vmlinux.h # Get Clang's default includes on this system, as opposed to those seen by -# '-target bpf'. This fixes "missing" files on some architectures/distros, +# '--target=bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. # # Use '-idirafter': Don't interfere with include mechanics except where the @@ -383,31 +337,19 @@ endef CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) -$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o -$(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o -$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o -$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o -$(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h @echo " CLANG-BPF " $@ - $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \ + $(Q)$(CLANG) -g -O2 --target=bpf -D__TARGET_ARCH_$(SRCARCH) \ -Wno-compare-distinct-pointer-types -I$(srctree)/include \ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \ -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -c $(filter %.bpf.c,$^) -o $@ -LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \ - xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h \ - xdp_router_ipv4.skel.h +LINKED_SKELS := xdp_router_ipv4.skel.h clean-files += $(LINKED_SKELS) -xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o -xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o -xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o -xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o -xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) @@ -434,7 +376,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index 57f93edd1957..cabe2d216997 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -4,15 +4,24 @@ eBPF sample programs This directory contains a test stubs, verifier test-suite and examples for using eBPF. The examples use libbpf from tools/lib/bpf. +Note that the XDP-specific samples have been removed from this directory and +moved to the xdp-tools repository: https://github.com/xdp-project/xdp-tools +See the commit messages removing each tool from this directory for how to +convert specific command invocations between the old samples and the utilities +in xdp-tools. + Build dependencies ================== Compiling requires having installed: - * clang >= version 3.4.0 - * llvm >= version 3.7.1 + * clang + * llvm + * pahole -Note that LLVM's tool 'llc' must support target 'bpf', list version -and supported targets with command: ``llc --version`` +Consult :ref:`Documentation/process/changes.rst <changes>` for the minimum +version numbers required and how to update them. Note that LLVM's tool +'llc' must support target 'bpf', list version and supported targets with +command: ``llc --version`` Clean and configuration ----------------------- @@ -24,7 +33,8 @@ after some changes (on demand):: make -C samples/bpf clean make clean -Configure kernel, defconfig for instance:: +Configure kernel, defconfig for instance +(see "tools/testing/selftests/bpf/config" for a reference config):: make defconfig diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb3594d6..634e81d83efd 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include <linux/types.h> -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c index 5aefd19cdfa1..7ec7143e2757 100644 --- a/samples/bpf/cpustat_kern.c +++ b/samples/bpf/cpustat_kern.c @@ -76,8 +76,8 @@ struct { /* * The trace events for cpu_idle and cpu_frequency are taken from: - * /sys/kernel/debug/tracing/events/power/cpu_idle/format - * /sys/kernel/debug/tracing/events/power/cpu_frequency/format + * /sys/kernel/tracing/events/power/cpu_idle/format + * /sys/kernel/tracing/events/power/cpu_frequency/format * * These two events have same format, so define one common structure. */ @@ -211,7 +211,7 @@ int bpf_prog1(struct cpu_args *ctx) SEC("tracepoint/power/cpu_frequency") int bpf_prog2(struct cpu_args *ctx) { - u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta; + u64 *pts, *cstate, *pstate, cur_ts, delta; u32 key, cpu, pstate_idx; u64 *val; @@ -232,7 +232,6 @@ int bpf_prog2(struct cpu_args *ctx) if (!cstate) return 0; - prev_state = *pstate; *pstate = ctx->state; if (!*pts) { diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c index ab90bb08a2b4..356f756cba0d 100644 --- a/samples/bpf/cpustat_user.c +++ b/samples/bpf/cpustat_user.c @@ -66,10 +66,10 @@ static void cpu_stat_print(void) printf("CPU-%-6d ", j); for (i = 0; i < MAX_CSTATE_ENTRIES; i++) - printf("%-11ld ", data->cstate[i] / 1000000); + printf("%-11lu ", data->cstate[i] / 1000000); for (i = 0; i < MAX_PSTATE_ENTRIES; i++) - printf("%-11ld ", data->pstate[i] / 1000000); + printf("%-11lu ", data->pstate[i] / 1000000); printf("\n"); } diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh index 38e4599350db..7f4f722787d5 100755 --- a/samples/bpf/do_hbm_test.sh +++ b/samples/bpf/do_hbm_test.sh @@ -112,7 +112,7 @@ function start_hbm () { processArgs () { for i in $args ; do case $i in - # Support for upcomming ingress rate limiting + # Support for upcoming ingress rate limiting #in) # support for upcoming ingress rate limiting # dir="-i" # dir_name="in" diff --git a/samples/bpf/gnu/stubs.h b/samples/bpf/gnu/stubs.h new file mode 100644 index 000000000000..1c638d9dce1a --- /dev/null +++ b/samples/bpf/gnu/stubs.h @@ -0,0 +1 @@ +/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */ diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 516fbac28b71..fc88d4dbdf48 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -5,7 +5,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * - * Example program for Host Bandwidth Managment + * Example program for Host Bandwidth Management * * This program loads a cgroup skb BPF program to enforce cgroup output * (egress) or input (ingress) bandwidth limits. @@ -24,7 +24,7 @@ * beyond the rate limit specified while there is available * bandwidth. Current implementation assumes there is only * NIC (eth0), but can be extended to support multiple NICs. - * Currrently only supported for egress. + * Currently only supported for egress. * -h Print this info * prog BPF program file name. Name defaults to hbm_out_kern.o */ @@ -65,7 +65,7 @@ static void Usage(void); static void read_trace_pipe2(void); static void do_error(char *msg, bool errno_flag); -#define DEBUGFS "/sys/kernel/debug/tracing/" +#define TRACEFS "/sys/kernel/tracing/" static struct bpf_program *bpf_prog; static struct bpf_object *obj; @@ -77,7 +77,7 @@ static void read_trace_pipe2(void) FILE *outf; char *outFname = "hbm_out.log"; - trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + trace_fd = open(TRACEFS "trace_pipe", O_RDONLY, 0); if (trace_fd < 0) { printf("Error opening trace_pipe\n"); return; @@ -315,6 +315,7 @@ static int run_bpf_prog(char *prog, int cg_id) fout = fopen(fname, "w"); fprintf(fout, "id:%d\n", cg_id); fprintf(fout, "ERROR: Could not lookup queue_stats\n"); + fclose(fout); } else if (stats_flag && qstats.lastPacketTime > qstats.firstPacketTime) { long long delta_us = (qstats.lastPacketTime - @@ -497,7 +498,6 @@ int main(int argc, char **argv) "Option -%c requires an argument.\n\n", optopt); case 'h': - __fallthrough; default: Usage(); return 0; diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c index 9b193231024a..f07474c72525 100644 --- a/samples/bpf/ibumad_kern.c +++ b/samples/bpf/ibumad_kern.c @@ -39,8 +39,8 @@ struct { /* Taken from the current format defined in * include/trace/events/ib_umad.h * and - * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format - * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format + * /sys/kernel/tracing/events/ib_umad/ib_umad_read/format + * /sys/kernel/tracing/events/ib_umad/ib_umad_write/format */ struct ib_umad_rw_args { u64 pad; diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist.bpf.c index 1fa14c54963a..dbab80e813fe 100644 --- a/samples/bpf/lwt_len_hist_kern.c +++ b/samples/bpf/lwt_len_hist.bpf.c @@ -10,29 +10,16 @@ * General Public License for more details. */ -#include <uapi/linux/bpf.h> -#include <uapi/linux/if_ether.h> -#include <uapi/linux/ip.h> -#include <uapi/linux/in.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> -struct bpf_elf_map { - __u32 type; - __u32 size_key; - __u32 size_value; - __u32 max_elem; - __u32 flags; - __u32 id; - __u32 pinning; -}; - -struct bpf_elf_map SEC("maps") lwt_len_hist_map = { - .type = BPF_MAP_TYPE_PERCPU_HASH, - .size_key = sizeof(__u64), - .size_value = sizeof(__u64), - .pinning = 2, - .max_elem = 1024, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, u64); + __type(value, u64); + __uint(pinning, LIBBPF_PIN_BY_NAME); + __uint(max_entries, 1024); +} lwt_len_hist_map SEC(".maps"); static unsigned int log2(unsigned int v) { diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh index 0eda9754f50b..381b2c634784 100755 --- a/samples/bpf/lwt_len_hist.sh +++ b/samples/bpf/lwt_len_hist.sh @@ -4,8 +4,8 @@ NS1=lwt_ns1 VETH0=tst_lwt1a VETH1=tst_lwt1b - -TRACE_ROOT=/sys/kernel/debug/tracing +BPF_PROG=lwt_len_hist.bpf.o +TRACE_ROOT=/sys/kernel/tracing function cleanup { # To reset saved histogram, remove pinned map @@ -30,7 +30,7 @@ ip netns exec $NS1 netserver echo 1 > ${TRACE_ROOT}/tracing_on cp /dev/null ${TRACE_ROOT}/trace -ip route add 192.168.253.2/32 encap bpf out obj lwt_len_hist_kern.o section len_hist dev $VETH0 +ip route add 192.168.253.2/32 encap bpf out obj $BPF_PROG section len_hist dev $VETH0 netperf -H 192.168.253.2 -t TCP_STREAM cat ${TRACE_ROOT}/trace | grep -v '^#' ./lwt_len_hist diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test.bpf.c index 7342c5b2f278..3cdeba2afe12 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test.bpf.c @@ -4,14 +4,12 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> +#include "vmlinux.h" +#include <errno.h> #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#include "trace_common.h" #define MAX_ENTRIES 1000 #define MAX_NR_CPUS 1024 @@ -102,8 +100,8 @@ struct { __uint(max_entries, MAX_ENTRIES); } lru_hash_lookup_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_getuid)) -int stress_hmap(struct pt_regs *ctx) +SEC("ksyscall/getuid") +int BPF_KSYSCALL(stress_hmap) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -120,8 +118,8 @@ int stress_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_geteuid)) -int stress_percpu_hmap(struct pt_regs *ctx) +SEC("ksyscall/geteuid") +int BPF_KSYSCALL(stress_percpu_hmap) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -137,8 +135,8 @@ int stress_percpu_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getgid)) -int stress_hmap_alloc(struct pt_regs *ctx) +SEC("ksyscall/getgid") +int BPF_KSYSCALL(stress_hmap_alloc) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -154,8 +152,8 @@ int stress_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getegid)) -int stress_percpu_hmap_alloc(struct pt_regs *ctx) +SEC("ksyscall/getegid") +int BPF_KSYSCALL(stress_percpu_hmap_alloc) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -170,11 +168,10 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) } return 0; } - -SEC("kprobe/" SYSCALL(sys_connect)) -int stress_lru_hmap_alloc(struct pt_regs *ctx) +SEC("ksyscall/connect") +int BPF_KSYSCALL(stress_lru_hmap_alloc, int fd, struct sockaddr_in *uservaddr, + int addrlen) { - struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn"; union { u16 dst6[8]; @@ -187,14 +184,11 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) u32 key; }; } test_params; - struct sockaddr_in6 *in6; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)uservaddr; u16 test_case; - int addrlen, ret; long val = 1; u32 key = 0; - - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs); - addrlen = (int)PT_REGS_PARM3_CORE(real_regs); + int ret; if (addrlen != sizeof(*in6)) return 0; @@ -251,8 +245,8 @@ done: return 0; } -SEC("kprobe/" SYSCALL(sys_gettid)) -int stress_lpm_trie_map_alloc(struct pt_regs *ctx) +SEC("ksyscall/gettid") +int BPF_KSYSCALL(stress_lpm_trie_map_alloc) { union { u32 b32[2]; @@ -273,8 +267,8 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getpgid)) -int stress_hash_map_lookup(struct pt_regs *ctx) +SEC("ksyscall/getpgid") +int BPF_KSYSCALL(stress_hash_map_lookup) { u32 key = 1, i; long *value; @@ -286,8 +280,8 @@ int stress_hash_map_lookup(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getppid)) -int stress_array_map_lookup(struct pt_regs *ctx) +SEC("ksyscall/getppid") +int BPF_KSYSCALL(stress_array_map_lookup) { u32 key = 1, i; long *value; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 1bb53f4b29e1..07ff471ed6ae 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -370,7 +370,7 @@ static void run_perf_test(int tasks) static void fill_lpm_trie(void) { - struct bpf_lpm_trie_key *key; + struct bpf_lpm_trie_key_u8 *key; unsigned long value = 0; unsigned int i; int r; @@ -443,7 +443,7 @@ int main(int argc, char **argv) if (argc > 4) max_cnt = atoi(argv[4]); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/net_shared.h b/samples/bpf/net_shared.h new file mode 100644 index 000000000000..88cc52461c98 --- /dev/null +++ b/samples/bpf/net_shared.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _NET_SHARED_H +#define _NET_SHARED_H + +#define AF_INET 2 +#define AF_INET6 10 + +#define ETH_ALEN 6 +#define ETH_P_802_3_MIN 0x0600 +#define ETH_P_8021Q 0x8100 +#define ETH_P_8021AD 0x88A8 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD +#define ETH_P_ARP 0x0806 +#define IPPROTO_ICMPV6 58 + +#define TC_ACT_OK 0 +#define TC_ACT_SHOT 2 + +#define IFNAMSIZ 16 + +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define bpf_ntohs(x) __builtin_bswap16(x) +#define bpf_htons(x) __builtin_bswap16(x) +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define bpf_ntohs(x) (x) +#define bpf_htons(x) (x) +#else +# error "Endianness detection needs to be set up for your compiler?!" +#endif + +#endif diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime.bpf.c index eb4d94742e6b..4a65ba76c1b1 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime.bpf.c @@ -4,20 +4,15 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <uapi/linux/bpf.h> -#include <uapi/linux/ptrace.h> -#include <uapi/linux/perf_event.h> +#include "vmlinux.h" #include <linux/version.h> -#include <linux/sched.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> -#define _(P) \ - ({ \ - typeof(P) val; \ - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ - val; \ - }) +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif #define MINBLOCK_US 1 #define MAX_ENTRIES 10000 @@ -67,11 +62,9 @@ struct { SEC("kprobe/try_to_wake_up") int waker(struct pt_regs *ctx) { - struct task_struct *p = (void *) PT_REGS_PARM1(ctx); + struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx); + u32 pid = BPF_CORE_READ(p, pid); struct wokeby_t woke; - u32 pid; - - pid = _(p->pid); bpf_get_current_comm(&woke.name, sizeof(woke.name)); woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); @@ -110,29 +103,19 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta) } #if 1 -/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ -struct sched_switch_args { - unsigned long long pad; - char prev_comm[TASK_COMM_LEN]; - int prev_pid; - int prev_prio; - long long prev_state; - char next_comm[TASK_COMM_LEN]; - int next_pid; - int next_prio; -}; +/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ SEC("tracepoint/sched/sched_switch") -int oncpu(struct sched_switch_args *ctx) +int oncpu(struct trace_event_raw_sched_switch *ctx) { /* record previous thread sleep time */ u32 pid = ctx->prev_pid; #else -SEC("kprobe/finish_task_switch") +SEC("kprobe.multi/finish_task_switch*") int oncpu(struct pt_regs *ctx) { - struct task_struct *p = (void *) PT_REGS_PARM1(ctx); + struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx); /* record previous thread sleep time */ - u32 pid = _(p->pid); + u32 pid = BPF_CORE_READ(p, pid); #endif u64 delta, ts, *tsp; diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index b6eedcb98fb9..5557b5393642 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -105,7 +105,7 @@ int main(int argc, char **argv) return 2; } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 921c505bb567..9283f47844fb 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -21,10 +21,10 @@ #define DEFAULT_FREQ 99 #define DEFAULT_SECS 5 #define MAX_IPS 8192 -#define PAGE_OFFSET 0xffff880000000000 static int map_fd; static int nr_cpus; +static long _text_addr; static void usage(void) { @@ -108,7 +108,7 @@ static void print_ip_map(int fd) /* sort and print */ qsort(counts, max, sizeof(struct ipcount), count_cmp); for (i = 0; i < max; i++) { - if (counts[i].ip > PAGE_OFFSET) { + if (counts[i].ip > _text_addr) { sym = ksym_search(counts[i].ip); if (!sym) { printf("ksym not found. Is kallsyms loaded?\n"); @@ -169,6 +169,13 @@ int main(int argc, char **argv) return 2; } + /* used to determine whether the address is kernel space */ + _text_addr = ksym_get_addr("_text"); + if (!_text_addr) { + fprintf(stderr, "ERROR: no '_text' in /proc/kallsyms\n"); + return 3; + } + /* create perf FDs for each CPU */ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); links = calloc(nr_cpus, sizeof(struct bpf_link *)); diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c deleted file mode 100644 index 6d0ac7569d6f..000000000000 --- a/samples/bpf/sock_flags_kern.c +++ /dev/null @@ -1,49 +0,0 @@ -#include <uapi/linux/bpf.h> -#include <linux/socket.h> -#include <linux/net.h> -#include <uapi/linux/in.h> -#include <uapi/linux/in6.h> -#include <bpf/bpf_helpers.h> - -SEC("cgroup/sock1") -int bpf_prog1(struct bpf_sock *sk) -{ - char fmt[] = "socket: family %d type %d protocol %d\n"; - char fmt2[] = "socket: uid %u gid %u\n"; - __u64 gid_uid = bpf_get_current_uid_gid(); - __u32 uid = gid_uid & 0xffffffff; - __u32 gid = gid_uid >> 32; - - bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); - bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid); - - /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets - * ie., make ping6 fail - */ - if (sk->family == PF_INET6 && - sk->type == SOCK_RAW && - sk->protocol == IPPROTO_ICMPV6) - return 0; - - return 1; -} - -SEC("cgroup/sock2") -int bpf_prog2(struct bpf_sock *sk) -{ - char fmt[] = "socket: family %d type %d protocol %d\n"; - - bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); - - /* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets - * ie., make ping fail - */ - if (sk->family == PF_INET && - sk->type == SOCK_RAW && - sk->protocol == IPPROTO_ICMP) - return 0; - - return 1; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index b7997541f7ee..f93d9145ab8a 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -31,7 +31,6 @@ static inline int proto_ports_offset(__u64 proto) switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: - case IPPROTO_DCCP: case IPPROTO_ESP: case IPPROTO_SCTP: case IPPROTO_UDPLITE: diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest.bpf.c index 455da77319d9..cba5a9d50783 100644 --- a/samples/bpf/spintest_kern.c +++ b/samples/bpf/spintest.bpf.c @@ -4,14 +4,15 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> -#include <uapi/linux/perf_event.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, long); @@ -46,20 +47,10 @@ int foo(struct pt_regs *ctx) \ } /* add kprobes to all possible *spin* functions */ -SEC("kprobe/spin_unlock")PROG(p1) -SEC("kprobe/spin_lock")PROG(p2) -SEC("kprobe/mutex_spin_on_owner")PROG(p3) -SEC("kprobe/rwsem_spin_on_owner")PROG(p4) -SEC("kprobe/spin_unlock_irqrestore")PROG(p5) -SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6) -SEC("kprobe/_raw_spin_unlock_bh")PROG(p7) -SEC("kprobe/_raw_spin_unlock")PROG(p8) -SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9) -SEC("kprobe/_raw_spin_trylock_bh")PROG(p10) -SEC("kprobe/_raw_spin_lock_irq")PROG(p11) -SEC("kprobe/_raw_spin_trylock")PROG(p12) -SEC("kprobe/_raw_spin_lock")PROG(p13) -SEC("kprobe/_raw_spin_lock_bh")PROG(p14) +SEC("kprobe.multi/spin_*lock*")PROG(spin_lock) +SEC("kprobe.multi/*_spin_on_owner")PROG(spin_on_owner) +SEC("kprobe.multi/_raw_spin_*lock*")PROG(raw_spin_lock) + /* and to inner bpf helpers */ SEC("kprobe/htab_map_update_elem")PROG(p15) SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16) diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index aadac14f748a..55971edb1088 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -9,13 +9,12 @@ int main(int ac, char **argv) { - char filename[256], symbol[256]; struct bpf_object *obj = NULL; struct bpf_link *links[20]; long key, next_key, value; struct bpf_program *prog; int map_fd, i, j = 0; - const char *section; + char filename[256]; struct ksym *sym; if (load_kallsyms()) { @@ -23,7 +22,7 @@ int main(int ac, char **argv) return 2; } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); @@ -44,20 +43,13 @@ int main(int ac, char **argv) } bpf_object__for_each_program(prog, obj) { - section = bpf_program__section_name(prog); - if (sscanf(section, "kprobe/%s", symbol) != 1) - continue; - - /* Attach prog only when symbol exists */ - if (ksym_get_addr(symbol)) { - links[j] = bpf_program__attach(prog); - if (libbpf_get_error(links[j])) { - fprintf(stderr, "bpf_program__attach failed\n"); - links[j] = NULL; - goto cleanup; - } - j++; + links[j] = bpf_program__attach(prog); + if (libbpf_get_error(links[j])) { + fprintf(stderr, "bpf_program__attach failed\n"); + links[j] = NULL; + goto cleanup; } + j++; } for (i = 0; i < 5; i++) { diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c index 88f940052450..a6e600f3d477 100644 --- a/samples/bpf/syscall_nrs.c +++ b/samples/bpf/syscall_nrs.c @@ -2,6 +2,9 @@ #include <uapi/linux/unistd.h> #include <linux/kbuild.h> +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-prototypes" + #define SYSNR(_NR) DEFINE(SYS ## _NR, _NR) void syscall_defines(void) @@ -17,3 +20,5 @@ void syscall_defines(void) #endif } + +#pragma GCC diagnostic pop diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c index 50231c2eff9c..58fef969a60e 100644 --- a/samples/bpf/syscall_tp_kern.c +++ b/samples/bpf/syscall_tp_kern.c @@ -4,6 +4,7 @@ #include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> +#if !defined(__aarch64__) struct syscalls_enter_open_args { unsigned long long unused; long syscall_nr; @@ -11,6 +12,7 @@ struct syscalls_enter_open_args { long flags; long mode; }; +#endif struct syscalls_exit_open_args { unsigned long long unused; @@ -18,6 +20,15 @@ struct syscalls_exit_open_args { long ret; }; +struct syscalls_enter_open_at_args { + unsigned long long unused; + long syscall_nr; + long long dfd; + long filename_ptr; + long flags; + long mode; +}; + struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); @@ -44,26 +55,37 @@ static __always_inline void count(void *map) bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST); } +#if !defined(__aarch64__) SEC("tracepoint/syscalls/sys_enter_open") int trace_enter_open(struct syscalls_enter_open_args *ctx) { count(&enter_open_map); return 0; } +#endif SEC("tracepoint/syscalls/sys_enter_openat") -int trace_enter_open_at(struct syscalls_enter_open_args *ctx) +int trace_enter_open_at(struct syscalls_enter_open_at_args *ctx) { count(&enter_open_map); return 0; } +SEC("tracepoint/syscalls/sys_enter_openat2") +int trace_enter_open_at2(struct syscalls_enter_open_at_args *ctx) +{ + count(&enter_open_map); + return 0; +} + +#if !defined(__aarch64__) SEC("tracepoint/syscalls/sys_exit_open") int trace_enter_exit(struct syscalls_exit_open_args *ctx) { count(&exit_open_map); return 0; } +#endif SEC("tracepoint/syscalls/sys_exit_openat") int trace_enter_exit_at(struct syscalls_exit_open_args *ctx) @@ -71,3 +93,10 @@ int trace_enter_exit_at(struct syscalls_exit_open_args *ctx) count(&exit_open_map); return 0; } + +SEC("tracepoint/syscalls/sys_exit_openat2") +int trace_enter_exit_at2(struct syscalls_exit_open_args *ctx) +{ + count(&exit_open_map); + return 0; +} diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 7a788bb837fc..7a09ac74fac0 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -17,9 +17,9 @@ static void usage(const char *cmd) { - printf("USAGE: %s [-i num_progs] [-h]\n", cmd); - printf(" -i num_progs # number of progs of the test\n"); - printf(" -h # help\n"); + printf("USAGE: %s [-i nr_tests] [-h]\n", cmd); + printf(" -i nr_tests # rounds of test to run\n"); + printf(" -h # help\n"); } static void verify_map(int map_id) @@ -45,14 +45,14 @@ static void verify_map(int map_id) } } -static int test(char *filename, int num_progs) +static int test(char *filename, int nr_tests) { - int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0; - struct bpf_link *links[num_progs * 4]; - struct bpf_object *objs[num_progs]; + int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0; + struct bpf_link **links = NULL; + struct bpf_object *objs[nr_tests]; struct bpf_program *prog; - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { objs[i] = bpf_object__open_file(filename, NULL); if (libbpf_get_error(objs[i])) { fprintf(stderr, "opening BPF object file failed\n"); @@ -60,6 +60,19 @@ static int test(char *filename, int num_progs) goto cleanup; } + /* One-time initialization */ + if (!links) { + int nr_progs = 0; + + bpf_object__for_each_program(prog, objs[i]) + nr_progs += 1; + + links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *)); + + if (!links) + goto cleanup; + } + /* load BPF program */ if (bpf_object__load(objs[i])) { fprintf(stderr, "loading BPF object file failed\n"); @@ -101,14 +114,18 @@ static int test(char *filename, int num_progs) close(fd); /* verify the map */ - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { verify_map(map0_fds[i]); verify_map(map1_fds[i]); } cleanup: - for (j--; j >= 0; j--) - bpf_link__destroy(links[j]); + if (links) { + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + free(links); + } for (i--; i >= 0; i--) bpf_object__close(objs[i]); @@ -117,13 +134,13 @@ cleanup: int main(int argc, char **argv) { - int opt, num_progs = 1; + int opt, nr_tests = 1; char filename[256]; while ((opt = getopt(argc, argv, "i:h")) != -1) { switch (opt) { case 'i': - num_progs = atoi(optarg); + nr_tests = atoi(optarg); break; case 'h': default: @@ -134,5 +151,5 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - return test(filename, num_progs); + return test(filename, nr_tests); } diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index a33d74bd3a4b..1e61f2180470 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -235,7 +235,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) struct bpf_link *link; ssize_t bytes; - snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", + snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events", event_type); kfd = open(buf, O_WRONLY | O_TRUNC, 0); CHECK_PERROR_RET(kfd < 0); @@ -252,7 +252,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) close(kfd); kfd = -1; - snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id", + snprintf(buf, sizeof(buf), "/sys/kernel/tracing/events/%ss/%s/id", event_type, event_alias); efd = open(buf, O_RDONLY, 0); CHECK_PERROR_RET(efd < 0); diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh index 37d95ef3c20f..a28a8fc99dbe 100755 --- a/samples/bpf/tc_l2_redirect.sh +++ b/samples/bpf/tc_l2_redirect.sh @@ -8,6 +8,7 @@ REDIRECT_USER='./tc_l2_redirect' REDIRECT_BPF='./tc_l2_redirect_kern.o' RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter) +IPV6_DISABLED=$(< /proc/sys/net/ipv6/conf/all/disable_ipv6) IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding) function config_common { @@ -64,6 +65,7 @@ function config_common { sysctl -q -w net.ipv4.conf.all.rp_filter=0 sysctl -q -w net.ipv6.conf.all.forwarding=1 + sysctl -q -w net.ipv6.conf.all.disable_ipv6=0 } function cleanup { @@ -77,6 +79,7 @@ function cleanup { $IP link del ip6t >& /dev/null sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING + sysctl -q -w net.ipv6.conf.all.disable_ipv6=$IPV6_DISABLED rm -f /sys/fs/bpf/tc/globals/tun_iface [[ -z $DEBUG ]] || set -x set -e diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c index fd2fa0004330..b19fa9b88fe0 100644 --- a/samples/bpf/tc_l2_redirect_kern.c +++ b/samples/bpf/tc_l2_redirect_kern.c @@ -58,14 +58,11 @@ static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr) SEC("l2_to_iptun_ingress_forward") int _l2_to_iptun_ingress_forward(struct __sk_buff *skb) { - struct bpf_tunnel_key tkey = {}; void *data = (void *)(long)skb->data; struct eth_hdr *eth = data; void *data_end = (void *)(long)skb->data_end; int key = 0, *ifindex; - int ret; - if (data + sizeof(*eth) > data_end) return TC_ACT_OK; @@ -115,8 +112,6 @@ int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb) void *data_end = (void *)(long)skb->data_end; int key = 0, *ifindex; - int ret; - if (data + sizeof(*eth) > data_end) return TC_ACT_OK; @@ -205,7 +200,6 @@ int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb) SEC("drop_non_tun_vip") int _drop_non_tun_vip(struct __sk_buff *skb) { - struct bpf_tunnel_key tkey = {}; void *data = (void *)(long)skb->data; struct eth_hdr *eth = data; void *data_end = (void *)(long)skb->data_end; diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c index 8dfe09a92fec..822b0742b815 100644 --- a/samples/bpf/tcp_basertt_kern.c +++ b/samples/bpf/tcp_basertt_kern.c @@ -47,7 +47,7 @@ int bpf_basertt(struct bpf_sock_ops *skops) case BPF_SOCK_OPS_BASE_RTT: n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION, cong, sizeof(cong)); - if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) { + if (!n && !__builtin_memcmp(cong, nv, sizeof(nv))) { /* Set base_rtt to 80us */ rv = 80; } else if (n) { diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c index 2311fc9dde85..339415eac477 100644 --- a/samples/bpf/tcp_cong_kern.c +++ b/samples/bpf/tcp_cong_kern.c @@ -5,7 +5,7 @@ * License as published by the Free Software Foundation. * * BPF program to set congestion control to dctcp when both hosts are - * in the same datacenter (as deteremined by IPv6 prefix). + * in the same datacenter (as determined by IPv6 prefix). * * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c deleted file mode 100644 index 05e88aa63009..000000000000 --- a/samples/bpf/test_cgrp2_array_pin.c +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2016 Facebook - */ -#include <linux/unistd.h> -#include <linux/bpf.h> - -#include <stdio.h> -#include <stdint.h> -#include <unistd.h> -#include <string.h> -#include <errno.h> -#include <fcntl.h> - -#include <bpf/bpf.h> - -static void usage(void) -{ - printf("Usage: test_cgrp2_array_pin [...]\n"); - printf(" -F <file> File to pin an BPF cgroup array\n"); - printf(" -U <file> Update an already pinned BPF cgroup array\n"); - printf(" -v <value> Full path of the cgroup2\n"); - printf(" -h Display this help\n"); -} - -int main(int argc, char **argv) -{ - const char *pinned_file = NULL, *cg2 = NULL; - int create_array = 1; - int array_key = 0; - int array_fd = -1; - int cg2_fd = -1; - int ret = -1; - int opt; - - while ((opt = getopt(argc, argv, "F:U:v:")) != -1) { - switch (opt) { - /* General args */ - case 'F': - pinned_file = optarg; - break; - case 'U': - pinned_file = optarg; - create_array = 0; - break; - case 'v': - cg2 = optarg; - break; - default: - usage(); - goto out; - } - } - - if (!cg2 || !pinned_file) { - usage(); - goto out; - } - - cg2_fd = open(cg2, O_RDONLY); - if (cg2_fd < 0) { - fprintf(stderr, "open(%s,...): %s(%d)\n", - cg2, strerror(errno), errno); - goto out; - } - - if (create_array) { - array_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_ARRAY, NULL, - sizeof(uint32_t), sizeof(uint32_t), - 1, NULL); - if (array_fd < 0) { - fprintf(stderr, - "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", - strerror(errno), errno); - goto out; - } - } else { - array_fd = bpf_obj_get(pinned_file); - if (array_fd < 0) { - fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", - pinned_file, strerror(errno), errno); - goto out; - } - } - - ret = bpf_map_update_elem(array_fd, &array_key, &cg2_fd, 0); - if (ret) { - perror("bpf_map_update_elem"); - goto out; - } - - if (create_array) { - ret = bpf_obj_pin(array_fd, pinned_file); - if (ret) { - fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n", - pinned_file, strerror(errno), errno); - goto out; - } - } - -out: - if (array_fd != -1) - close(array_fd); - if (cg2_fd != -1) - close(cg2_fd); - return ret; -} diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c deleted file mode 100644 index 68ce69457afe..000000000000 --- a/samples/bpf/test_cgrp2_attach.c +++ /dev/null @@ -1,177 +0,0 @@ -/* eBPF example program: - * - * - Creates arraymap in kernel with 4 bytes keys and 8 byte values - * - * - Loads eBPF program - * - * The eBPF program accesses the map passed in to store two pieces of - * information. The number of invocations of the program, which maps - * to the number of packets received, is stored to key 0. Key 1 is - * incremented on each iteration by the number of bytes stored in - * the skb. - * - * - Attaches the new program to a cgroup using BPF_PROG_ATTACH - * - * - Every second, reads map[0] and map[1] to see how many bytes and - * packets were seen on any socket of tasks in the given cgroup. - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <string.h> -#include <unistd.h> -#include <assert.h> -#include <errno.h> -#include <fcntl.h> - -#include <linux/bpf.h> -#include <bpf/bpf.h> - -#include "bpf_insn.h" -#include "bpf_util.h" - -enum { - MAP_KEY_PACKETS, - MAP_KEY_BYTES, -}; - -char bpf_log_buf[BPF_LOG_BUF_SIZE]; - -static int prog_load(int map_fd, int verdict) -{ - struct bpf_insn prog[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */ - - /* Count packets */ - BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ - BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */ - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), - - /* Count bytes */ - BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ - BPF_LD_MAP_FD(BPF_REG_1, map_fd), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */ - - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), - - BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ - BPF_EXIT_INSN(), - }; - size_t insns_cnt = ARRAY_SIZE(prog); - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = bpf_log_buf, - .log_size = BPF_LOG_BUF_SIZE, - ); - - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, NULL, "GPL", - prog, insns_cnt, &opts); -} - -static int usage(const char *argv0) -{ - printf("Usage: %s [-d] [-D] <cg-path> <egress|ingress>\n", argv0); - printf(" -d Drop Traffic\n"); - printf(" -D Detach filter, and exit\n"); - return EXIT_FAILURE; -} - -static int attach_filter(int cg_fd, int type, int verdict) -{ - int prog_fd, map_fd, ret, key; - long long pkt_cnt, byte_cnt; - - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, - sizeof(key), sizeof(byte_cnt), - 256, NULL); - if (map_fd < 0) { - printf("Failed to create map: '%s'\n", strerror(errno)); - return EXIT_FAILURE; - } - - prog_fd = prog_load(map_fd, verdict); - printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); - - if (prog_fd < 0) { - printf("Failed to load prog: '%s'\n", strerror(errno)); - return EXIT_FAILURE; - } - - ret = bpf_prog_attach(prog_fd, cg_fd, type, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; - } - while (1) { - key = MAP_KEY_PACKETS; - assert(bpf_map_lookup_elem(map_fd, &key, &pkt_cnt) == 0); - - key = MAP_KEY_BYTES; - assert(bpf_map_lookup_elem(map_fd, &key, &byte_cnt) == 0); - - printf("cgroup received %lld packets, %lld bytes\n", - pkt_cnt, byte_cnt); - sleep(1); - } - - return EXIT_SUCCESS; -} - -int main(int argc, char **argv) -{ - int detach_only = 0, verdict = 1; - enum bpf_attach_type type; - int opt, cg_fd, ret; - - while ((opt = getopt(argc, argv, "Dd")) != -1) { - switch (opt) { - case 'd': - verdict = 0; - break; - case 'D': - detach_only = 1; - break; - default: - return usage(argv[0]); - } - } - - if (argc - optind < 2) - return usage(argv[0]); - - if (strcmp(argv[optind + 1], "ingress") == 0) - type = BPF_CGROUP_INET_INGRESS; - else if (strcmp(argv[optind + 1], "egress") == 0) - type = BPF_CGROUP_INET_EGRESS; - else - return usage(argv[0]); - - cg_fd = open(argv[optind], O_DIRECTORY | O_RDONLY); - if (cg_fd < 0) { - printf("Failed to open cgroup path: '%s'\n", strerror(errno)); - return EXIT_FAILURE; - } - - if (detach_only) { - ret = bpf_prog_detach(cg_fd, type); - printf("bpf_prog_detach() returned '%s' (%d)\n", - strerror(errno), errno); - } else - ret = attach_filter(cg_fd, type, verdict); - - return ret; -} diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c deleted file mode 100644 index a0811df888f4..000000000000 --- a/samples/bpf/test_cgrp2_sock.c +++ /dev/null @@ -1,294 +0,0 @@ -/* eBPF example program: - * - * - Loads eBPF program - * - * The eBPF program sets the sk_bound_dev_if index in new AF_INET{6} - * sockets opened by processes in the cgroup. - * - * - Attaches the new program to a cgroup using BPF_PROG_ATTACH - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <string.h> -#include <unistd.h> -#include <assert.h> -#include <errno.h> -#include <fcntl.h> -#include <net/if.h> -#include <inttypes.h> -#include <linux/bpf.h> -#include <bpf/bpf.h> - -#include "bpf_insn.h" - -char bpf_log_buf[BPF_LOG_BUF_SIZE]; - -static int prog_load(__u32 idx, __u32 mark, __u32 prio) -{ - /* save pointer to context */ - struct bpf_insn prog_start[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - }; - struct bpf_insn prog_end[] = { - BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ - BPF_EXIT_INSN(), - }; - - /* set sk_bound_dev_if on socket */ - struct bpf_insn prog_dev[] = { - BPF_MOV64_IMM(BPF_REG_3, idx), - BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), - }; - - /* set mark on socket */ - struct bpf_insn prog_mark[] = { - /* get uid of process */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_current_uid_gid), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), - - /* if uid is 0, use given mark, else use the uid as the mark */ - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_3, mark), - - /* set the mark on the new socket */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)), - }; - - /* set priority on socket */ - struct bpf_insn prog_prio[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_3, prio), - BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)), - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = bpf_log_buf, - .log_size = BPF_LOG_BUF_SIZE, - ); - - struct bpf_insn *prog; - size_t insns_cnt; - void *p; - int ret; - - insns_cnt = sizeof(prog_start) + sizeof(prog_end); - if (idx) - insns_cnt += sizeof(prog_dev); - - if (mark) - insns_cnt += sizeof(prog_mark); - - if (prio) - insns_cnt += sizeof(prog_prio); - - p = prog = malloc(insns_cnt); - if (!prog) { - fprintf(stderr, "Failed to allocate memory for instructions\n"); - return EXIT_FAILURE; - } - - memcpy(p, prog_start, sizeof(prog_start)); - p += sizeof(prog_start); - - if (idx) { - memcpy(p, prog_dev, sizeof(prog_dev)); - p += sizeof(prog_dev); - } - - if (mark) { - memcpy(p, prog_mark, sizeof(prog_mark)); - p += sizeof(prog_mark); - } - - if (prio) { - memcpy(p, prog_prio, sizeof(prog_prio)); - p += sizeof(prog_prio); - } - - memcpy(p, prog_end, sizeof(prog_end)); - p += sizeof(prog_end); - - insns_cnt /= sizeof(struct bpf_insn); - - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", - prog, insns_cnt, &opts); - - free(prog); - - return ret; -} - -static int get_bind_to_device(int sd, char *name, size_t len) -{ - socklen_t optlen = len; - int rc; - - name[0] = '\0'; - rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); - if (rc < 0) - perror("setsockopt(SO_BINDTODEVICE)"); - - return rc; -} - -static unsigned int get_somark(int sd) -{ - unsigned int mark = 0; - socklen_t optlen = sizeof(mark); - int rc; - - rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen); - if (rc < 0) - perror("getsockopt(SO_MARK)"); - - return mark; -} - -static unsigned int get_priority(int sd) -{ - unsigned int prio = 0; - socklen_t optlen = sizeof(prio); - int rc; - - rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen); - if (rc < 0) - perror("getsockopt(SO_PRIORITY)"); - - return prio; -} - -static int show_sockopts(int family) -{ - unsigned int mark, prio; - char name[16]; - int sd; - - sd = socket(family, SOCK_DGRAM, 17); - if (sd < 0) { - perror("socket"); - return 1; - } - - if (get_bind_to_device(sd, name, sizeof(name)) < 0) - return 1; - - mark = get_somark(sd); - prio = get_priority(sd); - - close(sd); - - printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio); - - return 0; -} - -static int usage(const char *argv0) -{ - printf("Usage:\n"); - printf(" Attach a program\n"); - printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0); - printf("\n"); - printf(" Detach a program\n"); - printf(" %s -d cg-path\n", argv0); - printf("\n"); - printf(" Show inherited socket settings (mark, priority, and device)\n"); - printf(" %s [-6]\n", argv0); - return EXIT_FAILURE; -} - -int main(int argc, char **argv) -{ - __u32 idx = 0, mark = 0, prio = 0; - const char *cgrp_path = NULL; - int cg_fd, prog_fd, ret; - int family = PF_INET; - int do_attach = 1; - int rc; - - while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) { - switch (rc) { - case 'd': - do_attach = 0; - break; - case 'b': - idx = if_nametoindex(optarg); - if (!idx) { - idx = strtoumax(optarg, NULL, 0); - if (!idx) { - printf("Invalid device name\n"); - return EXIT_FAILURE; - } - } - break; - case 'm': - mark = strtoumax(optarg, NULL, 0); - break; - case 'p': - prio = strtoumax(optarg, NULL, 0); - break; - case '6': - family = PF_INET6; - break; - default: - return usage(argv[0]); - } - } - - if (optind == argc) - return show_sockopts(family); - - cgrp_path = argv[optind]; - if (!cgrp_path) { - fprintf(stderr, "cgroup path not given\n"); - return EXIT_FAILURE; - } - - if (do_attach && !idx && !mark && !prio) { - fprintf(stderr, - "One of device, mark or priority must be given\n"); - return EXIT_FAILURE; - } - - cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY); - if (cg_fd < 0) { - printf("Failed to open cgroup path: '%s'\n", strerror(errno)); - return EXIT_FAILURE; - } - - if (do_attach) { - prog_fd = prog_load(idx, mark, prio); - if (prog_fd < 0) { - printf("Failed to load prog: '%s'\n", strerror(errno)); - printf("Output from kernel verifier:\n%s\n-------\n", - bpf_log_buf); - return EXIT_FAILURE; - } - - ret = bpf_prog_attach(prog_fd, cg_fd, - BPF_CGROUP_INET_SOCK_CREATE, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; - } - } else { - ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE); - if (ret < 0) { - printf("Failed to detach prog from cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; - } - } - - close(cg_fd); - return EXIT_SUCCESS; -} diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh deleted file mode 100755 index 9f6174236856..000000000000 --- a/samples/bpf/test_cgrp2_sock.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# Test various socket options that can be set by attaching programs to cgroups. - -CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock" - -################################################################################ -# -print_result() -{ - local rc=$1 - local status=" OK " - - [ $rc -ne 0 ] && status="FAIL" - - printf "%-50s [%4s]\n" "$2" "$status" -} - -check_sock() -{ - out=$(test_cgrp2_sock) - echo $out | grep -q "$1" - if [ $? -ne 0 ]; then - print_result 1 "IPv4: $2" - echo " expected: $1" - echo " have: $out" - rc=1 - else - print_result 0 "IPv4: $2" - fi -} - -check_sock6() -{ - out=$(test_cgrp2_sock -6) - echo $out | grep -q "$1" - if [ $? -ne 0 ]; then - print_result 1 "IPv6: $2" - echo " expected: $1" - echo " have: $out" - rc=1 - else - print_result 0 "IPv6: $2" - fi -} - -################################################################################ -# - -cleanup() -{ - echo $$ >> ${CGRP_MNT}/cgroup.procs - rmdir ${CGRP_MNT}/sockopts -} - -cleanup_and_exit() -{ - local rc=$1 - local msg="$2" - - [ -n "$msg" ] && echo "ERROR: $msg" - - test_cgrp2_sock -d ${CGRP_MNT}/sockopts - ip li del cgrp2_sock - umount ${CGRP_MNT} - - exit $rc -} - - -################################################################################ -# main - -rc=0 - -ip li add cgrp2_sock type dummy 2>/dev/null - -set -e -mkdir -p ${CGRP_MNT} -mount -t cgroup2 none ${CGRP_MNT} -set +e - - -# make sure we have a known start point -cleanup 2>/dev/null - -mkdir -p ${CGRP_MNT}/sockopts -[ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy" - - -# set pid into cgroup -echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs - -# no bpf program attached, so socket should show no settings -check_sock "dev , mark 0, priority 0" "No programs attached" -check_sock6 "dev , mark 0, priority 0" "No programs attached" - -# verify device is set -# -test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts -if [ $? -ne 0 ]; then - cleanup_and_exit 1 "Failed to install program to set device" -fi -check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set" -check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set" - -# verify mark is set -# -test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts -if [ $? -ne 0 ]; then - cleanup_and_exit 1 "Failed to install program to set mark" -fi -check_sock "dev , mark 666, priority 0" "Mark set" -check_sock6 "dev , mark 666, priority 0" "Mark set" - -# verify priority is set -# -test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts -if [ $? -ne 0 ]; then - cleanup_and_exit 1 "Failed to install program to set priority" -fi -check_sock "dev , mark 0, priority 123" "Priority set" -check_sock6 "dev , mark 0, priority 123" "Priority set" - -# all 3 at once -# -test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts -if [ $? -ne 0 ]; then - cleanup_and_exit 1 "Failed to install program to set device, mark and priority" -fi -check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority set" -check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority set" - -cleanup_and_exit $rc diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c deleted file mode 100644 index e7060aaa2f5a..000000000000 --- a/samples/bpf/test_cgrp2_sock2.c +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* eBPF example program: - * - * - Loads eBPF program - * - * The eBPF program loads a filter from file and attaches the - * program to a cgroup using BPF_PROG_ATTACH - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <string.h> -#include <unistd.h> -#include <assert.h> -#include <errno.h> -#include <fcntl.h> -#include <net/if.h> -#include <linux/bpf.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_insn.h" - -static int usage(const char *argv0) -{ - printf("Usage: %s cg-path filter-path [filter-id]\n", argv0); - return EXIT_FAILURE; -} - -int main(int argc, char **argv) -{ - int cg_fd, err, ret = EXIT_FAILURE, filter_id = 0, prog_cnt = 0; - const char *link_pin_path = "/sys/fs/bpf/test_cgrp2_sock2"; - struct bpf_link *link = NULL; - struct bpf_program *progs[2]; - struct bpf_program *prog; - struct bpf_object *obj; - - if (argc < 3) - return usage(argv[0]); - - if (argc > 3) - filter_id = atoi(argv[3]); - - cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); - if (cg_fd < 0) { - printf("Failed to open cgroup path: '%s'\n", strerror(errno)); - return ret; - } - - obj = bpf_object__open_file(argv[2], NULL); - if (libbpf_get_error(obj)) { - printf("ERROR: opening BPF object file failed\n"); - return ret; - } - - bpf_object__for_each_program(prog, obj) { - progs[prog_cnt] = prog; - prog_cnt++; - } - - if (filter_id >= prog_cnt) { - printf("Invalid program id; program not found in file\n"); - goto cleanup; - } - - /* load BPF program */ - if (bpf_object__load(obj)) { - printf("ERROR: loading BPF object file failed\n"); - goto cleanup; - } - - link = bpf_program__attach_cgroup(progs[filter_id], cg_fd); - if (libbpf_get_error(link)) { - printf("ERROR: bpf_program__attach failed\n"); - link = NULL; - goto cleanup; - } - - err = bpf_link__pin(link, link_pin_path); - if (err < 0) { - printf("ERROR: bpf_link__pin failed: %d\n", err); - goto cleanup; - } - - ret = EXIT_SUCCESS; - -cleanup: - bpf_link__destroy(link); - bpf_object__close(obj); - return ret; -} diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh deleted file mode 100755 index 6a3dbe642b2b..000000000000 --- a/samples/bpf/test_cgrp2_sock2.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -BPFFS=/sys/fs/bpf -LINK_PIN=$BPFFS/test_cgrp2_sock2 - -function config_device { - ip netns add at_ns0 - ip link add veth0 type veth peer name veth0b - ip link set veth0b up - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add 172.16.1.101/24 dev veth0b - ip addr add 2401:db00::2/64 dev veth0b nodad -} - -function config_cgroup { - rm -rf /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2 - mount -t cgroup2 none /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2/foo - echo $$ >> /tmp/cgroupv2/foo/cgroup.procs -} - -function config_bpffs { - if mount | grep $BPFFS > /dev/null; then - echo "bpffs already mounted" - else - echo "bpffs not mounted. Mounting..." - mount -t bpf none $BPFFS - fi -} - -function attach_bpf { - ./test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 - [ $? -ne 0 ] && exit 1 -} - -function cleanup { - rm -rf $LINK_PIN - ip link del veth0b - ip netns delete at_ns0 - umount /tmp/cgroupv2 - rm -rf /tmp/cgroupv2 -} - -cleanup 2>/dev/null - -set -e -config_device -config_cgroup -config_bpffs -set +e - -# -# Test 1 - fail ping6 -# -attach_bpf 0 -ping -c1 -w1 172.16.1.100 -if [ $? -ne 0 ]; then - echo "ping failed when it should succeed" - cleanup - exit 1 -fi - -ping6 -c1 -w1 2401:db00::1 -if [ $? -eq 0 ]; then - echo "ping6 succeeded when it should not" - cleanup - exit 1 -fi - -rm -rf $LINK_PIN -sleep 1 # Wait for link detach - -# -# Test 2 - fail ping -# -attach_bpf 1 -ping6 -c1 -w1 2401:db00::1 -if [ $? -ne 0 ]; then - echo "ping6 failed when it should succeed" - cleanup - exit 1 -fi - -ping -c1 -w1 172.16.1.100 -if [ $? -eq 0 ]; then - echo "ping succeeded when it should not" - cleanup - exit 1 -fi - -cleanup -echo -echo "*** PASS ***" diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh deleted file mode 100755 index 395573be6ae8..000000000000 --- a/samples/bpf/test_cgrp2_tc.sh +++ /dev/null @@ -1,185 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -MY_DIR=$(dirname $0) -# Details on the bpf prog -BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin' -BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o" -BPF_SECTION='filter' - -[ -z "$TC" ] && TC='tc' -[ -z "$IP" ] && IP='ip' - -# Names of the veth interface, net namespace...etc. -HOST_IFC='ve' -NS_IFC='vens' -NS='ns' - -find_mnt() { - cat /proc/mounts | \ - awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }' -} - -# Init cgroup2 vars -init_cgrp2_vars() { - CGRP2_ROOT=$(find_mnt cgroup2) - if [ -z "$CGRP2_ROOT" ] - then - CGRP2_ROOT='/mnt/cgroup2' - MOUNT_CGRP2="yes" - fi - CGRP2_TC="$CGRP2_ROOT/tc" - CGRP2_TC_LEAF="$CGRP2_TC/leaf" -} - -# Init bpf fs vars -init_bpf_fs_vars() { - local bpf_fs_root=$(find_mnt bpf) - [ -n "$bpf_fs_root" ] || return -1 - BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals" -} - -setup_cgrp2() { - case $1 in - start) - if [ "$MOUNT_CGRP2" == 'yes' ] - then - [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT - mount -t cgroup2 none $CGRP2_ROOT || return $? - fi - mkdir -p $CGRP2_TC_LEAF - ;; - *) - rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC - [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT - ;; - esac -} - -setup_bpf_cgrp2_array() { - local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME" - case $1 in - start) - $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC - ;; - *) - [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array - ;; - esac -} - -setup_net() { - case $1 in - start) - $IP link add $HOST_IFC type veth peer name $NS_IFC || return $? - $IP link set dev $HOST_IFC up || return $? - sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0 - - $IP netns add ns || return $? - $IP link set dev $NS_IFC netns ns || return $? - $IP -n $NS link set dev $NS_IFC up || return $? - $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0 - $TC qdisc add dev $HOST_IFC clsact || return $? - $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $? - ;; - *) - $IP netns del $NS - $IP link del $HOST_IFC - ;; - esac -} - -run_in_cgrp() { - # Fork another bash and move it under the specified cgroup. - # It makes the cgroup cleanup easier at the end of the test. - cmd='echo $$ > ' - cmd="$cmd $1/cgroup.procs; exec $2" - bash -c "$cmd" -} - -do_test() { - run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null" - local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \ - awk '/drop/{print substr($7, 0, index($7, ",")-1)}') - if [[ $dropped -eq 0 ]] - then - echo "FAIL" - return 1 - else - echo "Successfully filtered $dropped packets" - return 0 - fi -} - -do_exit() { - if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ] - then - echo "------ DEBUG ------" - echo "mount: "; mount | grep -E '(cgroup2|bpf)'; echo - echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo - if [ -d "$BPF_FS_TC_SHARE" ] - then - echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo - fi - echo "Host net:" - $IP netns - $IP link show dev $HOST_IFC - $IP -6 a show dev $HOST_IFC - $TC -s qdisc show dev $HOST_IFC - echo - echo "$NS net:" - $IP -n $NS link show dev $NS_IFC - $IP -n $NS -6 link show dev $NS_IFC - echo "------ DEBUG ------" - echo - fi - - if [ "$MODE" != 'nocleanup' ] - then - setup_net stop - setup_bpf_cgrp2_array stop - setup_cgrp2 stop - fi -} - -init_cgrp2_vars -init_bpf_fs_vars - -while [[ $# -ge 1 ]] -do - a="$1" - case $a in - debug) - DEBUG='yes' - shift 1 - ;; - cleanup-only) - MODE='cleanuponly' - shift 1 - ;; - no-cleanup) - MODE='nocleanup' - shift 1 - ;; - *) - echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]" - echo " debug: Print cgrp and network setup details at the end of the test" - echo " cleanup-only: Try to cleanup things from last test. No test will be run" - echo " no-cleanup: Run the test but don't do cleanup at the end" - echo "[Note: If no arg is given, it will run the test and do cleanup at the end]" - echo - exit -1 - ;; - esac -done - -trap do_exit 0 - -[ "$MODE" == 'cleanuponly' ] && exit - -setup_cgrp2 start || exit $? -setup_net start || exit $? -init_bpf_fs_vars || exit $? -setup_bpf_cgrp2_array start || exit $? -do_test -echo diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c deleted file mode 100644 index 4dd532a312b9..000000000000 --- a/samples/bpf/test_cgrp2_tc_kern.c +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2016 Facebook - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#define KBUILD_MODNAME "foo" -#include <uapi/linux/if_ether.h> -#include <uapi/linux/in6.h> -#include <uapi/linux/ipv6.h> -#include <uapi/linux/pkt_cls.h> -#include <uapi/linux/bpf.h> -#include <bpf/bpf_helpers.h> - -/* copy of 'struct ethhdr' without __packed */ -struct eth_hdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - unsigned short h_proto; -}; - -#define PIN_GLOBAL_NS 2 -struct bpf_elf_map { - __u32 type; - __u32 size_key; - __u32 size_value; - __u32 max_elem; - __u32 flags; - __u32 id; - __u32 pinning; -}; - -struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = { - .type = BPF_MAP_TYPE_CGROUP_ARRAY, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .pinning = PIN_GLOBAL_NS, - .max_elem = 1, -}; - -SEC("filter") -int handle_egress(struct __sk_buff *skb) -{ - void *data = (void *)(long)skb->data; - struct eth_hdr *eth = data; - struct ipv6hdr *ip6h = data + sizeof(*eth); - void *data_end = (void *)(long)skb->data_end; - char dont_care_msg[] = "dont care %04x %d\n"; - char pass_msg[] = "pass\n"; - char reject_msg[] = "reject\n"; - - /* single length check */ - if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) - return TC_ACT_OK; - - if (eth->h_proto != htons(ETH_P_IPV6) || - ip6h->nexthdr != IPPROTO_ICMPV6) { - bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), - eth->h_proto, ip6h->nexthdr); - return TC_ACT_OK; - } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { - bpf_trace_printk(pass_msg, sizeof(pass_msg)); - return TC_ACT_OK; - } else { - bpf_trace_printk(reject_msg, sizeof(reject_msg)); - return TC_ACT_SHOT; - } -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c deleted file mode 100644 index fbd43e2bb4d3..000000000000 --- a/samples/bpf/test_current_task_under_cgroup_kern.c +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ - -#include <linux/ptrace.h> -#include <uapi/linux/bpf.h> -#include <linux/version.h> -#include <bpf/bpf_helpers.h> -#include <uapi/linux/utsname.h> -#include "trace_common.h" - -struct { - __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); - __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(u32)); - __uint(max_entries, 1); -} cgroup_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, u32); - __type(value, u64); - __uint(max_entries, 1); -} perf_map SEC(".maps"); - -/* Writes the last PID that called sync to a map at index 0 */ -SEC("kprobe/" SYSCALL(sys_sync)) -int bpf_prog1(struct pt_regs *ctx) -{ - u64 pid = bpf_get_current_pid_tgid(); - int idx = 0; - - if (!bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY); - return 0; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c deleted file mode 100644 index ac251a417f45..000000000000 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me> - */ - -#define _GNU_SOURCE -#include <stdio.h> -#include <unistd.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "cgroup_helpers.h" - -#define CGROUP_PATH "/my-cgroup" - -int main(int argc, char **argv) -{ - pid_t remote_pid, local_pid = getpid(); - struct bpf_link *link = NULL; - struct bpf_program *prog; - int cg2, idx = 0, rc = 1; - struct bpf_object *obj; - char filename[256]; - int map_fd[2]; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) { - fprintf(stderr, "ERROR: opening BPF object file failed\n"); - return 0; - } - - prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); - if (!prog) { - printf("finding a prog in obj file failed\n"); - goto cleanup; - } - - /* load BPF program */ - if (bpf_object__load(obj)) { - fprintf(stderr, "ERROR: loading BPF object file failed\n"); - goto cleanup; - } - - map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map"); - map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map"); - if (map_fd[0] < 0 || map_fd[1] < 0) { - fprintf(stderr, "ERROR: finding a map in obj file failed\n"); - goto cleanup; - } - - link = bpf_program__attach(prog); - if (libbpf_get_error(link)) { - fprintf(stderr, "ERROR: bpf_program__attach failed\n"); - link = NULL; - goto cleanup; - } - - if (setup_cgroup_environment()) - goto err; - - cg2 = create_and_get_cgroup(CGROUP_PATH); - - if (cg2 < 0) - goto err; - - if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { - log_err("Adding target cgroup to map"); - goto err; - } - - if (join_cgroup(CGROUP_PATH)) - goto err; - - /* - * The installed helper program catched the sync call, and should - * write it to the map. - */ - - sync(); - bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid); - - if (local_pid != remote_pid) { - fprintf(stderr, - "BPF Helper didn't write correct PID to map, but: %d\n", - remote_pid); - goto err; - } - - /* Verify the negative scenario; leave the cgroup */ - if (join_cgroup("/")) - goto err; - - remote_pid = 0; - bpf_map_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); - - sync(); - bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid); - - if (local_pid == remote_pid) { - fprintf(stderr, "BPF cgroup negative test did not work\n"); - goto err; - } - - rc = 0; - -err: - close(cg2); - cleanup_cgroup_environment(); - -cleanup: - bpf_link__destroy(link); - bpf_object__close(obj); - return rc; -} diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index 5efb91763d65..1c161276d57b 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -42,11 +42,6 @@ static inline void INIT_LIST_HEAD(struct list_head *list) list->prev = list; } -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c index 1b568575ad11..9a13dbb81847 100644 --- a/samples/bpf/test_lwt_bpf.c +++ b/samples/bpf/test_lwt_bpf.c @@ -10,16 +10,8 @@ * General Public License for more details. */ -#include <stdint.h> -#include <stddef.h> -#include <linux/bpf.h> -#include <linux/ip.h> -#include <linux/in.h> -#include <linux/in6.h> -#include <linux/tcp.h> -#include <linux/udp.h> -#include <linux/icmpv6.h> -#include <linux/if_ether.h> +#include "vmlinux.h" +#include "net_shared.h" #include <bpf/bpf_helpers.h> #include <string.h> @@ -44,9 +36,9 @@ SEC("test_ctx") int do_test_ctx(struct __sk_buff *skb) { skb->cb[0] = CB_MAGIC; - printk("len %d hash %d protocol %d\n", skb->len, skb->hash, + printk("len %d hash %d protocol %d", skb->len, skb->hash, skb->protocol); - printk("cb %d ingress_ifindex %d ifindex %d\n", skb->cb[0], + printk("cb %d ingress_ifindex %d ifindex %d", skb->cb[0], skb->ingress_ifindex, skb->ifindex); return BPF_OK; @@ -56,9 +48,9 @@ int do_test_ctx(struct __sk_buff *skb) SEC("test_cb") int do_test_cb(struct __sk_buff *skb) { - printk("cb0: %x cb1: %x cb2: %x\n", skb->cb[0], skb->cb[1], + printk("cb0: %x cb1: %x cb2: %x", skb->cb[0], skb->cb[1], skb->cb[2]); - printk("cb3: %x cb4: %x\n", skb->cb[3], skb->cb[4]); + printk("cb3: %x cb4: %x", skb->cb[3], skb->cb[4]); return BPF_OK; } @@ -72,11 +64,11 @@ int do_test_data(struct __sk_buff *skb) struct iphdr *iph = data; if (data + sizeof(*iph) > data_end) { - printk("packet truncated\n"); + printk("packet truncated"); return BPF_DROP; } - printk("src: %x dst: %x\n", iph->saddr, iph->daddr); + printk("src: %x dst: %x", iph->saddr, iph->daddr); return BPF_OK; } @@ -97,7 +89,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip, ret = bpf_skb_load_bytes(skb, IP_PROTO_OFF, &proto, 1); if (ret < 0) { - printk("bpf_l4_csum_replace failed: %d\n", ret); + printk("bpf_l4_csum_replace failed: %d", ret); return BPF_DROP; } @@ -120,14 +112,14 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip, ret = bpf_l4_csum_replace(skb, off, old_ip, new_ip, flags | sizeof(new_ip)); if (ret < 0) { - printk("bpf_l4_csum_replace failed: %d\n"); + printk("bpf_l4_csum_replace failed: %d"); return BPF_DROP; } } ret = bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); if (ret < 0) { - printk("bpf_l3_csum_replace failed: %d\n", ret); + printk("bpf_l3_csum_replace failed: %d", ret); return BPF_DROP; } @@ -137,7 +129,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip, ret = bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0); if (ret < 0) { - printk("bpf_skb_store_bytes() failed: %d\n", ret); + printk("bpf_skb_store_bytes() failed: %d", ret); return BPF_DROP; } @@ -153,12 +145,12 @@ int do_test_rewrite(struct __sk_buff *skb) ret = bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4); if (ret < 0) { - printk("bpf_skb_load_bytes failed: %d\n", ret); + printk("bpf_skb_load_bytes failed: %d", ret); return BPF_DROP; } if (old_ip == 0x2fea8c0) { - printk("out: rewriting from %x to %x\n", old_ip, new_ip); + printk("out: rewriting from %x to %x", old_ip, new_ip); return rewrite(skb, old_ip, new_ip, 1); } @@ -173,16 +165,16 @@ static inline int __do_push_ll_and_redirect(struct __sk_buff *skb) ret = bpf_skb_change_head(skb, 14, 0); if (ret < 0) { - printk("skb_change_head() failed: %d\n", ret); + printk("skb_change_head() failed: %d", ret); } - ehdr.h_proto = __constant_htons(ETH_P_IP); + ehdr.h_proto = bpf_htons(ETH_P_IP); memcpy(&ehdr.h_source, &smac, 6); memcpy(&ehdr.h_dest, &dmac, 6); ret = bpf_skb_store_bytes(skb, 0, &ehdr, sizeof(ehdr), 0); if (ret < 0) { - printk("skb_store_bytes() failed: %d\n", ret); + printk("skb_store_bytes() failed: %d", ret); return BPF_DROP; } @@ -202,7 +194,7 @@ int do_push_ll_and_redirect(struct __sk_buff *skb) ret = __do_push_ll_and_redirect(skb); if (ret >= 0) - printk("redirected to %d\n", ifindex); + printk("redirected to %d", ifindex); return ret; } @@ -229,7 +221,7 @@ SEC("fill_garbage") int do_fill_garbage(struct __sk_buff *skb) { __fill_garbage(skb); - printk("Set initial 96 bytes of header to FF\n"); + printk("Set initial 96 bytes of header to FF"); return BPF_OK; } @@ -238,7 +230,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb) { int ifindex = DST_IFINDEX; __fill_garbage(skb); - printk("redirected to %d\n", ifindex); + printk("redirected to %d", ifindex); return bpf_redirect(ifindex, 0); } @@ -246,7 +238,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb) SEC("drop_all") int do_drop_all(struct __sk_buff *skb) { - printk("dropping with: %d\n", BPF_DROP); + printk("dropping with: %d", BPF_DROP); return BPF_DROP; } diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh index 65a976058dd3..148e2df6cdce 100755 --- a/samples/bpf/test_lwt_bpf.sh +++ b/samples/bpf/test_lwt_bpf.sh @@ -19,7 +19,10 @@ IPVETH3="192.168.111.2" IP_LOCAL="192.168.99.1" -TRACE_ROOT=/sys/kernel/debug/tracing +PROG_SRC="test_lwt_bpf.c" +BPF_PROG="test_lwt_bpf.o" +TRACE_ROOT=/sys/kernel/tracing +CONTEXT_INFO=$(cat ${TRACE_ROOT}/trace_options | grep context) function lookup_mac() { @@ -36,7 +39,7 @@ function lookup_mac() function cleanup { set +ex - rm test_lwt_bpf.o 2> /dev/null + rm $BPF_PROG 2> /dev/null ip link del $VETH0 2> /dev/null ip link del $VETH1 2> /dev/null ip link del $VETH2 2> /dev/null @@ -76,7 +79,7 @@ function install_test { cleanup_routes cp /dev/null ${TRACE_ROOT}/trace - OPTS="encap bpf headroom 14 $1 obj test_lwt_bpf.o section $2 $VERBOSE" + OPTS="encap bpf headroom 14 $1 obj $BPF_PROG section $2 $VERBOSE" if [ "$1" == "in" ]; then ip route add table local local ${IP_LOCAL}/32 $OPTS dev lo @@ -96,7 +99,7 @@ function remove_prog { function filter_trace { # Add newline to allow starting EXPECT= variables on newline NL=$'\n' - echo "${NL}$*" | sed -e 's/^.*: : //g' + echo "${NL}$*" | sed -e 's/bpf_trace_printk: //g' } function expect_fail { @@ -160,11 +163,11 @@ function test_ctx_out { failure "test_ctx out: packets are dropped" } match_trace "$(get_trace)" " -len 84 hash 0 protocol 0 +len 84 hash 0 protocol 8 cb 1234 ingress_ifindex 0 ifindex 0 -len 84 hash 0 protocol 0 +len 84 hash 0 protocol 8 cb 1234 ingress_ifindex 0 ifindex 0 -len 84 hash 0 protocol 0 +len 84 hash 0 protocol 8 cb 1234 ingress_ifindex 0 ifindex 0" || exit 1 remove_prog out } @@ -367,14 +370,15 @@ setup_one_veth $NS1 $VETH0 $VETH1 $IPVETH0 $IPVETH1 $IPVETH1b setup_one_veth $NS2 $VETH2 $VETH3 $IPVETH2 $IPVETH3 ip netns exec $NS1 netserver echo 1 > ${TRACE_ROOT}/tracing_on +echo nocontext-info > ${TRACE_ROOT}/trace_options DST_MAC=$(lookup_mac $VETH1 $NS1) SRC_MAC=$(lookup_mac $VETH0) DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex) -CLANG_OPTS="-O2 -target bpf -I ../include/" +CLANG_OPTS="-O2 --target=bpf -I ../include/" CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX" -clang $CLANG_OPTS -c test_lwt_bpf.c -o test_lwt_bpf.o +clang $CLANG_OPTS -c $PROG_SRC -o $BPF_PROG test_ctx_xmit test_ctx_out @@ -397,4 +401,5 @@ test_netperf_redirect cleanup echo 0 > ${TRACE_ROOT}/tracing_on +echo $CONTEXT_INFO > ${TRACE_ROOT}/trace_options exit 0 diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map.bpf.c index b0200c8eac09..9f030f9c4e1b 100644 --- a/samples/bpf/test_map_in_map_kern.c +++ b/samples/bpf/test_map_in_map.bpf.c @@ -6,17 +6,17 @@ * License as published by the Free Software Foundation. */ #define KBUILD_MODNAME "foo" -#include <linux/ptrace.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> -#include <uapi/linux/in6.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#include "trace_common.h" #define MAX_NR_PORTS 65536 +#define EINVAL 22 +#define ENOENT 2 + /* map #0 */ struct inner_a { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -103,19 +103,15 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port) return result ? *result : -ENOENT; } -SEC("kprobe/__sys_connect") -int trace_sys_connect(struct pt_regs *ctx) +SEC("ksyscall/connect") +int BPF_KSYSCALL(trace_sys_connect, unsigned int fd, struct sockaddr_in6 *in6, int addrlen) { - struct sockaddr_in6 *in6; u16 test_case, port, dst6[8]; - int addrlen, ret, inline_ret, ret_key = 0; + int ret, inline_ret, ret_key = 0; u32 port_key; void *outer_map, *inner_map; bool inline_hash = false; - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx); - addrlen = (int)PT_REGS_PARM3_CORE(ctx); - if (addrlen != sizeof(*in6)) return 0; diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index 652ec720533d..55dca43f3723 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -38,7 +38,7 @@ static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key) uint32_t info_len = sizeof(info); int ret, id; - ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len); + ret = bpf_map_get_info_by_fd(inner_map_fd, &info, &info_len); assert(!ret); ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id); @@ -120,7 +120,7 @@ int main(int argc, char **argv) struct bpf_object *obj; char filename[256]; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c deleted file mode 100644 index 8fdd2c9c56b2..000000000000 --- a/samples/bpf/test_overhead_kprobe_kern.c +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2016 Facebook - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#include <linux/version.h> -#include <linux/ptrace.h> -#include <linux/sched.h> -#include <uapi/linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> - -#define _(P) \ - ({ \ - typeof(P) val = 0; \ - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ - val; \ - }) - -SEC("kprobe/__set_task_comm") -int prog(struct pt_regs *ctx) -{ - struct signal_struct *signal; - struct task_struct *tsk; - char oldcomm[TASK_COMM_LEN] = {}; - char newcomm[TASK_COMM_LEN] = {}; - u16 oom_score_adj; - u32 pid; - - tsk = (void *)PT_REGS_PARM1(ctx); - - pid = _(tsk->pid); - bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm); - bpf_probe_read_kernel_str(newcomm, sizeof(newcomm), - (void *)PT_REGS_PARM2(ctx)); - signal = _(tsk->signal); - oom_score_adj = _(signal->oom_score_adj); - return 0; -} - -SEC("kprobe/urandom_read") -int prog2(struct pt_regs *ctx) -{ - return 0; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c deleted file mode 100644 index 8763181a32f3..000000000000 --- a/samples/bpf/test_overhead_raw_tp_kern.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2018 Facebook */ -#include <uapi/linux/bpf.h> -#include <bpf/bpf_helpers.h> - -SEC("raw_tracepoint/task_rename") -int prog(struct bpf_raw_tracepoint_args *ctx) -{ - return 0; -} - -SEC("raw_tracepoint/urandom_read") -int prog2(struct bpf_raw_tracepoint_args *ctx) -{ - return 0; -} -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c deleted file mode 100644 index 80edadacb692..000000000000 --- a/samples/bpf/test_overhead_tp_kern.c +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (c) 2016 Facebook - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#include <linux/sched.h> -#include <uapi/linux/bpf.h> -#include <bpf/bpf_helpers.h> - -/* from /sys/kernel/debug/tracing/events/task/task_rename/format */ -struct task_rename { - __u64 pad; - __u32 pid; - char oldcomm[TASK_COMM_LEN]; - char newcomm[TASK_COMM_LEN]; - __u16 oom_score_adj; -}; -SEC("tracepoint/task/task_rename") -int prog(struct task_rename *ctx) -{ - return 0; -} - -/* from /sys/kernel/debug/tracing/events/random/urandom_read/format */ -struct urandom_read { - __u64 pad; - int got_bits; - int pool_left; - int input_left; -}; -SEC("tracepoint/random/urandom_read") -int prog2(struct urandom_read *ctx) -{ - return 0; -} -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c deleted file mode 100644 index 88717f8ec6ac..000000000000 --- a/samples/bpf/test_overhead_user.c +++ /dev/null @@ -1,215 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2016 Facebook - */ -#define _GNU_SOURCE -#include <sched.h> -#include <errno.h> -#include <stdio.h> -#include <sys/types.h> -#include <asm/unistd.h> -#include <fcntl.h> -#include <unistd.h> -#include <assert.h> -#include <sys/wait.h> -#include <stdlib.h> -#include <signal.h> -#include <linux/bpf.h> -#include <string.h> -#include <time.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#define MAX_CNT 1000000 - -static struct bpf_link *links[2]; -static struct bpf_object *obj; -static int cnt; - -static __u64 time_get_ns(void) -{ - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); - return ts.tv_sec * 1000000000ull + ts.tv_nsec; -} - -static void test_task_rename(int cpu) -{ - __u64 start_time; - char buf[] = "test\n"; - int i, fd; - - fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); - if (fd < 0) { - printf("couldn't open /proc\n"); - exit(1); - } - start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) { - if (write(fd, buf, sizeof(buf)) < 0) { - printf("task rename failed: %s\n", strerror(errno)); - close(fd); - return; - } - } - printf("task_rename:%d: %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); - close(fd); -} - -static void test_urandom_read(int cpu) -{ - __u64 start_time; - char buf[4]; - int i, fd; - - fd = open("/dev/urandom", O_RDONLY); - if (fd < 0) { - printf("couldn't open /dev/urandom\n"); - exit(1); - } - start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) { - if (read(fd, buf, sizeof(buf)) < 0) { - printf("failed to read from /dev/urandom: %s\n", strerror(errno)); - close(fd); - return; - } - } - printf("urandom_read:%d: %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); - close(fd); -} - -static void loop(int cpu, int flags) -{ - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); - - if (flags & 1) - test_task_rename(cpu); - if (flags & 2) - test_urandom_read(cpu); -} - -static void run_perf_test(int tasks, int flags) -{ - pid_t pid[tasks]; - int i; - - for (i = 0; i < tasks; i++) { - pid[i] = fork(); - if (pid[i] == 0) { - loop(i, flags); - exit(0); - } else if (pid[i] == -1) { - printf("couldn't spawn #%d process\n", i); - exit(1); - } - } - for (i = 0; i < tasks; i++) { - int status; - - assert(waitpid(pid[i], &status, 0) == pid[i]); - assert(status == 0); - } -} - -static int load_progs(char *filename) -{ - struct bpf_program *prog; - int err = 0; - - obj = bpf_object__open_file(filename, NULL); - err = libbpf_get_error(obj); - if (err < 0) { - fprintf(stderr, "ERROR: opening BPF object file failed\n"); - return err; - } - - /* load BPF program */ - err = bpf_object__load(obj); - if (err < 0) { - fprintf(stderr, "ERROR: loading BPF object file failed\n"); - return err; - } - - bpf_object__for_each_program(prog, obj) { - links[cnt] = bpf_program__attach(prog); - err = libbpf_get_error(links[cnt]); - if (err < 0) { - fprintf(stderr, "ERROR: bpf_program__attach failed\n"); - links[cnt] = NULL; - return err; - } - cnt++; - } - - return err; -} - -static void unload_progs(void) -{ - while (cnt) - bpf_link__destroy(links[--cnt]); - - bpf_object__close(obj); -} - -int main(int argc, char **argv) -{ - int num_cpu = sysconf(_SC_NPROCESSORS_ONLN); - int test_flags = ~0; - char filename[256]; - int err = 0; - - - if (argc > 1) - test_flags = atoi(argv[1]) ? : test_flags; - if (argc > 2) - num_cpu = atoi(argv[2]) ? : num_cpu; - - if (test_flags & 0x3) { - printf("BASE\n"); - run_perf_test(num_cpu, test_flags); - } - - if (test_flags & 0xC) { - snprintf(filename, sizeof(filename), - "%s_kprobe_kern.o", argv[0]); - - printf("w/KPROBE\n"); - err = load_progs(filename); - if (!err) - run_perf_test(num_cpu, test_flags >> 2); - - unload_progs(); - } - - if (test_flags & 0x30) { - snprintf(filename, sizeof(filename), - "%s_tp_kern.o", argv[0]); - printf("w/TRACEPOINT\n"); - err = load_progs(filename); - if (!err) - run_perf_test(num_cpu, test_flags >> 4); - - unload_progs(); - } - - if (test_flags & 0xC0) { - snprintf(filename, sizeof(filename), - "%s_raw_tp_kern.o", argv[0]); - printf("w/RAW_TRACEPOINT\n"); - err = load_progs(filename); - if (!err) - run_perf_test(num_cpu, test_flags >> 6); - - unload_progs(); - } - - return err; -} diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh deleted file mode 100755 index 35db26f736b9..000000000000 --- a/samples/bpf/test_override_return.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -rm -r tmpmnt -rm -f testfile.img -dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 -DEVICE=$(losetup --show -f testfile.img) -mkfs.btrfs -f $DEVICE -mkdir tmpmnt -./tracex7 $DEVICE -if [ $? -eq 0 ] -then - echo "SUCCESS!" -else - echo "FAILED!" -fi -losetup -d $DEVICE diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c deleted file mode 100644 index 220a96438d75..000000000000 --- a/samples/bpf/test_probe_write_user_kern.c +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <uapi/linux/bpf.h> -#include <linux/version.h> -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> -#include <bpf/bpf_core_read.h> -#include "trace_common.h" - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, struct sockaddr_in); - __type(value, struct sockaddr_in); - __uint(max_entries, 256); -} dnat_map SEC(".maps"); - -/* kprobe is NOT a stable ABI - * kernel functions can be removed, renamed or completely change semantics. - * Number of arguments and their positions can change, etc. - * In such case this bpf+kprobe example will no longer be meaningful - * - * This example sits on a syscall, and the syscall ABI is relatively stable - * of course, across platforms, and over time, the ABI may change. - */ -SEC("kprobe/" SYSCALL(sys_connect)) -int bpf_prog1(struct pt_regs *ctx) -{ - struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); - void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs); - int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs); - struct sockaddr_in new_addr, orig_addr = {}; - struct sockaddr_in *mapped_addr; - - if (sockaddr_len > sizeof(orig_addr)) - return 0; - - if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0) - return 0; - - mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr); - if (mapped_addr != NULL) { - memcpy(&new_addr, mapped_addr, sizeof(new_addr)); - bpf_probe_write_user(sockaddr_arg, &new_addr, - sizeof(new_addr)); - } - return 0; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c deleted file mode 100644 index 00ccfb834e45..000000000000 --- a/samples/bpf/test_probe_write_user_user.c +++ /dev/null @@ -1,108 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <assert.h> -#include <unistd.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <arpa/inet.h> - -int main(int ac, char **argv) -{ - struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in; - struct sockaddr serv_addr, mapped_addr, tmp_addr; - int serverfd, serverconnfd, clientfd, map_fd; - struct bpf_link *link = NULL; - struct bpf_program *prog; - struct bpf_object *obj; - socklen_t sockaddr_len; - char filename[256]; - char *ip; - - serv_addr_in = (struct sockaddr_in *)&serv_addr; - mapped_addr_in = (struct sockaddr_in *)&mapped_addr; - tmp_addr_in = (struct sockaddr_in *)&tmp_addr; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) { - fprintf(stderr, "ERROR: opening BPF object file failed\n"); - return 0; - } - - prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); - if (libbpf_get_error(prog)) { - fprintf(stderr, "ERROR: finding a prog in obj file failed\n"); - goto cleanup; - } - - /* load BPF program */ - if (bpf_object__load(obj)) { - fprintf(stderr, "ERROR: loading BPF object file failed\n"); - goto cleanup; - } - - map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map"); - if (map_fd < 0) { - fprintf(stderr, "ERROR: finding a map in obj file failed\n"); - goto cleanup; - } - - link = bpf_program__attach(prog); - if (libbpf_get_error(link)) { - fprintf(stderr, "ERROR: bpf_program__attach failed\n"); - link = NULL; - goto cleanup; - } - - assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); - assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); - - /* Bind server to ephemeral port on lo */ - memset(&serv_addr, 0, sizeof(serv_addr)); - serv_addr_in->sin_family = AF_INET; - serv_addr_in->sin_port = 0; - serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - - assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0); - - sockaddr_len = sizeof(serv_addr); - assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0); - ip = inet_ntoa(serv_addr_in->sin_addr); - printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port)); - - memset(&mapped_addr, 0, sizeof(mapped_addr)); - mapped_addr_in->sin_family = AF_INET; - mapped_addr_in->sin_port = htons(5555); - mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); - - assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY)); - - assert(listen(serverfd, 5) == 0); - - ip = inet_ntoa(mapped_addr_in->sin_addr); - printf("Client connecting to: %s:%d\n", - ip, ntohs(mapped_addr_in->sin_port)); - assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0); - - sockaddr_len = sizeof(tmp_addr); - ip = inet_ntoa(tmp_addr_in->sin_addr); - assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0); - printf("Server received connection from: %s:%d\n", - ip, ntohs(tmp_addr_in->sin_port)); - - sockaddr_len = sizeof(tmp_addr); - assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0); - ip = inet_ntoa(tmp_addr_in->sin_addr); - printf("Client's peer address: %s:%d\n", - ip, ntohs(tmp_addr_in->sin_port)); - - /* Is the server's getsockname = the socket getpeername */ - assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0); - -cleanup: - bpf_link__destroy(link); - bpf_object__close(obj); - return 0; -} diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h deleted file mode 100644 index 8cb5400aed1f..000000000000 --- a/samples/bpf/trace_common.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef __TRACE_COMMON_H -#define __TRACE_COMMON_H - -#ifdef __x86_64__ -#define SYSCALL(SYS) "__x64_" __stringify(SYS) -#elif defined(__s390x__) -#define SYSCALL(SYS) "__s390x_" __stringify(SYS) -#else -#define SYSCALL(SYS) __stringify(SYS) -#endif - -#endif diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output.bpf.c index b64815af0943..565a73b51b04 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output.bpf.c @@ -1,8 +1,6 @@ -#include <linux/ptrace.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "trace_common.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -11,7 +9,7 @@ struct { __uint(max_entries, 2); } my_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_write)) +SEC("ksyscall/write") int bpf_prog1(struct pt_regs *ctx) { struct S { diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 371732f9cf8e..d316fd2c8e24 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -51,7 +51,7 @@ int main(int argc, char **argv) char filename[256]; FILE *f; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1.bpf.c index ef30d2b353b0..ceedf0b1d479 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1.bpf.c @@ -4,42 +4,35 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <uapi/linux/bpf.h> +#include "vmlinux.h" +#include "net_shared.h" #include <linux/version.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> #include <bpf/bpf_tracing.h> -#define _(P) \ - ({ \ - typeof(P) val = 0; \ - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ - val; \ - }) - /* kprobe is NOT a stable ABI * kernel functions can be removed, renamed or completely change semantics. * Number of arguments and their positions can change, etc. * In such case this bpf+kprobe example will no longer be meaningful */ -SEC("kprobe/__netif_receive_skb_core") +SEC("kprobe.multi/__netif_receive_skb_core*") int bpf_prog1(struct pt_regs *ctx) { /* attaches to kprobe __netif_receive_skb_core, - * looks for packets on loobpack device and prints them + * looks for packets on loopback device and prints them + * (wildcard is used for avoiding symbol mismatch due to optimization) */ char devname[IFNAMSIZ]; struct net_device *dev; struct sk_buff *skb; int len; - /* non-portable! works for the given kernel only */ - bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx)); - dev = _(skb->dev); - len = _(skb->len); + bpf_core_read(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx)); + dev = BPF_CORE_READ(skb, dev); + len = BPF_CORE_READ(skb, len); - bpf_probe_read_kernel(devname, sizeof(devname), dev->name); + BPF_CORE_READ_STR_INTO(&devname, dev, name); if (devname[0] == 'l' && devname[1] == 'o') { char fmt[] = "skb %p len %d\n"; diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c index 9d4adb7fd834..8c3d9043a2b6 100644 --- a/samples/bpf/tracex1_user.c +++ b/samples/bpf/tracex1_user.c @@ -12,7 +12,7 @@ int main(int ac, char **argv) char filename[256]; FILE *f; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c deleted file mode 100644 index 93e0b7680b4f..000000000000 --- a/samples/bpf/tracex2_kern.c +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/version.h> -#include <uapi/linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> -#include "trace_common.h" - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, long); - __type(value, long); - __uint(max_entries, 1024); -} my_map SEC(".maps"); - -/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe - * example will no longer be meaningful - */ -SEC("kprobe/kfree_skb_reason") -int bpf_prog2(struct pt_regs *ctx) -{ - long loc = 0; - long init_val = 1; - long *value; - - /* read ip of kfree_skb_reason caller. - * non-portable version of __builtin_return_address(0) - */ - BPF_KPROBE_READ_RET_IP(loc, ctx); - - value = bpf_map_lookup_elem(&my_map, &loc); - if (value) - *value += 1; - else - bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY); - return 0; -} - -static unsigned int log2(unsigned int v) -{ - unsigned int r; - unsigned int shift; - - r = (v > 0xFFFF) << 4; v >>= r; - shift = (v > 0xFF) << 3; v >>= shift; r |= shift; - shift = (v > 0xF) << 2; v >>= shift; r |= shift; - shift = (v > 0x3) << 1; v >>= shift; r |= shift; - r |= (v >> 1); - return r; -} - -static unsigned int log2l(unsigned long v) -{ - unsigned int hi = v >> 32; - if (hi) - return log2(hi) + 32; - else - return log2(v); -} - -struct hist_key { - char comm[16]; - u64 pid_tgid; - u64 uid_gid; - u64 index; -}; - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_HASH); - __uint(key_size, sizeof(struct hist_key)); - __uint(value_size, sizeof(long)); - __uint(max_entries, 1024); -} my_hist_map SEC(".maps"); - -SEC("kprobe/" SYSCALL(sys_write)) -int bpf_prog3(struct pt_regs *ctx) -{ - long write_size = PT_REGS_PARM3(ctx); - long init_val = 1; - long *value; - struct hist_key key; - - key.index = log2l(write_size); - key.pid_tgid = bpf_get_current_pid_tgid(); - key.uid_gid = bpf_get_current_uid_gid(); - bpf_get_current_comm(&key.comm, sizeof(key.comm)); - - value = bpf_map_lookup_elem(&my_hist_map, &key); - if (value) - __sync_fetch_and_add(value, 1); - else - bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY); - return 0; -} -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c deleted file mode 100644 index 089e408abd7a..000000000000 --- a/samples/bpf/tracex2_user.c +++ /dev/null @@ -1,187 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <signal.h> -#include <string.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "bpf_util.h" - -#define MAX_INDEX 64 -#define MAX_STARS 38 - -/* my_map, my_hist_map */ -static int map_fd[2]; - -static void stars(char *str, long val, long max, int width) -{ - int i; - - for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++) - str[i] = '*'; - if (val > max) - str[i - 1] = '+'; - str[i] = '\0'; -} - -struct task { - char comm[16]; - __u64 pid_tgid; - __u64 uid_gid; -}; - -struct hist_key { - struct task t; - __u32 index; -}; - -#define SIZE sizeof(struct task) - -static void print_hist_for_pid(int fd, void *task) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - struct hist_key key = {}, next_key; - long values[nr_cpus]; - char starstr[MAX_STARS]; - long value; - long data[MAX_INDEX] = {}; - int max_ind = -1; - long max_value = 0; - int i, ind; - - while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { - if (memcmp(&next_key, task, SIZE)) { - key = next_key; - continue; - } - bpf_map_lookup_elem(fd, &next_key, values); - value = 0; - for (i = 0; i < nr_cpus; i++) - value += values[i]; - ind = next_key.index; - data[ind] = value; - if (value && ind > max_ind) - max_ind = ind; - if (value > max_value) - max_value = value; - key = next_key; - } - - printf(" syscall write() stats\n"); - printf(" byte_size : count distribution\n"); - for (i = 1; i <= max_ind + 1; i++) { - stars(starstr, data[i - 1], max_value, MAX_STARS); - printf("%8ld -> %-8ld : %-8ld |%-*s|\n", - (1l << i) >> 1, (1l << i) - 1, data[i - 1], - MAX_STARS, starstr); - } -} - -static void print_hist(int fd) -{ - struct hist_key key = {}, next_key; - static struct task tasks[1024]; - int task_cnt = 0; - int i; - - while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { - int found = 0; - - for (i = 0; i < task_cnt; i++) - if (memcmp(&tasks[i], &next_key, SIZE) == 0) - found = 1; - if (!found) - memcpy(&tasks[task_cnt++], &next_key, SIZE); - key = next_key; - } - - for (i = 0; i < task_cnt; i++) { - printf("\npid %d cmd %s uid %d\n", - (__u32) tasks[i].pid_tgid, - tasks[i].comm, - (__u32) tasks[i].uid_gid); - print_hist_for_pid(fd, &tasks[i]); - } - -} - -static void int_exit(int sig) -{ - print_hist(map_fd[1]); - exit(0); -} - -int main(int ac, char **argv) -{ - long key, next_key, value; - struct bpf_link *links[2]; - struct bpf_program *prog; - struct bpf_object *obj; - char filename[256]; - int i, j = 0; - FILE *f; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) { - fprintf(stderr, "ERROR: opening BPF object file failed\n"); - return 0; - } - - /* load BPF program */ - if (bpf_object__load(obj)) { - fprintf(stderr, "ERROR: loading BPF object file failed\n"); - goto cleanup; - } - - map_fd[0] = bpf_object__find_map_fd_by_name(obj, "my_map"); - map_fd[1] = bpf_object__find_map_fd_by_name(obj, "my_hist_map"); - if (map_fd[0] < 0 || map_fd[1] < 0) { - fprintf(stderr, "ERROR: finding a map in obj file failed\n"); - goto cleanup; - } - - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); - - /* start 'ping' in the background to have some kfree_skb_reason - * events */ - f = popen("ping -4 -c5 localhost", "r"); - (void) f; - - /* start 'dd' in the background to have plenty of 'write' syscalls */ - f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r"); - (void) f; - - bpf_object__for_each_program(prog, obj) { - links[j] = bpf_program__attach(prog); - if (libbpf_get_error(links[j])) { - fprintf(stderr, "ERROR: bpf_program__attach failed\n"); - links[j] = NULL; - goto cleanup; - } - j++; - } - - for (i = 0; i < 5; i++) { - key = 0; - while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_map_lookup_elem(map_fd[0], &next_key, &value); - printf("location 0x%lx count %ld\n", next_key, value); - key = next_key; - } - if (key) - printf("\n"); - sleep(1); - } - print_hist(map_fd[1]); - -cleanup: - for (j--; j >= 0; j--) - bpf_link__destroy(links[j]); - - bpf_object__close(obj); - return 0; -} diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3.bpf.c index bde6591cb20c..41f37966f5f5 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3.bpf.c @@ -4,13 +4,17 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +struct start_key { + dev_t dev; + u32 _pad; + sector_t sector; +}; + struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, long); @@ -18,16 +22,17 @@ struct { __uint(max_entries, 4096); } my_map SEC(".maps"); -/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe - * example will no longer be meaningful - */ -SEC("kprobe/blk_mq_start_request") -int bpf_prog1(struct pt_regs *ctx) +/* from /sys/kernel/tracing/events/block/block_io_start/format */ +SEC("tracepoint/block/block_io_start") +int bpf_prog1(struct trace_event_raw_block_rq *ctx) { - long rq = PT_REGS_PARM1(ctx); u64 val = bpf_ktime_get_ns(); + struct start_key key = { + .dev = ctx->dev, + .sector = ctx->sector + }; - bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY); + bpf_map_update_elem(&my_map, &key, &val, BPF_ANY); return 0; } @@ -49,21 +54,26 @@ struct { __uint(max_entries, SLOTS); } lat_map SEC(".maps"); -SEC("kprobe/__blk_account_io_done") -int bpf_prog2(struct pt_regs *ctx) +/* from /sys/kernel/tracing/events/block/block_io_done/format */ +SEC("tracepoint/block/block_io_done") +int bpf_prog2(struct trace_event_raw_block_rq *ctx) { - long rq = PT_REGS_PARM1(ctx); + struct start_key key = { + .dev = ctx->dev, + .sector = ctx->sector + }; + u64 *value, l, base; u32 index; - value = bpf_map_lookup_elem(&my_map, &rq); + value = bpf_map_lookup_elem(&my_map, &key); if (!value) return 0; u64 cur_time = bpf_ktime_get_ns(); u64 delta = cur_time - *value; - bpf_map_delete_elem(&my_map, &rq); + bpf_map_delete_elem(&my_map, &key); /* the lines below are computing index = log10(delta)*10 * using integer arithmetic diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index d5eebace31e6..1002eb0323b4 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -125,7 +125,7 @@ int main(int ac, char **argv) } } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4.bpf.c index eb0f8fdd14bf..d786492fd926 100644 --- a/samples/bpf/tracex4_kern.c +++ b/samples/bpf/tracex4.bpf.c @@ -4,9 +4,8 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/ptrace.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -34,13 +33,13 @@ int bpf_prog1(struct pt_regs *ctx) return 0; } -SEC("kretprobe/kmem_cache_alloc_node") +SEC("kretprobe/kmem_cache_alloc_node_noprof") int bpf_prog2(struct pt_regs *ctx) { long ptr = PT_REGS_RC(ctx); long ip = 0; - /* get ip address of kmem_cache_alloc_node() caller */ + /* get ip address of kmem_cache_alloc_node_noprof() caller */ BPF_KRETPROBE_READ_RET_IP(ip, ctx); struct pair v = { diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index 227b05a0bc88..a5145ad72cbf 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -51,9 +51,9 @@ int main(int ac, char **argv) struct bpf_program *prog; struct bpf_object *obj; char filename[256]; - int map_fd, i, j = 0; + int map_fd, j = 0; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); @@ -82,7 +82,7 @@ int main(int ac, char **argv) j++; } - for (i = 0; ; i++) { + while (1) { print_old_objects(map_fd); sleep(1); } diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5.bpf.c index 64a1f7550d7e..4d3d6c9b25fa 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5.bpf.c @@ -4,15 +4,15 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/ptrace.h> +#include "vmlinux.h" +#include "syscall_nrs.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> -#include <uapi/linux/seccomp.h> #include <uapi/linux/unistd.h> -#include "syscall_nrs.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#define __stringify(x) #x #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F struct { @@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); + bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); + bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index 9d7d79f0d47d..7e2d8397fb98 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -42,7 +42,7 @@ int main(int ac, char **argv) char filename[256]; FILE *f; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6.bpf.c index acad5712d8b4..9b23b4737cfb 100644 --- a/samples/bpf/tracex6_kern.c +++ b/samples/bpf/tracex6.bpf.c @@ -1,7 +1,8 @@ -#include <linux/ptrace.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -45,13 +46,24 @@ int bpf_prog1(struct pt_regs *ctx) return 0; } -SEC("kprobe/htab_map_lookup_elem") -int bpf_prog2(struct pt_regs *ctx) +/* + * Since *_map_lookup_elem can't be expected to trigger bpf programs + * due to potential deadlocks (bpf_disable_instrumentation), this bpf + * program will be attached to bpf_map_copy_value (which is called + * from map_lookup_elem) and will only filter the hashtable type. + */ +SEC("kprobe/bpf_map_copy_value") +int BPF_KPROBE(bpf_prog2, struct bpf_map *map) { u32 key = bpf_get_smp_processor_id(); struct bpf_perf_event_value *val, buf; + enum bpf_map_type type; int error; + type = BPF_CORE_READ(map, map_type); + if (type != BPF_MAP_TYPE_HASH) + return 0; + error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf)); if (error) return 0; diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 8e83bf2a84a4..ae811ac83bc2 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -180,7 +180,7 @@ int main(int argc, char **argv) char filename[256]; int i = 0; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c deleted file mode 100644 index c5a92df8ac31..000000000000 --- a/samples/bpf/tracex7_kern.c +++ /dev/null @@ -1,16 +0,0 @@ -#include <uapi/linux/ptrace.h> -#include <uapi/linux/bpf.h> -#include <linux/version.h> -#include <bpf/bpf_helpers.h> - -SEC("kprobe/open_ctree") -int bpf_prog1(struct pt_regs *ctx) -{ - unsigned long rc = -12; - - bpf_override_return(ctx, rc); - return 0; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c deleted file mode 100644 index 8be7ce18d3ba..000000000000 --- a/samples/bpf/tracex7_user.c +++ /dev/null @@ -1,56 +0,0 @@ -#define _GNU_SOURCE - -#include <stdio.h> -#include <unistd.h> -#include <bpf/libbpf.h> - -int main(int argc, char **argv) -{ - struct bpf_link *link = NULL; - struct bpf_program *prog; - struct bpf_object *obj; - char filename[256]; - char command[256]; - int ret = 0; - FILE *f; - - if (!argv[1]) { - fprintf(stderr, "ERROR: Run with the btrfs device argument!\n"); - return 0; - } - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) { - fprintf(stderr, "ERROR: opening BPF object file failed\n"); - return 0; - } - - prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); - if (!prog) { - fprintf(stderr, "ERROR: finding a prog in obj file failed\n"); - goto cleanup; - } - - /* load BPF program */ - if (bpf_object__load(obj)) { - fprintf(stderr, "ERROR: loading BPF object file failed\n"); - goto cleanup; - } - - link = bpf_program__attach(prog); - if (libbpf_get_error(link)) { - fprintf(stderr, "ERROR: bpf_program__attach failed\n"); - link = NULL; - goto cleanup; - } - - snprintf(command, 256, "mount %s tmpmnt/", argv[1]); - f = popen(command, "r"); - ret = pclose(f); - -cleanup: - bpf_link__destroy(link); - bpf_object__close(obj); - return ret ? 0 : 1; -} diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c deleted file mode 100644 index 0a5c704badd0..000000000000 --- a/samples/bpf/xdp1_kern.c +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright (c) 2016 PLUMgrid - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#define KBUILD_MODNAME "foo" -#include <uapi/linux/bpf.h> -#include <linux/in.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/if_vlan.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <bpf/bpf_helpers.h> - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, long); - __uint(max_entries, 256); -} rxcnt SEC(".maps"); - -static int parse_ipv4(void *data, u64 nh_off, void *data_end) -{ - struct iphdr *iph = data + nh_off; - - if (iph + 1 > data_end) - return 0; - return iph->protocol; -} - -static int parse_ipv6(void *data, u64 nh_off, void *data_end) -{ - struct ipv6hdr *ip6h = data + nh_off; - - if (ip6h + 1 > data_end) - return 0; - return ip6h->nexthdr; -} - -#define XDPBUFSIZE 64 -SEC("xdp.frags") -int xdp_prog1(struct xdp_md *ctx) -{ - __u8 pkt[XDPBUFSIZE] = {}; - void *data_end = &pkt[XDPBUFSIZE-1]; - void *data = pkt; - struct ethhdr *eth = data; - int rc = XDP_DROP; - long *value; - u16 h_proto; - u64 nh_off; - u32 ipproto; - - if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) - return rc; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return rc; - - h_proto = eth->h_proto; - - /* Handle VLAN tagged packet */ - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { - struct vlan_hdr *vhdr; - - vhdr = data + nh_off; - nh_off += sizeof(struct vlan_hdr); - if (data + nh_off > data_end) - return rc; - h_proto = vhdr->h_vlan_encapsulated_proto; - } - /* Handle double VLAN tagged packet */ - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { - struct vlan_hdr *vhdr; - - vhdr = data + nh_off; - nh_off += sizeof(struct vlan_hdr); - if (data + nh_off > data_end) - return rc; - h_proto = vhdr->h_vlan_encapsulated_proto; - } - - if (h_proto == htons(ETH_P_IP)) - ipproto = parse_ipv4(data, nh_off, data_end); - else if (h_proto == htons(ETH_P_IPV6)) - ipproto = parse_ipv6(data, nh_off, data_end); - else - ipproto = 0; - - value = bpf_map_lookup_elem(&rxcnt, &ipproto); - if (value) - *value += 1; - - return rc; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c deleted file mode 100644 index 281dc964de8d..000000000000 --- a/samples/bpf/xdp1_user.c +++ /dev/null @@ -1,166 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2016 PLUMgrid - */ -#include <linux/bpf.h> -#include <linux/if_link.h> -#include <assert.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <libgen.h> -#include <net/if.h> - -#include "bpf_util.h" -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -static int ifindex; -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static __u32 prog_id; - -static void int_exit(int sig) -{ - __u32 curr_prog_id = 0; - - if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { - printf("bpf_xdp_query_id failed\n"); - exit(1); - } - if (prog_id == curr_prog_id) - bpf_xdp_detach(ifindex, xdp_flags, NULL); - else if (!curr_prog_id) - printf("couldn't find a prog id on a given interface\n"); - else - printf("program on interface changed, not removing\n"); - exit(0); -} - -/* simple per-protocol drop counter - */ -static void poll_stats(int map_fd, int interval) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - __u64 values[nr_cpus], prev[UINT8_MAX] = { 0 }; - int i; - - while (1) { - __u32 key = UINT32_MAX; - - sleep(interval); - - while (bpf_map_get_next_key(map_fd, &key, &key) == 0) { - __u64 sum = 0; - - assert(bpf_map_lookup_elem(map_fd, &key, values) == 0); - for (i = 0; i < nr_cpus; i++) - sum += values[i]; - if (sum > prev[key]) - printf("proto %u: %10llu pkt/s\n", - key, (sum - prev[key]) / interval); - prev[key] = sum; - } - } -} - -static void usage(const char *prog) -{ - fprintf(stderr, - "usage: %s [OPTS] IFACE\n\n" - "OPTS:\n" - " -S use skb-mode\n" - " -N enforce native mode\n" - " -F force loading prog\n", - prog); -} - -int main(int argc, char **argv) -{ - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - const char *optstr = "FSN"; - int prog_fd, map_fd, opt; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_map *map; - char filename[256]; - int err; - - while ((opt = getopt(argc, argv, optstr)) != -1) { - switch (opt) { - case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - case 'N': - /* default, set below */ - break; - case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; - break; - default: - usage(basename(argv[0])); - return 1; - } - } - - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) - xdp_flags |= XDP_FLAGS_DRV_MODE; - - if (optind == argc) { - usage(basename(argv[0])); - return 1; - } - - ifindex = if_nametoindex(argv[optind]); - if (!ifindex) { - perror("if_nametoindex"); - return 1; - } - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) - return 1; - - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); - - err = bpf_object__load(obj); - if (err) - return 1; - - prog_fd = bpf_program__fd(prog); - - map = bpf_object__next_map(obj, NULL); - if (!map) { - printf("finding a map in obj file failed\n"); - return 1; - } - map_fd = bpf_map__fd(map); - - if (!prog_fd) { - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); - return 1; - } - - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); - - if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { - printf("link set xdp fd failed\n"); - return 1; - } - - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (err) { - printf("can't get prog info - %s\n", strerror(errno)); - return err; - } - prog_id = info.id; - - poll_stats(map_fd, 1); - - return 0; -} diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c deleted file mode 100644 index 67804ecf7ce3..000000000000 --- a/samples/bpf/xdp2_kern.c +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2016 PLUMgrid - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#define KBUILD_MODNAME "foo" -#include <uapi/linux/bpf.h> -#include <linux/in.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/if_vlan.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <bpf/bpf_helpers.h> - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, long); - __uint(max_entries, 256); -} rxcnt SEC(".maps"); - -static void swap_src_dst_mac(void *data) -{ - unsigned short *p = data; - unsigned short dst[3]; - - dst[0] = p[0]; - dst[1] = p[1]; - dst[2] = p[2]; - p[0] = p[3]; - p[1] = p[4]; - p[2] = p[5]; - p[3] = dst[0]; - p[4] = dst[1]; - p[5] = dst[2]; -} - -static int parse_ipv4(void *data, u64 nh_off, void *data_end) -{ - struct iphdr *iph = data + nh_off; - - if (iph + 1 > data_end) - return 0; - return iph->protocol; -} - -static int parse_ipv6(void *data, u64 nh_off, void *data_end) -{ - struct ipv6hdr *ip6h = data + nh_off; - - if (ip6h + 1 > data_end) - return 0; - return ip6h->nexthdr; -} - -#define XDPBUFSIZE 64 -SEC("xdp.frags") -int xdp_prog1(struct xdp_md *ctx) -{ - __u8 pkt[XDPBUFSIZE] = {}; - void *data_end = &pkt[XDPBUFSIZE-1]; - void *data = pkt; - struct ethhdr *eth = data; - int rc = XDP_DROP; - long *value; - u16 h_proto; - u64 nh_off; - u32 ipproto; - - if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) - return rc; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return rc; - - h_proto = eth->h_proto; - - /* Handle VLAN tagged packet */ - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { - struct vlan_hdr *vhdr; - - vhdr = data + nh_off; - nh_off += sizeof(struct vlan_hdr); - if (data + nh_off > data_end) - return rc; - h_proto = vhdr->h_vlan_encapsulated_proto; - } - /* Handle double VLAN tagged packet */ - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { - struct vlan_hdr *vhdr; - - vhdr = data + nh_off; - nh_off += sizeof(struct vlan_hdr); - if (data + nh_off > data_end) - return rc; - h_proto = vhdr->h_vlan_encapsulated_proto; - } - - if (h_proto == htons(ETH_P_IP)) - ipproto = parse_ipv4(data, nh_off, data_end); - else if (h_proto == htons(ETH_P_IPV6)) - ipproto = parse_ipv6(data, nh_off, data_end); - else - ipproto = 0; - - value = bpf_map_lookup_elem(&rxcnt, &ipproto); - if (value) - *value += 1; - - if (ipproto == IPPROTO_UDP) { - swap_src_dst_mac(data); - - if (bpf_xdp_store_bytes(ctx, 0, pkt, sizeof(pkt))) - return rc; - - rc = XDP_TX; - } - - return rc; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c index d5631014a176..3c36c25d9902 100644 --- a/samples/bpf/xdp2skb_meta_kern.c +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -32,7 +32,7 @@ SEC("xdp_mark") int _xdp_mark(struct xdp_md *ctx) { struct meta_info *meta; - void *data, *data_end; + void *data; int ret; /* Reserve space in-front of data pointer for our meta info. @@ -63,7 +63,6 @@ SEC("tc_mark") int _tc_mark(struct __sk_buff *ctx) { void *data = (void *)(unsigned long)ctx->data; - void *data_end = (void *)(unsigned long)ctx->data_end; void *data_meta = (void *)(unsigned long)ctx->data_meta; struct meta_info *meta = data_meta; diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c index ffdd548627f0..da67bcad1c63 100644 --- a/samples/bpf/xdp_adjust_tail_kern.c +++ b/samples/bpf/xdp_adjust_tail_kern.c @@ -57,6 +57,7 @@ static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth) static __always_inline __u16 csum_fold_helper(__u32 csum) { + csum = (csum & 0xffff) + (csum >> 16); return ~((csum & 0xffff) + (csum >> 16)); } diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 167646077c8f..e9426bd65420 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -184,7 +184,7 @@ int main(int argc, char **argv) return 1; } - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (err) { printf("can't get prog info - %s\n", strerror(errno)); return 1; diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 84f57f1209ce..193b3b79b31f 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -76,9 +76,9 @@ static int do_detach(int ifindex, const char *ifname, const char *app_name) return prog_fd; } - err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); if (err) { - printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n", + printf("ERROR: bpf_prog_get_info_by_fd failed (%s)\n", strerror(errno)); goto close_out; } diff --git a/samples/bpf/xdp_monitor.bpf.c b/samples/bpf/xdp_monitor.bpf.c deleted file mode 100644 index cfb41e2205f4..000000000000 --- a/samples/bpf/xdp_monitor.bpf.c +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. - * - * XDP monitor tool, based on tracepoints - */ -#include "xdp_sample.bpf.h" - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c deleted file mode 100644 index 58015eb2ffae..000000000000 --- a/samples/bpf/xdp_monitor_user.c +++ /dev/null @@ -1,118 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ -static const char *__doc__= -"XDP monitor tool, based on tracepoints\n"; - -static const char *__doc_err_only__= -" NOTICE: Only tracking XDP redirect errors\n" -" Enable redirect success stats via '-s/--stats'\n" -" (which comes with a per packet processing overhead)\n"; - -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <stdint.h> -#include <string.h> -#include <ctype.h> -#include <unistd.h> -#include <locale.h> -#include <getopt.h> -#include <net/if.h> -#include <time.h> -#include <signal.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "bpf_util.h" -#include "xdp_sample_user.h" -#include "xdp_monitor.skel.h" - -static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT | - SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT | - SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; - -DEFINE_SAMPLE_INIT(xdp_monitor); - -static const struct option long_options[] = { - { "help", no_argument, NULL, 'h' }, - { "stats", no_argument, NULL, 's' }, - { "interval", required_argument, NULL, 'i' }, - { "verbose", no_argument, NULL, 'v' }, - {} -}; - -int main(int argc, char **argv) -{ - unsigned long interval = 2; - int ret = EXIT_FAIL_OPTION; - struct xdp_monitor *skel; - bool errors_only = true; - int longindex = 0, opt; - bool error = true; - - /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "si:vh", - long_options, &longindex)) != -1) { - switch (opt) { - case 's': - errors_only = false; - mask |= SAMPLE_REDIRECT_CNT; - break; - case 'i': - interval = strtoul(optarg, NULL, 0); - break; - case 'v': - sample_switch_mode(); - break; - case 'h': - error = false; - default: - sample_usage(argv, long_options, __doc__, mask, error); - return ret; - } - } - - skel = xdp_monitor__open(); - if (!skel) { - fprintf(stderr, "Failed to xdp_monitor__open: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end; - } - - ret = sample_init_pre_load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - ret = xdp_monitor__load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - ret = sample_init(skel, mask); - if (ret < 0) { - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - if (errors_only) - printf("%s", __doc_err_only__); - - ret = sample_run(interval, NULL, NULL); - if (ret < 0) { - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - ret = EXIT_OK; -end_destroy: - xdp_monitor__destroy(skel); -end: - sample_exit(ret); -} diff --git a/samples/bpf/xdp_redirect.bpf.c b/samples/bpf/xdp_redirect.bpf.c deleted file mode 100644 index 7c02bacfe96b..000000000000 --- a/samples/bpf/xdp_redirect.bpf.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#include "vmlinux.h" -#include "xdp_sample.bpf.h" -#include "xdp_sample_shared.h" - -const volatile int ifindex_out; - -SEC("xdp") -int xdp_redirect_prog(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = bpf_get_smp_processor_id(); - struct ethhdr *eth = data; - struct datarec *rec; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - swap_src_dst_mac(data); - return bpf_redirect(ifindex_out, 0); -} - -/* Redirect require an XDP bpf_prog loaded on the TX device */ -SEC("xdp") -int xdp_redirect_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c deleted file mode 100644 index 87c54bfdbb70..000000000000 --- a/samples/bpf/xdp_redirect_cpu.bpf.c +++ /dev/null @@ -1,539 +0,0 @@ -/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP) - * - * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. - */ -#include "vmlinux.h" -#include "xdp_sample.bpf.h" -#include "xdp_sample_shared.h" -#include "hash_func01.h" - -/* Special map type that can XDP_REDIRECT frames to another CPU */ -struct { - __uint(type, BPF_MAP_TYPE_CPUMAP); - __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(struct bpf_cpumap_val)); -} cpu_map SEC(".maps"); - -/* Set of maps controlling available CPU, and for iterating through - * selectable redirect CPUs. - */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, u32); - __type(value, u32); -} cpus_available SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, u32); - __type(value, u32); - __uint(max_entries, 1); -} cpus_count SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, u32); - __uint(max_entries, 1); -} cpus_iterator SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(struct bpf_devmap_val)); - __uint(max_entries, 1); -} tx_port SEC(".maps"); - -char tx_mac_addr[ETH_ALEN]; - -/* Helper parse functions */ - -static __always_inline -bool parse_eth(struct ethhdr *eth, void *data_end, - u16 *eth_proto, u64 *l3_offset) -{ - u16 eth_type; - u64 offset; - - offset = sizeof(*eth); - if ((void *)eth + offset > data_end) - return false; - - eth_type = eth->h_proto; - - /* Skip non 802.3 Ethertypes */ - if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0)) - return false; - - /* Handle VLAN tagged packet */ - if (eth_type == bpf_htons(ETH_P_8021Q) || - eth_type == bpf_htons(ETH_P_8021AD)) { - struct vlan_hdr *vlan_hdr; - - vlan_hdr = (void *)eth + offset; - offset += sizeof(*vlan_hdr); - if ((void *)eth + offset > data_end) - return false; - eth_type = vlan_hdr->h_vlan_encapsulated_proto; - } - /* Handle double VLAN tagged packet */ - if (eth_type == bpf_htons(ETH_P_8021Q) || - eth_type == bpf_htons(ETH_P_8021AD)) { - struct vlan_hdr *vlan_hdr; - - vlan_hdr = (void *)eth + offset; - offset += sizeof(*vlan_hdr); - if ((void *)eth + offset > data_end) - return false; - eth_type = vlan_hdr->h_vlan_encapsulated_proto; - } - - *eth_proto = bpf_ntohs(eth_type); - *l3_offset = offset; - return true; -} - -static __always_inline -u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct iphdr *iph = data + nh_off; - struct udphdr *udph; - - if (iph + 1 > data_end) - return 0; - if (!(iph->protocol == IPPROTO_UDP)) - return 0; - - udph = (void *)(iph + 1); - if (udph + 1 > data_end) - return 0; - - return bpf_ntohs(udph->dest); -} - -static __always_inline -int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct iphdr *iph = data + nh_off; - - if (iph + 1 > data_end) - return 0; - return iph->protocol; -} - -static __always_inline -int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ipv6hdr *ip6h = data + nh_off; - - if (ip6h + 1 > data_end) - return 0; - return ip6h->nexthdr; -} - -SEC("xdp") -int xdp_prognum0_no_touch(struct xdp_md *ctx) -{ - u32 key = bpf_get_smp_processor_id(); - struct datarec *rec; - u32 *cpu_selected; - u32 cpu_dest = 0; - u32 key0 = 0; - - /* Only use first entry in cpus_available */ - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); - if (!cpu_selected) - return XDP_ABORTED; - cpu_dest = *cpu_selected; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - if (cpu_dest >= nr_cpus) { - NO_TEAR_INC(rec->issue); - return XDP_ABORTED; - } - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp") -int xdp_prognum1_touch_data(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = bpf_get_smp_processor_id(); - struct ethhdr *eth = data; - struct datarec *rec; - u32 *cpu_selected; - u32 cpu_dest = 0; - u32 key0 = 0; - u16 eth_type; - - /* Only use first entry in cpus_available */ - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); - if (!cpu_selected) - return XDP_ABORTED; - cpu_dest = *cpu_selected; - - /* Validate packet length is minimum Eth header size */ - if (eth + 1 > data_end) - return XDP_ABORTED; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - /* Read packet data, and use it (drop non 802.3 Ethertypes) */ - eth_type = eth->h_proto; - if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) { - NO_TEAR_INC(rec->dropped); - return XDP_DROP; - } - - if (cpu_dest >= nr_cpus) { - NO_TEAR_INC(rec->issue); - return XDP_ABORTED; - } - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp") -int xdp_prognum2_round_robin(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = bpf_get_smp_processor_id(); - struct datarec *rec; - u32 cpu_dest = 0; - u32 key0 = 0; - - u32 *cpu_selected; - u32 *cpu_iterator; - u32 *cpu_max; - u32 cpu_idx; - - cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); - if (!cpu_max) - return XDP_ABORTED; - - cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); - if (!cpu_iterator) - return XDP_ABORTED; - cpu_idx = *cpu_iterator; - - *cpu_iterator += 1; - if (*cpu_iterator == *cpu_max) - *cpu_iterator = 0; - - cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_selected) - return XDP_ABORTED; - cpu_dest = *cpu_selected; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - if (cpu_dest >= nr_cpus) { - NO_TEAR_INC(rec->issue); - return XDP_ABORTED; - } - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp") -int xdp_prognum3_proto_separate(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = bpf_get_smp_processor_id(); - struct ethhdr *eth = data; - u8 ip_proto = IPPROTO_UDP; - struct datarec *rec; - u16 eth_proto = 0; - u64 l3_offset = 0; - u32 cpu_dest = 0; - u32 *cpu_lookup; - u32 cpu_idx = 0; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) - return XDP_PASS; /* Just skip */ - - /* Extract L4 protocol */ - switch (eth_proto) { - case ETH_P_IP: - ip_proto = get_proto_ipv4(ctx, l3_offset); - break; - case ETH_P_IPV6: - ip_proto = get_proto_ipv6(ctx, l3_offset); - break; - case ETH_P_ARP: - cpu_idx = 0; /* ARP packet handled on separate CPU */ - break; - default: - cpu_idx = 0; - } - - /* Choose CPU based on L4 protocol */ - switch (ip_proto) { - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: - cpu_idx = 2; - break; - case IPPROTO_TCP: - cpu_idx = 0; - break; - case IPPROTO_UDP: - cpu_idx = 1; - break; - default: - cpu_idx = 0; - } - - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_lookup) - return XDP_ABORTED; - cpu_dest = *cpu_lookup; - - if (cpu_dest >= nr_cpus) { - NO_TEAR_INC(rec->issue); - return XDP_ABORTED; - } - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp") -int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = bpf_get_smp_processor_id(); - struct ethhdr *eth = data; - u8 ip_proto = IPPROTO_UDP; - struct datarec *rec; - u16 eth_proto = 0; - u64 l3_offset = 0; - u32 cpu_dest = 0; - u32 *cpu_lookup; - u32 cpu_idx = 0; - u16 dest_port; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) - return XDP_PASS; /* Just skip */ - - /* Extract L4 protocol */ - switch (eth_proto) { - case ETH_P_IP: - ip_proto = get_proto_ipv4(ctx, l3_offset); - break; - case ETH_P_IPV6: - ip_proto = get_proto_ipv6(ctx, l3_offset); - break; - case ETH_P_ARP: - cpu_idx = 0; /* ARP packet handled on separate CPU */ - break; - default: - cpu_idx = 0; - } - - /* Choose CPU based on L4 protocol */ - switch (ip_proto) { - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: - cpu_idx = 2; - break; - case IPPROTO_TCP: - cpu_idx = 0; - break; - case IPPROTO_UDP: - cpu_idx = 1; - /* DDoS filter UDP port 9 (pktgen) */ - dest_port = get_dest_port_ipv4_udp(ctx, l3_offset); - if (dest_port == 9) { - NO_TEAR_INC(rec->dropped); - return XDP_DROP; - } - break; - default: - cpu_idx = 0; - } - - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_lookup) - return XDP_ABORTED; - cpu_dest = *cpu_lookup; - - if (cpu_dest >= nr_cpus) { - NO_TEAR_INC(rec->issue); - return XDP_ABORTED; - } - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -/* Hashing initval */ -#define INITVAL 15485863 - -static __always_inline -u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct iphdr *iph = data + nh_off; - u32 cpu_hash; - - if (iph + 1 > data_end) - return 0; - - cpu_hash = iph->saddr + iph->daddr; - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); - - return cpu_hash; -} - -static __always_inline -u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ipv6hdr *ip6h = data + nh_off; - u32 cpu_hash; - - if (ip6h + 1 > data_end) - return 0; - - cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0]; - cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1]; - cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2]; - cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3]; - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr); - - return cpu_hash; -} - -/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The - * hashing scheme is symmetric, meaning swapping IP src/dest still hit - * same CPU. - */ -SEC("xdp") -int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = bpf_get_smp_processor_id(); - struct ethhdr *eth = data; - struct datarec *rec; - u16 eth_proto = 0; - u64 l3_offset = 0; - u32 cpu_dest = 0; - u32 cpu_idx = 0; - u32 *cpu_lookup; - u32 key0 = 0; - u32 *cpu_max; - u32 cpu_hash; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); - if (!cpu_max) - return XDP_ABORTED; - - if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) - return XDP_PASS; /* Just skip */ - - /* Hash for IPv4 and IPv6 */ - switch (eth_proto) { - case ETH_P_IP: - cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset); - break; - case ETH_P_IPV6: - cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset); - break; - case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */ - default: - cpu_hash = 0; - } - - /* Choose CPU based on hash */ - cpu_idx = cpu_hash % *cpu_max; - - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_lookup) - return XDP_ABORTED; - cpu_dest = *cpu_lookup; - - if (cpu_dest >= nr_cpus) { - NO_TEAR_INC(rec->issue); - return XDP_ABORTED; - } - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp/cpumap") -int xdp_redirect_cpu_devmap(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - swap_src_dst_mac(data); - return bpf_redirect_map(&tx_port, 0, 0); -} - -SEC("xdp/cpumap") -int xdp_redirect_cpu_pass(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -SEC("xdp/cpumap") -int xdp_redirect_cpu_drop(struct xdp_md *ctx) -{ - return XDP_DROP; -} - -SEC("xdp/devmap") -int xdp_redirect_egress_prog(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); - - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c deleted file mode 100644 index a12381c37d2b..000000000000 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. - */ -static const char *__doc__ = -"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n" -"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n" -"Valid specification for CPUMAP BPF program:\n" -" --mprog-name/-e pass (use built-in XDP_PASS program)\n" -" --mprog-name/-e drop (use built-in XDP_DROP program)\n" -" --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n" -" Custom CPUMAP BPF program:\n" -" --mprog-filename/-f <filename> --mprog-name/-e <program>\n" -" Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n" -" to configure DEVMAP in BPF object <filename>\n"; - -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> -#include <locale.h> -#include <sys/sysinfo.h> -#include <getopt.h> -#include <net/if.h> -#include <time.h> -#include <linux/limits.h> -#include <arpa/inet.h> -#include <linux/if_link.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "bpf_util.h" -#include "xdp_sample_user.h" -#include "xdp_redirect_cpu.skel.h" - -static int map_fd; -static int avail_fd; -static int count_fd; - -static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | - SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT | - SAMPLE_EXCEPTION_CNT; - -DEFINE_SAMPLE_INIT(xdp_redirect_cpu); - -static const struct option long_options[] = { - { "help", no_argument, NULL, 'h' }, - { "dev", required_argument, NULL, 'd' }, - { "skb-mode", no_argument, NULL, 'S' }, - { "progname", required_argument, NULL, 'p' }, - { "qsize", required_argument, NULL, 'q' }, - { "cpu", required_argument, NULL, 'c' }, - { "stress-mode", no_argument, NULL, 'x' }, - { "force", no_argument, NULL, 'F' }, - { "interval", required_argument, NULL, 'i' }, - { "verbose", no_argument, NULL, 'v' }, - { "stats", no_argument, NULL, 's' }, - { "mprog-name", required_argument, NULL, 'e' }, - { "mprog-filename", required_argument, NULL, 'f' }, - { "redirect-device", required_argument, NULL, 'r' }, - { "redirect-map", required_argument, NULL, 'm' }, - {} -}; - -static void print_avail_progs(struct bpf_object *obj) -{ - struct bpf_program *pos; - - printf(" Programs to be used for -p/--progname:\n"); - bpf_object__for_each_program(pos, obj) { - if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) { - if (!strncmp(bpf_program__name(pos), "xdp_prognum", - sizeof("xdp_prognum") - 1)) - printf(" %s\n", bpf_program__name(pos)); - } - } -} - -static void usage(char *argv[], const struct option *long_options, - const char *doc, int mask, bool error, struct bpf_object *obj) -{ - sample_usage(argv, long_options, doc, mask, error); - print_avail_progs(obj); -} - -static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, - __u32 avail_idx, bool new) -{ - __u32 curr_cpus_count = 0; - __u32 key = 0; - int ret; - - /* Add a CPU entry to cpumap, as this allocate a cpu entry in - * the kernel for the cpu. - */ - ret = bpf_map_update_elem(map_fd, &cpu, value, 0); - if (ret < 0) { - fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno)); - return ret; - } - - /* Inform bpf_prog's that a new CPU is available to select - * from via some control maps. - */ - ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0); - if (ret < 0) { - fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno)); - return ret; - } - - /* When not replacing/updating existing entry, bump the count */ - ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count); - if (ret < 0) { - fprintf(stderr, "Failed reading curr cpus_count: %s\n", - strerror(errno)); - return ret; - } - if (new) { - curr_cpus_count++; - ret = bpf_map_update_elem(count_fd, &key, - &curr_cpus_count, 0); - if (ret < 0) { - fprintf(stderr, "Failed write curr cpus_count: %s\n", - strerror(errno)); - return ret; - } - } - - printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n", - new ? "Add new" : "Replace", cpu, avail_idx, - value->qsize, value->bpf_prog.fd, curr_cpus_count); - - return 0; -} - -/* CPUs are zero-indexed. Thus, add a special sentinel default value - * in map cpus_available to mark CPU index'es not configured - */ -static int mark_cpus_unavailable(void) -{ - int ret, i, n_cpus = libbpf_num_possible_cpus(); - __u32 invalid_cpu = n_cpus; - - for (i = 0; i < n_cpus; i++) { - ret = bpf_map_update_elem(avail_fd, &i, - &invalid_cpu, 0); - if (ret < 0) { - fprintf(stderr, "Failed marking CPU unavailable: %s\n", - strerror(errno)); - return ret; - } - } - - return 0; -} - -/* Stress cpumap management code by concurrently changing underlying cpumap */ -static void stress_cpumap(void *ctx) -{ - struct bpf_cpumap_val *value = ctx; - - /* Changing qsize will cause kernel to free and alloc a new - * bpf_cpu_map_entry, with an associated/complicated tear-down - * procedure. - */ - value->qsize = 1024; - create_cpu_entry(1, value, 0, false); - value->qsize = 8; - create_cpu_entry(1, value, 0, false); - value->qsize = 16000; - create_cpu_entry(1, value, 0, false); -} - -static int set_cpumap_prog(struct xdp_redirect_cpu *skel, - const char *redir_interface, const char *redir_map, - const char *mprog_filename, const char *mprog_name) -{ - if (mprog_filename) { - struct bpf_program *prog; - struct bpf_object *obj; - int ret; - - if (!mprog_name) { - fprintf(stderr, "BPF program not specified for file %s\n", - mprog_filename); - goto end; - } - if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) { - fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n", - redir_interface ? "device" : "map", redir_interface ? "map" : "device"); - goto end; - } - - /* Custom BPF program */ - obj = bpf_object__open_file(mprog_filename, NULL); - if (!obj) { - ret = -errno; - fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n", - strerror(errno)); - return ret; - } - - ret = bpf_object__load(obj); - if (ret < 0) { - ret = -errno; - fprintf(stderr, "Failed to bpf_object__load: %s\n", - strerror(errno)); - return ret; - } - - if (redir_map) { - int err, redir_map_fd, ifindex_out, key = 0; - - redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); - if (redir_map_fd < 0) { - fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n", - strerror(errno)); - return redir_map_fd; - } - - ifindex_out = if_nametoindex(redir_interface); - if (!ifindex_out) - ifindex_out = strtoul(redir_interface, NULL, 0); - if (!ifindex_out) { - fprintf(stderr, "Bad interface name or index\n"); - return -EINVAL; - } - - err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0); - if (err < 0) - return err; - } - - prog = bpf_object__find_program_by_name(obj, mprog_name); - if (!prog) { - ret = -errno; - fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n", - strerror(errno)); - return ret; - } - - return bpf_program__fd(prog); - } else { - if (mprog_name) { - if (redir_interface || redir_map) { - fprintf(stderr, "Need to specify --mprog-filename/-f\n"); - goto end; - } - if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) { - /* Use built-in pass/drop programs */ - return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass) - : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop); - } else { - fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n", - mprog_name); - goto end; - } - } else { - if (redir_map) { - fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and" - " --redirect-device with --redirect-map\n"); - goto end; - } - if (redir_interface) { - /* Use built-in devmap redirect */ - struct bpf_devmap_val val = {}; - int ifindex_out, err; - __u32 key = 0; - - if (!redir_interface) - return 0; - - ifindex_out = if_nametoindex(redir_interface); - if (!ifindex_out) - ifindex_out = strtoul(redir_interface, NULL, 0); - if (!ifindex_out) { - fprintf(stderr, "Bad interface name or index\n"); - return -EINVAL; - } - - if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) { - printf("Get interface %d mac failed\n", ifindex_out); - return -EINVAL; - } - - val.ifindex = ifindex_out; - val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog); - err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0); - if (err < 0) - return -errno; - - return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap); - } - } - } - - /* Disabled */ - return 0; -end: - fprintf(stderr, "Invalid options for CPUMAP BPF program\n"); - return -EINVAL; -} - -int main(int argc, char **argv) -{ - const char *redir_interface = NULL, *redir_map = NULL; - const char *mprog_filename = NULL, *mprog_name = NULL; - struct xdp_redirect_cpu *skel; - struct bpf_map_info info = {}; - struct bpf_cpumap_val value; - __u32 infosz = sizeof(info); - int ret = EXIT_FAIL_OPTION; - unsigned long interval = 2; - bool stress_mode = false; - struct bpf_program *prog; - const char *prog_name; - bool generic = false; - bool force = false; - int added_cpus = 0; - bool error = true; - int longindex = 0; - int add_cpu = -1; - int ifindex = -1; - int *cpu, i, opt; - __u32 qsize; - int n_cpus; - - n_cpus = libbpf_num_possible_cpus(); - - /* Notice: Choosing the queue size is very important when CPU is - * configured with power-saving states. - * - * If deepest state take 133 usec to wakeup from (133/10^6). When link - * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can - * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) = - * 166250 bytes. With MTU size packets this is 110 packets, and with - * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets. - * - * Setting default cpumap queue to 2048 as worst-case (small packet) - * should be +64 packet due kthread wakeup call (due to xdp_do_flush) - * worst-case is 2043 packets. - * - * Sysadm can configured system to avoid deep-sleep via: - * tuned-adm profile network-latency - */ - qsize = 2048; - - skel = xdp_redirect_cpu__open(); - if (!skel) { - fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end; - } - - ret = sample_init_pre_load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) { - fprintf(stderr, "Failed to set max entries for cpu_map map: %s", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) { - fprintf(stderr, "Failed to set max entries for cpus_available map: %s", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - cpu = calloc(n_cpus, sizeof(int)); - if (!cpu) { - fprintf(stderr, "Failed to allocate cpu array\n"); - goto end_destroy; - } - - prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs; - while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh", - long_options, &longindex)) != -1) { - switch (opt) { - case 'd': - if (strlen(optarg) >= IF_NAMESIZE) { - fprintf(stderr, "-d/--dev name too long\n"); - usage(argv, long_options, __doc__, mask, true, skel->obj); - goto end_cpu; - } - ifindex = if_nametoindex(optarg); - if (!ifindex) - ifindex = strtoul(optarg, NULL, 0); - if (!ifindex) { - fprintf(stderr, "Bad interface index or name (%d): %s\n", - errno, strerror(errno)); - usage(argv, long_options, __doc__, mask, true, skel->obj); - goto end_cpu; - } - break; - case 's': - mask |= SAMPLE_REDIRECT_MAP_CNT; - break; - case 'i': - interval = strtoul(optarg, NULL, 0); - break; - case 'S': - generic = true; - break; - case 'x': - stress_mode = true; - break; - case 'p': - /* Selecting eBPF prog to load */ - prog_name = optarg; - prog = bpf_object__find_program_by_name(skel->obj, - prog_name); - if (!prog) { - fprintf(stderr, - "Failed to find program %s specified by" - " option -p/--progname\n", - prog_name); - print_avail_progs(skel->obj); - goto end_cpu; - } - break; - case 'f': - mprog_filename = optarg; - break; - case 'e': - mprog_name = optarg; - break; - case 'r': - redir_interface = optarg; - mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI; - break; - case 'm': - redir_map = optarg; - break; - case 'c': - /* Add multiple CPUs */ - add_cpu = strtoul(optarg, NULL, 0); - if (add_cpu >= n_cpus) { - fprintf(stderr, - "--cpu nr too large for cpumap err (%d):%s\n", - errno, strerror(errno)); - usage(argv, long_options, __doc__, mask, true, skel->obj); - goto end_cpu; - } - cpu[added_cpus++] = add_cpu; - break; - case 'q': - qsize = strtoul(optarg, NULL, 0); - break; - case 'F': - force = true; - break; - case 'v': - sample_switch_mode(); - break; - case 'h': - error = false; - default: - usage(argv, long_options, __doc__, mask, error, skel->obj); - goto end_cpu; - } - } - - ret = EXIT_FAIL_OPTION; - if (ifindex == -1) { - fprintf(stderr, "Required option --dev missing\n"); - usage(argv, long_options, __doc__, mask, true, skel->obj); - goto end_cpu; - } - - if (add_cpu == -1) { - fprintf(stderr, "Required option --cpu missing\n" - "Specify multiple --cpu option to add more\n"); - usage(argv, long_options, __doc__, mask, true, skel->obj); - goto end_cpu; - } - - skel->rodata->from_match[0] = ifindex; - if (redir_interface) - skel->rodata->to_match[0] = if_nametoindex(redir_interface); - - ret = xdp_redirect_cpu__load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n", - strerror(errno)); - goto end_cpu; - } - - ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz); - if (ret < 0) { - fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n", - strerror(errno)); - goto end_cpu; - } - - skel->bss->cpumap_map_id = info.id; - - map_fd = bpf_map__fd(skel->maps.cpu_map); - avail_fd = bpf_map__fd(skel->maps.cpus_available); - count_fd = bpf_map__fd(skel->maps.cpus_count); - - ret = mark_cpus_unavailable(); - if (ret < 0) { - fprintf(stderr, "Unable to mark CPUs as unavailable\n"); - goto end_cpu; - } - - ret = sample_init(skel, mask); - if (ret < 0) { - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_cpu; - } - - value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map, - mprog_filename, mprog_name); - if (value.bpf_prog.fd < 0) { - fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n", - strerror(-value.bpf_prog.fd)); - usage(argv, long_options, __doc__, mask, true, skel->obj); - ret = EXIT_FAIL_BPF; - goto end_cpu; - } - value.qsize = qsize; - - for (i = 0; i < added_cpus; i++) { - if (create_cpu_entry(cpu[i], &value, i, true) < 0) { - fprintf(stderr, "Cannot proceed, exiting\n"); - usage(argv, long_options, __doc__, mask, true, skel->obj); - goto end_cpu; - } - } - - ret = EXIT_FAIL_XDP; - if (sample_install_xdp(prog, ifindex, generic, force) < 0) - goto end_cpu; - - ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value); - if (ret < 0) { - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_cpu; - } - ret = EXIT_OK; -end_cpu: - free(cpu); -end_destroy: - xdp_redirect_cpu__destroy(skel); -end: - sample_exit(ret); -} diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c deleted file mode 100644 index 8557c278df77..000000000000 --- a/samples/bpf/xdp_redirect_map.bpf.c +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#define KBUILD_MODNAME "foo" - -#include "vmlinux.h" -#include "xdp_sample.bpf.h" -#include "xdp_sample_shared.h" - -/* The 2nd xdp prog on egress does not support skb mode, so we define two - * maps, tx_port_general and tx_port_native. - */ -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); - __uint(max_entries, 1); -} tx_port_general SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(struct bpf_devmap_val)); - __uint(max_entries, 1); -} tx_port_native SEC(".maps"); - -/* store egress interface mac address */ -const volatile __u8 tx_mac_addr[ETH_ALEN]; - -static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = bpf_get_smp_processor_id(); - struct ethhdr *eth = data; - struct datarec *rec; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - swap_src_dst_mac(data); - return bpf_redirect_map(redirect_map, 0, 0); -} - -SEC("xdp") -int xdp_redirect_map_general(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &tx_port_general); -} - -SEC("xdp") -int xdp_redirect_map_native(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &tx_port_native); -} - -SEC("xdp/devmap") -int xdp_redirect_map_egress(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u8 *mac_addr = (u8 *) tx_mac_addr; - struct ethhdr *eth = data; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - barrier_var(mac_addr); /* prevent optimizing out memcpy */ - __builtin_memcpy(eth->h_source, mac_addr, ETH_ALEN); - - return XDP_PASS; -} - -/* Redirect require an XDP bpf_prog loaded on the TX device */ -SEC("xdp") -int xdp_redirect_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c deleted file mode 100644 index 8b2fd4ec2c76..000000000000 --- a/samples/bpf/xdp_redirect_map_multi.bpf.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define KBUILD_MODNAME "foo" - -#include "vmlinux.h" -#include "xdp_sample.bpf.h" -#include "xdp_sample_shared.h" - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); - __uint(max_entries, 32); -} forward_map_general SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(struct bpf_devmap_val)); - __uint(max_entries, 32); -} forward_map_native SEC(".maps"); - -/* map to store egress interfaces mac addresses */ -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, u32); - __type(value, __be64); - __uint(max_entries, 32); -} mac_map SEC(".maps"); - -static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map) -{ - u32 key = bpf_get_smp_processor_id(); - struct datarec *rec; - - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_PASS; - NO_TEAR_INC(rec->processed); - - return bpf_redirect_map(forward_map, 0, - BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); -} - -SEC("xdp") -int xdp_redirect_map_general(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &forward_map_general); -} - -SEC("xdp") -int xdp_redirect_map_native(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &forward_map_native); -} - -SEC("xdp/devmap") -int xdp_devmap_prog(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = ctx->egress_ifindex; - struct ethhdr *eth = data; - __be64 *mac; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - mac = bpf_map_lookup_elem(&mac_map, &key); - if (mac) - __builtin_memcpy(eth->h_source, mac, ETH_ALEN); - - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c deleted file mode 100644 index 9e24f2705b67..000000000000 --- a/samples/bpf/xdp_redirect_map_multi_user.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -static const char *__doc__ = -"XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n" -"Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n"; - -#include <linux/bpf.h> -#include <linux/if_link.h> -#include <assert.h> -#include <getopt.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <net/if.h> -#include <unistd.h> -#include <libgen.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <linux/if_ether.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "bpf_util.h" -#include "xdp_sample_user.h" -#include "xdp_redirect_map_multi.skel.h" - -#define MAX_IFACE_NUM 32 -static int ifaces[MAX_IFACE_NUM] = {}; - -static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT | - SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING; - -DEFINE_SAMPLE_INIT(xdp_redirect_map_multi); - -static const struct option long_options[] = { - { "help", no_argument, NULL, 'h' }, - { "skb-mode", no_argument, NULL, 'S' }, - { "force", no_argument, NULL, 'F' }, - { "load-egress", no_argument, NULL, 'X' }, - { "stats", no_argument, NULL, 's' }, - { "interval", required_argument, NULL, 'i' }, - { "verbose", no_argument, NULL, 'v' }, - {} -}; - -static int update_mac_map(struct bpf_map *map) -{ - int mac_map_fd = bpf_map__fd(map); - unsigned char mac_addr[6]; - unsigned int ifindex; - int i, ret = -1; - - for (i = 0; ifaces[i] > 0; i++) { - ifindex = ifaces[i]; - - ret = get_mac_addr(ifindex, mac_addr); - if (ret < 0) { - fprintf(stderr, "get interface %d mac failed\n", - ifindex); - return ret; - } - - ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0); - if (ret < 0) { - fprintf(stderr, "Failed to update mac address for ifindex %d\n", - ifindex); - return ret; - } - } - - return 0; -} - -int main(int argc, char **argv) -{ - struct bpf_devmap_val devmap_val = {}; - struct xdp_redirect_map_multi *skel; - struct bpf_program *ingress_prog; - bool xdp_devmap_attached = false; - struct bpf_map *forward_map; - int ret = EXIT_FAIL_OPTION; - unsigned long interval = 2; - char ifname[IF_NAMESIZE]; - unsigned int ifindex; - bool generic = false; - bool force = false; - bool tried = false; - bool error = true; - int i, opt; - - while ((opt = getopt_long(argc, argv, "hSFXi:vs", - long_options, NULL)) != -1) { - switch (opt) { - case 'S': - generic = true; - /* devmap_xmit tracepoint not available */ - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | - SAMPLE_DEVMAP_XMIT_CNT_MULTI); - break; - case 'F': - force = true; - break; - case 'X': - xdp_devmap_attached = true; - break; - case 'i': - interval = strtoul(optarg, NULL, 0); - break; - case 'v': - sample_switch_mode(); - break; - case 's': - mask |= SAMPLE_REDIRECT_MAP_CNT; - break; - case 'h': - error = false; - default: - sample_usage(argv, long_options, __doc__, mask, error); - return ret; - } - } - - if (argc <= optind + 1) { - sample_usage(argv, long_options, __doc__, mask, error); - return ret; - } - - skel = xdp_redirect_map_multi__open(); - if (!skel) { - fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end; - } - - ret = sample_init_pre_load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - ret = EXIT_FAIL_OPTION; - for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { - ifaces[i] = if_nametoindex(argv[optind + i]); - if (!ifaces[i]) - ifaces[i] = strtoul(argv[optind + i], NULL, 0); - if (!if_indextoname(ifaces[i], ifname)) { - fprintf(stderr, "Bad interface index or name\n"); - sample_usage(argv, long_options, __doc__, mask, true); - goto end_destroy; - } - - skel->rodata->from_match[i] = ifaces[i]; - skel->rodata->to_match[i] = ifaces[i]; - } - - ret = xdp_redirect_map_multi__load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - if (xdp_devmap_attached) { - /* Update mac_map with all egress interfaces' mac addr */ - if (update_mac_map(skel->maps.mac_map) < 0) { - fprintf(stderr, "Updating mac address failed\n"); - ret = EXIT_FAIL; - goto end_destroy; - } - } - - ret = sample_init(skel, mask); - if (ret < 0) { - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - - ingress_prog = skel->progs.xdp_redirect_map_native; - forward_map = skel->maps.forward_map_native; - - for (i = 0; ifaces[i] > 0; i++) { - ifindex = ifaces[i]; - - ret = EXIT_FAIL_XDP; -restart: - /* bind prog_fd to each interface */ - if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) { - if (generic && !tried) { - fprintf(stderr, - "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); - ingress_prog = skel->progs.xdp_redirect_map_general; - forward_map = skel->maps.forward_map_general; - tried = true; - goto restart; - } - goto end_destroy; - } - - /* Add all the interfaces to forward group and attach - * egress devmap program if exist - */ - devmap_val.ifindex = ifindex; - if (xdp_devmap_attached) - devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog); - ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0); - if (ret < 0) { - fprintf(stderr, "Failed to update devmap value: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - } - - ret = sample_run(interval, NULL, NULL); - if (ret < 0) { - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - ret = EXIT_OK; -end_destroy: - xdp_redirect_map_multi__destroy(skel); -end: - sample_exit(ret); -} diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c deleted file mode 100644 index c889a1394dc1..000000000000 --- a/samples/bpf/xdp_redirect_map_user.c +++ /dev/null @@ -1,228 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io - */ -static const char *__doc__ = -"XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n" -"Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n"; - -#include <linux/bpf.h> -#include <linux/if_link.h> -#include <assert.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <net/if.h> -#include <unistd.h> -#include <libgen.h> -#include <getopt.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "bpf_util.h" -#include "xdp_sample_user.h" -#include "xdp_redirect_map.skel.h" - -static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; - -DEFINE_SAMPLE_INIT(xdp_redirect_map); - -static const struct option long_options[] = { - { "help", no_argument, NULL, 'h' }, - { "skb-mode", no_argument, NULL, 'S' }, - { "force", no_argument, NULL, 'F' }, - { "load-egress", no_argument, NULL, 'X' }, - { "stats", no_argument, NULL, 's' }, - { "interval", required_argument, NULL, 'i' }, - { "verbose", no_argument, NULL, 'v' }, - {} -}; - -static int verbose = 0; - -int main(int argc, char **argv) -{ - struct bpf_devmap_val devmap_val = {}; - bool xdp_devmap_attached = false; - struct xdp_redirect_map *skel; - char str[2 * IF_NAMESIZE + 1]; - char ifname_out[IF_NAMESIZE]; - struct bpf_map *tx_port_map; - char ifname_in[IF_NAMESIZE]; - int ifindex_in, ifindex_out; - unsigned long interval = 2; - int ret = EXIT_FAIL_OPTION; - struct bpf_program *prog; - bool generic = false; - bool force = false; - bool tried = false; - bool error = true; - int opt, key = 0; - - while ((opt = getopt_long(argc, argv, "hSFXi:vs", - long_options, NULL)) != -1) { - switch (opt) { - case 'S': - generic = true; - /* devmap_xmit tracepoint not available */ - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | - SAMPLE_DEVMAP_XMIT_CNT_MULTI); - break; - case 'F': - force = true; - break; - case 'X': - xdp_devmap_attached = true; - break; - case 'i': - interval = strtoul(optarg, NULL, 0); - break; - case 'v': - sample_switch_mode(); - verbose = 1; - break; - case 's': - mask |= SAMPLE_REDIRECT_MAP_CNT; - break; - case 'h': - error = false; - default: - sample_usage(argv, long_options, __doc__, mask, error); - return ret; - } - } - - if (argc <= optind + 1) { - sample_usage(argv, long_options, __doc__, mask, true); - goto end; - } - - ifindex_in = if_nametoindex(argv[optind]); - if (!ifindex_in) - ifindex_in = strtoul(argv[optind], NULL, 0); - - ifindex_out = if_nametoindex(argv[optind + 1]); - if (!ifindex_out) - ifindex_out = strtoul(argv[optind + 1], NULL, 0); - - if (!ifindex_in || !ifindex_out) { - fprintf(stderr, "Bad interface index or name\n"); - sample_usage(argv, long_options, __doc__, mask, true); - goto end; - } - - skel = xdp_redirect_map__open(); - if (!skel) { - fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end; - } - - ret = sample_init_pre_load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - /* Load 2nd xdp prog on egress. */ - if (xdp_devmap_attached) { - ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr); - if (ret < 0) { - fprintf(stderr, "Failed to get interface %d mac address: %s\n", - ifindex_out, strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - if (verbose) - printf("Egress ifindex:%d using src MAC %02x:%02x:%02x:%02x:%02x:%02x\n", - ifindex_out, - skel->rodata->tx_mac_addr[0], skel->rodata->tx_mac_addr[1], - skel->rodata->tx_mac_addr[2], skel->rodata->tx_mac_addr[3], - skel->rodata->tx_mac_addr[4], skel->rodata->tx_mac_addr[5]); - } - - skel->rodata->from_match[0] = ifindex_in; - skel->rodata->to_match[0] = ifindex_out; - - ret = xdp_redirect_map__load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - ret = sample_init(skel, mask); - if (ret < 0) { - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - - prog = skel->progs.xdp_redirect_map_native; - tx_port_map = skel->maps.tx_port_native; -restart: - if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) { - /* First try with struct bpf_devmap_val as value for generic - * mode, then fallback to sizeof(int) for older kernels. - */ - fprintf(stderr, - "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); - if (generic && !tried) { - prog = skel->progs.xdp_redirect_map_general; - tx_port_map = skel->maps.tx_port_general; - tried = true; - goto restart; - } - ret = EXIT_FAIL_XDP; - goto end_destroy; - } - - /* Loading dummy XDP prog on out-device */ - sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force); - - devmap_val.ifindex = ifindex_out; - if (xdp_devmap_attached) - devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress); - ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0); - if (ret < 0) { - fprintf(stderr, "Failed to update devmap value: %s\n", - strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - ret = EXIT_FAIL; - if (!if_indextoname(ifindex_in, ifname_in)) { - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, - strerror(errno)); - goto end_destroy; - } - - if (!if_indextoname(ifindex_out, ifname_out)) { - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, - strerror(errno)); - goto end_destroy; - } - - safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); - printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", - ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); - snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); - - ret = sample_run(interval, NULL, NULL); - if (ret < 0) { - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - ret = EXIT_OK; -end_destroy: - xdp_redirect_map__destroy(skel); -end: - sample_exit(ret); -} diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c deleted file mode 100644 index 8663dd631b6e..000000000000 --- a/samples/bpf/xdp_redirect_user.c +++ /dev/null @@ -1,172 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> - */ -static const char *__doc__ = -"XDP redirect tool, using bpf_redirect helper\n" -"Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n"; - -#include <linux/bpf.h> -#include <linux/if_link.h> -#include <assert.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <net/if.h> -#include <unistd.h> -#include <libgen.h> -#include <getopt.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "bpf_util.h" -#include "xdp_sample_user.h" -#include "xdp_redirect.skel.h" - -static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT | - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; - -DEFINE_SAMPLE_INIT(xdp_redirect); - -static const struct option long_options[] = { - {"help", no_argument, NULL, 'h' }, - {"skb-mode", no_argument, NULL, 'S' }, - {"force", no_argument, NULL, 'F' }, - {"stats", no_argument, NULL, 's' }, - {"interval", required_argument, NULL, 'i' }, - {"verbose", no_argument, NULL, 'v' }, - {} -}; - -int main(int argc, char **argv) -{ - int ifindex_in, ifindex_out, opt; - char str[2 * IF_NAMESIZE + 1]; - char ifname_out[IF_NAMESIZE]; - char ifname_in[IF_NAMESIZE]; - int ret = EXIT_FAIL_OPTION; - unsigned long interval = 2; - struct xdp_redirect *skel; - bool generic = false; - bool force = false; - bool error = true; - - while ((opt = getopt_long(argc, argv, "hSFi:vs", - long_options, NULL)) != -1) { - switch (opt) { - case 'S': - generic = true; - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | - SAMPLE_DEVMAP_XMIT_CNT_MULTI); - break; - case 'F': - force = true; - break; - case 'i': - interval = strtoul(optarg, NULL, 0); - break; - case 'v': - sample_switch_mode(); - break; - case 's': - mask |= SAMPLE_REDIRECT_CNT; - break; - case 'h': - error = false; - default: - sample_usage(argv, long_options, __doc__, mask, error); - return ret; - } - } - - if (argc <= optind + 1) { - sample_usage(argv, long_options, __doc__, mask, true); - return ret; - } - - ifindex_in = if_nametoindex(argv[optind]); - if (!ifindex_in) - ifindex_in = strtoul(argv[optind], NULL, 0); - - ifindex_out = if_nametoindex(argv[optind + 1]); - if (!ifindex_out) - ifindex_out = strtoul(argv[optind + 1], NULL, 0); - - if (!ifindex_in || !ifindex_out) { - fprintf(stderr, "Bad interface index or name\n"); - sample_usage(argv, long_options, __doc__, mask, true); - goto end; - } - - skel = xdp_redirect__open(); - if (!skel) { - fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end; - } - - ret = sample_init_pre_load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - skel->rodata->from_match[0] = ifindex_in; - skel->rodata->to_match[0] = ifindex_out; - skel->rodata->ifindex_out = ifindex_out; - - ret = xdp_redirect__load(skel); - if (ret < 0) { - fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno)); - ret = EXIT_FAIL_BPF; - goto end_destroy; - } - - ret = sample_init(skel, mask); - if (ret < 0) { - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - - ret = EXIT_FAIL_XDP; - if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in, - generic, force) < 0) - goto end_destroy; - - /* Loading dummy XDP prog on out-device */ - sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, - generic, force); - - ret = EXIT_FAIL; - if (!if_indextoname(ifindex_in, ifname_in)) { - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, - strerror(errno)); - goto end_destroy; - } - - if (!if_indextoname(ifindex_out, ifname_out)) { - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, - strerror(errno)); - goto end_destroy; - } - - safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); - printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", - ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); - snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); - - ret = sample_run(interval, NULL, NULL); - if (ret < 0) { - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); - ret = EXIT_FAIL; - goto end_destroy; - } - ret = EXIT_OK; -end_destroy: - xdp_redirect__destroy(skel); -end: - sample_exit(ret); -} diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 9d41db09c480..266fdd0b025d 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock) static void read_route(struct nlmsghdr *nh, int nll) { char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24]; - struct bpf_lpm_trie_key *prefix_key; + struct bpf_lpm_trie_key_u8 *prefix_key; struct rtattr *rt_attr; struct rtmsg *rt_msg; int rtm_family; diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c deleted file mode 100644 index 5e7459f9bf3e..000000000000 --- a/samples/bpf/xdp_rxq_info_kern.c +++ /dev/null @@ -1,140 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. - * - * Example howto extract XDP RX-queue info - */ -#include <uapi/linux/bpf.h> -#include <uapi/linux/if_ether.h> -#include <uapi/linux/in.h> -#include <bpf/bpf_helpers.h> - -/* Config setup from with userspace - * - * User-side setup ifindex in config_map, to verify that - * ctx->ingress_ifindex is correct (against configured ifindex) - */ -struct config { - __u32 action; - int ifindex; - __u32 options; -}; -enum cfg_options_flags { - NO_TOUCH = 0x0U, - READ_MEM = 0x1U, - SWAP_MAC = 0x2U, -}; - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, struct config); - __uint(max_entries, 1); -} config_map SEC(".maps"); - -/* Common stats data record (shared with userspace) */ -struct datarec { - __u64 processed; - __u64 issue; -}; - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, 1); -} stats_global_map SEC(".maps"); - -#define MAX_RXQs 64 - -/* Stats per rx_queue_index (per CPU) */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, MAX_RXQs + 1); -} rx_queue_index_map SEC(".maps"); - -static __always_inline -void swap_src_dst_mac(void *data) -{ - unsigned short *p = data; - unsigned short dst[3]; - - dst[0] = p[0]; - dst[1] = p[1]; - dst[2] = p[2]; - p[0] = p[3]; - p[1] = p[4]; - p[2] = p[5]; - p[3] = dst[0]; - p[4] = dst[1]; - p[5] = dst[2]; -} - -SEC("xdp_prog0") -int xdp_prognum0(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct datarec *rec, *rxq_rec; - int ingress_ifindex; - struct config *config; - u32 key = 0; - - /* Global stats record */ - rec = bpf_map_lookup_elem(&stats_global_map, &key); - if (!rec) - return XDP_ABORTED; - rec->processed++; - - /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF - * instructions inside kernel to access xdp_rxq->dev->ifindex - */ - ingress_ifindex = ctx->ingress_ifindex; - - config = bpf_map_lookup_elem(&config_map, &key); - if (!config) - return XDP_ABORTED; - - /* Simple test: check ctx provided ifindex is as expected */ - if (ingress_ifindex != config->ifindex) { - /* count this error case */ - rec->issue++; - return XDP_ABORTED; - } - - /* Update stats per rx_queue_index. Handle if rx_queue_index - * is larger than stats map can contain info for. - */ - key = ctx->rx_queue_index; - if (key >= MAX_RXQs) - key = MAX_RXQs; - rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key); - if (!rxq_rec) - return XDP_ABORTED; - rxq_rec->processed++; - if (key == MAX_RXQs) - rxq_rec->issue++; - - /* Default: Don't touch packet data, only count packets */ - if (unlikely(config->options & (READ_MEM|SWAP_MAC))) { - struct ethhdr *eth = data; - - if (eth + 1 > data_end) - return XDP_ABORTED; - - /* Avoid compiler removing this: Drop non 802.3 Ethertypes */ - if (ntohs(eth->h_proto) < ETH_P_802_3_MIN) - return XDP_ABORTED; - - /* XDP_TX requires changing MAC-addrs, else HW may drop. - * Can also be enabled with --swapmac (for test purposes) - */ - if (unlikely(config->options & SWAP_MAC)) - swap_src_dst_mac(data); - } - - return config->action; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c deleted file mode 100644 index 08f5331d2b00..000000000000 --- a/samples/bpf/xdp_rxq_info_user.c +++ /dev/null @@ -1,614 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. - */ -static const char *__doc__ = " XDP RX-queue info extract example\n\n" - "Monitor how many packets per sec (pps) are received\n" - "per NIC RX queue index and which CPU processed the packet\n" - ; - -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> -#include <locale.h> -#include <getopt.h> -#include <net/if.h> -#include <time.h> -#include <limits.h> -#include <arpa/inet.h> -#include <linux/if_link.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include "bpf_util.h" - -static int ifindex = -1; -static char ifname_buf[IF_NAMESIZE]; -static char *ifname; -static __u32 prog_id; - -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; - -static struct bpf_map *stats_global_map; -static struct bpf_map *rx_queue_index_map; - -/* Exit return codes */ -#define EXIT_OK 0 -#define EXIT_FAIL 1 -#define EXIT_FAIL_OPTION 2 -#define EXIT_FAIL_XDP 3 -#define EXIT_FAIL_BPF 4 -#define EXIT_FAIL_MEM 5 - -#define FAIL_MEM_SIG INT_MAX -#define FAIL_STAT_SIG (INT_MAX - 1) - -static const struct option long_options[] = { - {"help", no_argument, NULL, 'h' }, - {"dev", required_argument, NULL, 'd' }, - {"skb-mode", no_argument, NULL, 'S' }, - {"sec", required_argument, NULL, 's' }, - {"no-separators", no_argument, NULL, 'z' }, - {"action", required_argument, NULL, 'a' }, - {"readmem", no_argument, NULL, 'r' }, - {"swapmac", no_argument, NULL, 'm' }, - {"force", no_argument, NULL, 'F' }, - {0, 0, NULL, 0 } -}; - -static void int_exit(int sig) -{ - __u32 curr_prog_id = 0; - - if (ifindex > -1) { - if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { - printf("bpf_xdp_query_id failed\n"); - exit(EXIT_FAIL); - } - if (prog_id == curr_prog_id) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - bpf_xdp_detach(ifindex, xdp_flags, NULL); - } else if (!curr_prog_id) { - printf("couldn't find a prog id on a given iface\n"); - } else { - printf("program on interface changed, not removing\n"); - } - } - - if (sig == FAIL_MEM_SIG) - exit(EXIT_FAIL_MEM); - else if (sig == FAIL_STAT_SIG) - exit(EXIT_FAIL); - - exit(EXIT_OK); -} - -struct config { - __u32 action; - int ifindex; - __u32 options; -}; -enum cfg_options_flags { - NO_TOUCH = 0x0U, - READ_MEM = 0x1U, - SWAP_MAC = 0x2U, -}; -#define XDP_ACTION_MAX (XDP_TX + 1) -#define XDP_ACTION_MAX_STRLEN 11 -static const char *xdp_action_names[XDP_ACTION_MAX] = { - [XDP_ABORTED] = "XDP_ABORTED", - [XDP_DROP] = "XDP_DROP", - [XDP_PASS] = "XDP_PASS", - [XDP_TX] = "XDP_TX", -}; - -static const char *action2str(int action) -{ - if (action < XDP_ACTION_MAX) - return xdp_action_names[action]; - return NULL; -} - -static int parse_xdp_action(char *action_str) -{ - size_t maxlen; - __u64 action = -1; - int i; - - for (i = 0; i < XDP_ACTION_MAX; i++) { - maxlen = XDP_ACTION_MAX_STRLEN; - if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) { - action = i; - break; - } - } - return action; -} - -static void list_xdp_actions(void) -{ - int i; - - printf("Available XDP --action <options>\n"); - for (i = 0; i < XDP_ACTION_MAX; i++) - printf("\t%s\n", xdp_action_names[i]); - printf("\n"); -} - -static char* options2str(enum cfg_options_flags flag) -{ - if (flag == NO_TOUCH) - return "no_touch"; - if (flag & SWAP_MAC) - return "swapmac"; - if (flag & READ_MEM) - return "read"; - fprintf(stderr, "ERR: Unknown config option flags"); - int_exit(FAIL_STAT_SIG); - return "unknown"; -} - -static void usage(char *argv[]) -{ - int i; - - printf("\nDOCUMENTATION:\n%s\n", __doc__); - printf(" Usage: %s (options-see-below)\n", argv[0]); - printf(" Listing options:\n"); - for (i = 0; long_options[i].name != 0; i++) { - printf(" --%-12s", long_options[i].name); - if (long_options[i].flag != NULL) - printf(" flag (internal value:%d)", - *long_options[i].flag); - else - printf(" short-option: -%c", - long_options[i].val); - printf("\n"); - } - printf("\n"); - list_xdp_actions(); -} - -#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ -static __u64 gettime(void) -{ - struct timespec t; - int res; - - res = clock_gettime(CLOCK_MONOTONIC, &t); - if (res < 0) { - fprintf(stderr, "Error with gettimeofday! (%i)\n", res); - int_exit(FAIL_STAT_SIG); - } - return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; -} - -/* Common stats data record shared with _kern.c */ -struct datarec { - __u64 processed; - __u64 issue; -}; -struct record { - __u64 timestamp; - struct datarec total; - struct datarec *cpu; -}; -struct stats_record { - struct record stats; - struct record *rxq; -}; - -static struct datarec *alloc_record_per_cpu(void) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - struct datarec *array; - - array = calloc(nr_cpus, sizeof(struct datarec)); - if (!array) { - fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); - int_exit(FAIL_MEM_SIG); - } - return array; -} - -static struct record *alloc_record_per_rxq(void) -{ - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); - struct record *array; - - array = calloc(nr_rxqs, sizeof(struct record)); - if (!array) { - fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); - int_exit(FAIL_MEM_SIG); - } - return array; -} - -static struct stats_record *alloc_stats_record(void) -{ - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); - struct stats_record *rec; - int i; - - rec = calloc(1, sizeof(struct stats_record)); - if (!rec) { - fprintf(stderr, "Mem alloc error\n"); - int_exit(FAIL_MEM_SIG); - } - rec->rxq = alloc_record_per_rxq(); - for (i = 0; i < nr_rxqs; i++) - rec->rxq[i].cpu = alloc_record_per_cpu(); - - rec->stats.cpu = alloc_record_per_cpu(); - return rec; -} - -static void free_stats_record(struct stats_record *r) -{ - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); - int i; - - for (i = 0; i < nr_rxqs; i++) - free(r->rxq[i].cpu); - - free(r->rxq); - free(r->stats.cpu); - free(r); -} - -static bool map_collect_percpu(int fd, __u32 key, struct record *rec) -{ - /* For percpu maps, userspace gets a value per possible CPU */ - unsigned int nr_cpus = bpf_num_possible_cpus(); - struct datarec values[nr_cpus]; - __u64 sum_processed = 0; - __u64 sum_issue = 0; - int i; - - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { - fprintf(stderr, - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); - return false; - } - /* Get time as close as possible to reading map contents */ - rec->timestamp = gettime(); - - /* Record and sum values from each CPU */ - for (i = 0; i < nr_cpus; i++) { - rec->cpu[i].processed = values[i].processed; - sum_processed += values[i].processed; - rec->cpu[i].issue = values[i].issue; - sum_issue += values[i].issue; - } - rec->total.processed = sum_processed; - rec->total.issue = sum_issue; - return true; -} - -static void stats_collect(struct stats_record *rec) -{ - int fd, i, max_rxqs; - - fd = bpf_map__fd(stats_global_map); - map_collect_percpu(fd, 0, &rec->stats); - - fd = bpf_map__fd(rx_queue_index_map); - max_rxqs = bpf_map__max_entries(rx_queue_index_map); - for (i = 0; i < max_rxqs; i++) - map_collect_percpu(fd, i, &rec->rxq[i]); -} - -static double calc_period(struct record *r, struct record *p) -{ - double period_ = 0; - __u64 period = 0; - - period = r->timestamp - p->timestamp; - if (period > 0) - period_ = ((double) period / NANOSEC_PER_SEC); - - return period_; -} - -static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) -{ - __u64 packets = 0; - __u64 pps = 0; - - if (period_ > 0) { - packets = r->processed - p->processed; - pps = packets / period_; - } - return pps; -} - -static __u64 calc_errs_pps(struct datarec *r, - struct datarec *p, double period_) -{ - __u64 packets = 0; - __u64 pps = 0; - - if (period_ > 0) { - packets = r->issue - p->issue; - pps = packets / period_; - } - return pps; -} - -static void stats_print(struct stats_record *stats_rec, - struct stats_record *stats_prev, - int action, __u32 cfg_opt) -{ - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); - unsigned int nr_cpus = bpf_num_possible_cpus(); - double pps = 0, err = 0; - struct record *rec, *prev; - double t; - int rxq; - int i; - - /* Header */ - printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n", - ifname, ifindex, action2str(action), options2str(cfg_opt)); - - /* stats_global_map */ - { - char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n"; - char *fm2_rx = "%-15s %-7s %'-11.0f\n"; - char *errstr = ""; - - printf("%-15s %-7s %-11s %-11s\n", - "XDP stats", "CPU", "pps", "issue-pps"); - - rec = &stats_rec->stats; - prev = &stats_prev->stats; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps (r, p, t); - err = calc_errs_pps(r, p, t); - if (err > 0) - errstr = "invalid-ifindex"; - if (pps > 0) - printf(fmt_rx, "XDP-RX CPU", - i, pps, err, errstr); - } - pps = calc_pps (&rec->total, &prev->total, t); - err = calc_errs_pps(&rec->total, &prev->total, t); - printf(fm2_rx, "XDP-RX CPU", "total", pps, err); - } - - /* rx_queue_index_map */ - printf("\n%-15s %-7s %-11s %-11s\n", - "RXQ stats", "RXQ:CPU", "pps", "issue-pps"); - - for (rxq = 0; rxq < nr_rxqs; rxq++) { - char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n"; - char *fm2_rx = "%-15s %3d:%-3s %'-11.0f\n"; - char *errstr = ""; - int rxq_ = rxq; - - /* Last RXQ in map catch overflows */ - if (rxq_ == nr_rxqs - 1) - rxq_ = -1; - - rec = &stats_rec->rxq[rxq]; - prev = &stats_prev->rxq[rxq]; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps (r, p, t); - err = calc_errs_pps(r, p, t); - if (err > 0) { - if (rxq_ == -1) - errstr = "map-overflow-RXQ"; - else - errstr = "err"; - } - if (pps > 0) - printf(fmt_rx, "rx_queue_index", - rxq_, i, pps, err, errstr); - } - pps = calc_pps (&rec->total, &prev->total, t); - err = calc_errs_pps(&rec->total, &prev->total, t); - if (pps || err) - printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err); - } -} - - -/* Pointer swap trick */ -static inline void swap(struct stats_record **a, struct stats_record **b) -{ - struct stats_record *tmp; - - tmp = *a; - *a = *b; - *b = tmp; -} - -static void stats_poll(int interval, int action, __u32 cfg_opt) -{ - struct stats_record *record, *prev; - - record = alloc_stats_record(); - prev = alloc_stats_record(); - stats_collect(record); - - while (1) { - swap(&prev, &record); - stats_collect(record); - stats_print(record, prev, action, cfg_opt); - sleep(interval); - } - - free_stats_record(record); - free_stats_record(prev); -} - - -int main(int argc, char **argv) -{ - __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - int prog_fd, map_fd, opt, err; - bool use_separators = true; - struct config cfg = { 0 }; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_map *map; - char filename[256]; - int longindex = 0; - int interval = 2; - __u32 key = 0; - - - char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 }; - int action = XDP_PASS; /* Default action */ - char *action_str = NULL; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) - return EXIT_FAIL; - - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); - - err = bpf_object__load(obj); - if (err) - return EXIT_FAIL; - prog_fd = bpf_program__fd(prog); - - map = bpf_object__find_map_by_name(obj, "config_map"); - stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map"); - rx_queue_index_map = bpf_object__find_map_by_name(obj, "rx_queue_index_map"); - if (!map || !stats_global_map || !rx_queue_index_map) { - printf("finding a map in obj file failed\n"); - return EXIT_FAIL; - } - map_fd = bpf_map__fd(map); - - if (!prog_fd) { - fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno)); - return EXIT_FAIL; - } - - /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:", - long_options, &longindex)) != -1) { - switch (opt) { - case 'd': - if (strlen(optarg) >= IF_NAMESIZE) { - fprintf(stderr, "ERR: --dev name too long\n"); - goto error; - } - ifname = (char *)&ifname_buf; - strncpy(ifname, optarg, IF_NAMESIZE); - ifindex = if_nametoindex(ifname); - if (ifindex == 0) { - fprintf(stderr, - "ERR: --dev name unknown err(%d):%s\n", - errno, strerror(errno)); - goto error; - } - break; - case 's': - interval = atoi(optarg); - break; - case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - case 'z': - use_separators = false; - break; - case 'a': - action_str = (char *)&action_str_buf; - strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN); - break; - case 'r': - cfg_options |= READ_MEM; - break; - case 'm': - cfg_options |= SWAP_MAC; - break; - case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; - break; - case 'h': - error: - default: - usage(argv); - return EXIT_FAIL_OPTION; - } - } - - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) - xdp_flags |= XDP_FLAGS_DRV_MODE; - - /* Required option */ - if (ifindex == -1) { - fprintf(stderr, "ERR: required option --dev missing\n"); - usage(argv); - return EXIT_FAIL_OPTION; - } - cfg.ifindex = ifindex; - - /* Parse action string */ - if (action_str) { - action = parse_xdp_action(action_str); - if (action < 0) { - fprintf(stderr, "ERR: Invalid XDP --action: %s\n", - action_str); - list_xdp_actions(); - return EXIT_FAIL_OPTION; - } - } - cfg.action = action; - - /* XDP_TX requires changing MAC-addrs, else HW may drop */ - if (action == XDP_TX) - cfg_options |= SWAP_MAC; - cfg.options = cfg_options; - - /* Trick to pretty printf with thousands separators use %' */ - if (use_separators) - setlocale(LC_NUMERIC, "en_US"); - - /* User-side setup ifindex in config_map */ - err = bpf_map_update_elem(map_fd, &key, &cfg, 0); - if (err) { - fprintf(stderr, "Store config failed (err:%d)\n", err); - exit(EXIT_FAIL_BPF); - } - - /* Remove XDP program when program is interrupted or killed */ - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); - - if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { - fprintf(stderr, "link set xdp fd failed\n"); - return EXIT_FAIL_XDP; - } - - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (err) { - printf("can't get prog info - %s\n", strerror(errno)); - return err; - } - prog_id = info.id; - - stats_poll(interval, action, cfg_options); - return EXIT_OK; -} diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h index 25b1dbe9b37b..fecc41c5df04 100644 --- a/samples/bpf/xdp_sample.bpf.h +++ b/samples/bpf/xdp_sample.bpf.h @@ -7,17 +7,9 @@ #include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> +#include "net_shared.h" #include "xdp_sample_shared.h" -#define ETH_ALEN 6 -#define ETH_P_802_3_MIN 0x0600 -#define ETH_P_8021Q 0x8100 -#define ETH_P_8021AD 0x88A8 -#define ETH_P_IP 0x0800 -#define ETH_P_IPV6 0x86DD -#define ETH_P_ARP 0x0806 -#define IPPROTO_ICMPV6 58 - #define EINVAL 22 #define ENETDOWN 100 #define EMSGSIZE 90 @@ -55,18 +47,6 @@ static __always_inline void swap_src_dst_mac(void *data) p[5] = dst[2]; } -#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ - __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define bpf_ntohs(x) __builtin_bswap16(x) -#define bpf_htons(x) __builtin_bswap16(x) -#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ - __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define bpf_ntohs(x) (x) -#define bpf_htons(x) (x) -#else -# error "Endianness detection needs to be set up for your compiler?!" -#endif - /* * Note: including linux/compiler.h or linux/kernel.h for the macros below * conflicts with vmlinux.h include in BPF files, so we define them here. diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c deleted file mode 100644 index 9cf76b340dd7..000000000000 --- a/samples/bpf/xdp_sample_pkts_kern.c +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/ptrace.h> -#include <linux/version.h> -#include <uapi/linux/bpf.h> -#include <bpf/bpf_helpers.h> - -#define SAMPLE_SIZE 64ul - -struct { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(u32)); -} my_map SEC(".maps"); - -SEC("xdp_sample") -int xdp_sample_prog(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - - /* Metadata will be in the perf event before the packet data. */ - struct S { - u16 cookie; - u16 pkt_len; - } __packed metadata; - - if (data < data_end) { - /* The XDP perf_event_output handler will use the upper 32 bits - * of the flags argument as a number of bytes to include of the - * packet payload in the event data. If the size is too big, the - * call to bpf_perf_event_output will fail and return -EFAULT. - * - * See bpf_xdp_event_output in net/core/filter.c. - * - * The BPF_F_CURRENT_CPU flag means that the event output fd - * will be indexed by the CPU number in the event map. - */ - u64 flags = BPF_F_CURRENT_CPU; - u16 sample_size; - int ret; - - metadata.cookie = 0xdead; - metadata.pkt_len = (u16)(data_end - data); - sample_size = min(metadata.pkt_len, SAMPLE_SIZE); - flags |= (u64)sample_size << 32; - - ret = bpf_perf_event_output(ctx, &my_map, flags, - &metadata, sizeof(metadata)); - if (ret) - bpf_printk("perf_event_output failed: %d\n", ret); - } - - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c deleted file mode 100644 index 7df7163239ac..000000000000 --- a/samples/bpf/xdp_sample_pkts_user.c +++ /dev/null @@ -1,196 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <linux/perf_event.h> -#include <linux/bpf.h> -#include <net/if.h> -#include <errno.h> -#include <assert.h> -#include <sys/sysinfo.h> -#include <sys/ioctl.h> -#include <signal.h> -#include <bpf/libbpf.h> -#include <bpf/bpf.h> -#include <libgen.h> -#include <linux/if_link.h> - -#include "perf-sys.h" - -static int if_idx; -static char *if_name; -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static __u32 prog_id; -static struct perf_buffer *pb = NULL; - -static int do_attach(int idx, int fd, const char *name) -{ - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - int err; - - err = bpf_xdp_attach(idx, fd, xdp_flags, NULL); - if (err < 0) { - printf("ERROR: failed to attach program to %s\n", name); - return err; - } - - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (err) { - printf("can't get prog info - %s\n", strerror(errno)); - return err; - } - prog_id = info.id; - - return err; -} - -static int do_detach(int idx, const char *name) -{ - __u32 curr_prog_id = 0; - int err = 0; - - err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id); - if (err) { - printf("bpf_xdp_query_id failed\n"); - return err; - } - if (prog_id == curr_prog_id) { - err = bpf_xdp_detach(idx, xdp_flags, NULL); - if (err < 0) - printf("ERROR: failed to detach prog from %s\n", name); - } else if (!curr_prog_id) { - printf("couldn't find a prog id on a %s\n", name); - } else { - printf("program on interface changed, not removing\n"); - } - - return err; -} - -#define SAMPLE_SIZE 64 - -static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) -{ - struct { - __u16 cookie; - __u16 pkt_len; - __u8 pkt_data[SAMPLE_SIZE]; - } __packed *e = data; - int i; - - if (e->cookie != 0xdead) { - printf("BUG cookie %x sized %d\n", e->cookie, size); - return; - } - - printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len); - for (i = 0; i < 14 && i < e->pkt_len; i++) - printf("%02x ", e->pkt_data[i]); - printf("\n"); -} - -static void sig_handler(int signo) -{ - do_detach(if_idx, if_name); - perf_buffer__free(pb); - exit(0); -} - -static void usage(const char *prog) -{ - fprintf(stderr, - "%s: %s [OPTS] <ifname|ifindex>\n\n" - "OPTS:\n" - " -F force loading prog\n" - " -S use skb-mode\n", - __func__, prog); -} - -int main(int argc, char **argv) -{ - const char *optstr = "FS"; - int prog_fd, map_fd, opt; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_map *map; - char filename[256]; - int ret, err; - - while ((opt = getopt(argc, argv, optstr)) != -1) { - switch (opt) { - case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; - break; - case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - default: - usage(basename(argv[0])); - return 1; - } - } - - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) - xdp_flags |= XDP_FLAGS_DRV_MODE; - - if (optind == argc) { - usage(basename(argv[0])); - return 1; - } - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) - return 1; - - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); - - err = bpf_object__load(obj); - if (err) - return 1; - - prog_fd = bpf_program__fd(prog); - - map = bpf_object__next_map(obj, NULL); - if (!map) { - printf("finding a map in obj file failed\n"); - return 1; - } - map_fd = bpf_map__fd(map); - - if_idx = if_nametoindex(argv[optind]); - if (!if_idx) - if_idx = strtoul(argv[optind], NULL, 0); - - if (!if_idx) { - fprintf(stderr, "Invalid ifname\n"); - return 1; - } - if_name = argv[optind]; - err = do_attach(if_idx, prog_fd, if_name); - if (err) - return err; - - if (signal(SIGINT, sig_handler) || - signal(SIGHUP, sig_handler) || - signal(SIGTERM, sig_handler)) { - perror("signal"); - return 1; - } - - pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); - err = libbpf_get_error(pb); - if (err) { - perror("perf_buffer setup failed"); - return 1; - } - - while ((ret = perf_buffer__poll(pb, 1000)) >= 0) { - } - - kill(0, SIGINT); - return ret; -} diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 307baef6861a..7e4b2f7108a6 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -295,7 +295,7 @@ int main(int argc, char **argv) return 1; } - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); if (err) { printf("can't get prog info - %s\n", strerror(errno)); return err; diff --git a/samples/cgroup/.gitignore b/samples/cgroup/.gitignore new file mode 100644 index 000000000000..3a0161194cce --- /dev/null +++ b/samples/cgroup/.gitignore @@ -0,0 +1,3 @@ +/cgroup_event_listener +/memcg_event_listener + diff --git a/samples/cgroup/Makefile b/samples/cgroup/Makefile new file mode 100644 index 000000000000..526c8569707c --- /dev/null +++ b/samples/cgroup/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +userprogs-always-y += cgroup_event_listener memcg_event_listener + +userccflags += -I usr/include diff --git a/samples/cgroup/cgroup_event_listener.c b/samples/cgroup/cgroup_event_listener.c new file mode 100644 index 000000000000..3d70dc831a76 --- /dev/null +++ b/samples/cgroup/cgroup_event_listener.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cgroup_event_listener.c - Simple listener of cgroup events + * + * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name> + */ + +#include <assert.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <sys/eventfd.h> + +#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>" + +int main(int argc, char **argv) +{ + int efd = -1; + int cfd = -1; + int event_control = -1; + char event_control_path[PATH_MAX]; + char line[LINE_MAX]; + int ret; + + if (argc != 3) + errx(1, "%s", USAGE_STR); + + cfd = open(argv[1], O_RDONLY); + if (cfd == -1) + err(1, "Cannot open %s", argv[1]); + + ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", + dirname(argv[1])); + if (ret >= PATH_MAX) + errx(1, "Path to cgroup.event_control is too long"); + + event_control = open(event_control_path, O_WRONLY); + if (event_control == -1) + err(1, "Cannot open %s", event_control_path); + + efd = eventfd(0, 0); + if (efd == -1) + err(1, "eventfd() failed"); + + ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); + if (ret >= LINE_MAX) + errx(1, "Arguments string is too long"); + + ret = write(event_control, line, strlen(line) + 1); + if (ret == -1) + err(1, "Cannot write to cgroup.event_control"); + + while (1) { + uint64_t result; + + ret = read(efd, &result, sizeof(result)); + if (ret == -1) { + if (errno == EINTR) + continue; + err(1, "Cannot read from eventfd"); + } + assert(ret == sizeof(result)); + + ret = access(event_control_path, W_OK); + if ((ret == -1) && (errno == ENOENT)) { + puts("The cgroup seems to have removed."); + break; + } + + if (ret == -1) + err(1, "cgroup.event_control is not accessible any more"); + + printf("%s %s: crossed\n", argv[1], argv[2]); + } + + return 0; +} diff --git a/samples/cgroup/memcg_event_listener.c b/samples/cgroup/memcg_event_listener.c new file mode 100644 index 000000000000..41425edbd88a --- /dev/null +++ b/samples/cgroup/memcg_event_listener.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * memcg_event_listener.c - Simple listener of memcg memory.events + * + * Copyright (c) 2023, SaluteDevices. All Rights Reserved. + * + * Author: Dmitry Rokosov <ddrokosov@salutedevices.com> + */ + +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <poll.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/inotify.h> +#include <unistd.h> + +/* Size of buffer to use when reading inotify events */ +#define INOTIFY_BUFFER_SIZE 8192 + +#define INOTIFY_EVENT_NEXT(event, length) ({ \ + (length) -= sizeof(*(event)) + (event)->len; \ + (event)++; \ +}) + +#define INOTIFY_EVENT_OK(event, length) ((length) >= (ssize_t)sizeof(*(event))) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) + +struct memcg_counters { + long low; + long high; + long max; + long oom; + long oom_kill; + long oom_group_kill; +}; + +struct memcg_events { + struct memcg_counters counters; + char path[PATH_MAX]; + int inotify_fd; + int inotify_wd; +}; + +static void print_memcg_counters(const struct memcg_counters *counters) +{ + printf("MEMCG events:\n"); + printf("\tlow: %ld\n", counters->low); + printf("\thigh: %ld\n", counters->high); + printf("\tmax: %ld\n", counters->max); + printf("\toom: %ld\n", counters->oom); + printf("\toom_kill: %ld\n", counters->oom_kill); + printf("\toom_group_kill: %ld\n", counters->oom_group_kill); +} + +static int get_memcg_counter(char *line, const char *name, long *counter) +{ + size_t len = strlen(name); + char *endptr; + long tmp; + + if (memcmp(line, name, len)) { + warnx("Counter line %s has wrong name, %s is expected", + line, name); + return -EINVAL; + } + + /* skip the whitespace delimiter */ + len += 1; + + errno = 0; + tmp = strtol(&line[len], &endptr, 10); + if (((tmp == LONG_MAX || tmp == LONG_MIN) && errno == ERANGE) || + (errno && !tmp)) { + warnx("Failed to parse: %s", &line[len]); + return -ERANGE; + } + + if (endptr == &line[len]) { + warnx("Not digits were found in line %s", &line[len]); + return -EINVAL; + } + + if (!(*endptr == '\0' || (*endptr == '\n' && *++endptr == '\0'))) { + warnx("Further characters after number: %s", endptr); + return -EINVAL; + } + + *counter = tmp; + + return 0; +} + +static int read_memcg_events(struct memcg_events *events, bool show_diff) +{ + FILE *fp = fopen(events->path, "re"); + size_t i; + int ret = 0; + bool any_new_events = false; + char *line = NULL; + size_t len = 0; + struct memcg_counters new_counters; + struct memcg_counters *counters = &events->counters; + struct { + const char *name; + long *new; + long *old; + } map[] = { + { + .name = "low", + .new = &new_counters.low, + .old = &counters->low, + }, + { + .name = "high", + .new = &new_counters.high, + .old = &counters->high, + }, + { + .name = "max", + .new = &new_counters.max, + .old = &counters->max, + }, + { + .name = "oom", + .new = &new_counters.oom, + .old = &counters->oom, + }, + { + .name = "oom_kill", + .new = &new_counters.oom_kill, + .old = &counters->oom_kill, + }, + { + .name = "oom_group_kill", + .new = &new_counters.oom_group_kill, + .old = &counters->oom_group_kill, + }, + }; + + if (!fp) { + warn("Failed to open memcg events file %s", events->path); + return -EBADF; + } + + /* Read new values for memcg counters */ + for (i = 0; i < ARRAY_SIZE(map); ++i) { + ssize_t nread; + + errno = 0; + nread = getline(&line, &len, fp); + if (nread == -1) { + if (errno) { + warn("Failed to read line for counter %s", + map[i].name); + ret = -EIO; + goto exit; + } + + break; + } + + ret = get_memcg_counter(line, map[i].name, map[i].new); + if (ret) { + warnx("Failed to get counter value from line %s", line); + goto exit; + } + } + + for (i = 0; i < ARRAY_SIZE(map); ++i) { + long diff; + + if (*map[i].new > *map[i].old) { + diff = *map[i].new - *map[i].old; + + if (show_diff) + printf("*** %ld MEMCG %s event%s, " + "change counter %ld => %ld\n", + diff, map[i].name, + (diff == 1) ? "" : "s", + *map[i].old, *map[i].new); + + *map[i].old += diff; + any_new_events = true; + } + } + + if (show_diff && !any_new_events) + printf("*** No new untracked memcg events available\n"); + +exit: + free(line); + fclose(fp); + + return ret; +} + +static void process_memcg_events(struct memcg_events *events, + struct inotify_event *event) +{ + int ret; + + if (events->inotify_wd != event->wd) { + warnx("Unknown inotify event %d, should be %d", event->wd, + events->inotify_wd); + return; + } + + printf("Received event in %s:\n", events->path); + + if (!(event->mask & IN_MODIFY)) { + warnx("No IN_MODIFY event, skip it"); + return; + } + + ret = read_memcg_events(events, /* show_diff = */true); + if (ret) + warnx("Can't read memcg events"); +} + +static void monitor_events(struct memcg_events *events) +{ + struct pollfd fds[1]; + int ret; + + printf("Started monitoring memory events from '%s'...\n", events->path); + + fds[0].fd = events->inotify_fd; + fds[0].events = POLLIN; + + for (;;) { + ret = poll(fds, ARRAY_SIZE(fds), -1); + if (ret < 0 && errno != EAGAIN) + err(EXIT_FAILURE, "Can't poll memcg events (%d)", ret); + + if (fds[0].revents & POLLERR) + err(EXIT_FAILURE, "Got POLLERR during monitor events"); + + if (fds[0].revents & POLLIN) { + struct inotify_event *event; + char buffer[INOTIFY_BUFFER_SIZE]; + ssize_t length; + + length = read(fds[0].fd, buffer, INOTIFY_BUFFER_SIZE); + if (length <= 0) + continue; + + event = (struct inotify_event *)buffer; + while (INOTIFY_EVENT_OK(event, length)) { + process_memcg_events(events, event); + event = INOTIFY_EVENT_NEXT(event, length); + } + } + } +} + +static int initialize_memcg_events(struct memcg_events *events, + const char *cgroup) +{ + int ret; + + memset(events, 0, sizeof(struct memcg_events)); + + ret = snprintf(events->path, PATH_MAX, + "/sys/fs/cgroup/%s/memory.events", cgroup); + if (ret >= PATH_MAX) { + warnx("Path to cgroup memory.events is too long"); + return -EMSGSIZE; + } else if (ret < 0) { + warn("Can't generate cgroup event full name"); + return ret; + } + + ret = read_memcg_events(events, /* show_diff = */false); + if (ret) { + warnx("Failed to read initial memcg events state (%d)", ret); + return ret; + } + + events->inotify_fd = inotify_init(); + if (events->inotify_fd < 0) { + warn("Failed to setup new inotify device"); + return -EMFILE; + } + + events->inotify_wd = inotify_add_watch(events->inotify_fd, + events->path, IN_MODIFY); + if (events->inotify_wd < 0) { + warn("Couldn't add monitor in dir %s", events->path); + return -EIO; + } + + printf("Initialized MEMCG events with counters:\n"); + print_memcg_counters(&events->counters); + + return 0; +} + +static void cleanup_memcg_events(struct memcg_events *events) +{ + inotify_rm_watch(events->inotify_fd, events->inotify_wd); + close(events->inotify_fd); +} + +int main(int argc, const char **argv) +{ + struct memcg_events events; + ssize_t ret; + + if (argc != 2) + errx(EXIT_FAILURE, "Usage: %s <cgroup>", argv[0]); + + ret = initialize_memcg_events(&events, argv[1]); + if (ret) + errx(EXIT_FAILURE, "Can't initialize memcg events (%zd)", ret); + + monitor_events(&events); + + cleanup_memcg_events(&events); + + printf("Exiting memcg event listener...\n"); + + return EXIT_SUCCESS; +} diff --git a/samples/check-exec/.gitignore b/samples/check-exec/.gitignore new file mode 100644 index 000000000000..cd759a19dacd --- /dev/null +++ b/samples/check-exec/.gitignore @@ -0,0 +1,2 @@ +/inc +/set-exec diff --git a/samples/check-exec/Makefile b/samples/check-exec/Makefile new file mode 100644 index 000000000000..c4f08ad0f8e3 --- /dev/null +++ b/samples/check-exec/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: BSD-3-Clause + +userprogs-always-y := \ + inc \ + set-exec + +userccflags += -I usr/include + +.PHONY: all clean + +all: + $(MAKE) -C ../.. samples/check-exec/ + +clean: + $(MAKE) -C ../.. M=samples/check-exec/ clean diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c new file mode 100644 index 000000000000..7f6ef06a2f06 --- /dev/null +++ b/samples/check-exec/inc.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * Very simple script interpreter that can evaluate two different commands (one + * per line): + * - "?" to initialize a counter from user's input; + * - "+" to increment the counter (which is set to 0 by default). + * + * See tools/testing/selftests/exec/check-exec-tests.sh and + * Documentation/userspace-api/check_exec.rst + * + * Copyright © 2024 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/fcntl.h> +#include <linux/prctl.h> +#include <linux/securebits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + +/* Returns 1 on error, 0 otherwise. */ +static int interpret_buffer(char *buffer, size_t buffer_size) +{ + char *line, *saveptr = NULL; + long long number = 0; + + /* Each command is the first character of a line. */ + saveptr = NULL; + line = strtok_r(buffer, "\n", &saveptr); + while (line) { + if (*line != '#' && strlen(line) != 1) { + fprintf(stderr, "# ERROR: Unknown string\n"); + return 1; + } + switch (*line) { + case '#': + /* Skips shebang and comments. */ + break; + case '+': + /* Increments and prints the number. */ + number++; + printf("%lld\n", number); + break; + case '?': + /* Reads integer from stdin. */ + fprintf(stderr, "> Enter new number: \n"); + if (scanf("%lld", &number) != 1) { + fprintf(stderr, + "# WARNING: Failed to read number from stdin\n"); + } + break; + default: + fprintf(stderr, "# ERROR: Unknown character '%c'\n", + *line); + return 1; + } + line = strtok_r(NULL, "\n", &saveptr); + } + return 0; +} + +/* Returns 1 on error, 0 otherwise. */ +static int interpret_stream(FILE *script, char *const script_name, + char *const *const envp, const bool restrict_stream) +{ + int err; + char *const script_argv[] = { script_name, NULL }; + char buf[128] = {}; + size_t buf_size = sizeof(buf); + + /* + * We pass a valid argv and envp to the kernel to emulate a native + * script execution. We must use the script file descriptor instead of + * the script path name to avoid race conditions. + */ + err = sys_execveat(fileno(script), "", script_argv, envp, + AT_EMPTY_PATH | AT_EXECVE_CHECK); + if (err && restrict_stream) { + perror("ERROR: Script execution check"); + return 1; + } + + /* Reads script. */ + buf_size = fread(buf, 1, buf_size - 1, script); + return interpret_buffer(buf, buf_size); +} + +static void print_usage(const char *argv0) +{ + fprintf(stderr, "usage: %s <script.inc> | -i | -c <command>\n\n", + argv0); + fprintf(stderr, "Example:\n"); + fprintf(stderr, " ./set-exec -fi -- ./inc -i < script-exec.inc\n"); +} + +int main(const int argc, char *const argv[], char *const *const envp) +{ + int opt; + char *cmd = NULL; + char *script_name = NULL; + bool interpret_stdin = false; + FILE *script_file = NULL; + int secbits; + bool deny_interactive, restrict_file; + size_t arg_nb; + + secbits = prctl(PR_GET_SECUREBITS); + if (secbits == -1) { + /* + * This should never happen, except with a buggy seccomp + * filter. + */ + perror("ERROR: Failed to get securebits"); + return 1; + } + + deny_interactive = !!(secbits & SECBIT_EXEC_DENY_INTERACTIVE); + restrict_file = !!(secbits & SECBIT_EXEC_RESTRICT_FILE); + + while ((opt = getopt(argc, argv, "c:i")) != -1) { + switch (opt) { + case 'c': + if (cmd) { + fprintf(stderr, "ERROR: Command already set"); + return 1; + } + cmd = optarg; + break; + case 'i': + interpret_stdin = true; + break; + default: + print_usage(argv[0]); + return 1; + } + } + + /* Checks that only one argument is used, or read stdin. */ + arg_nb = !!cmd + !!interpret_stdin; + if (arg_nb == 0 && argc == 2) { + script_name = argv[1]; + } else if (arg_nb != 1) { + print_usage(argv[0]); + return 1; + } + + if (cmd) { + /* + * Other kind of interactive interpretations should be denied + * as well (e.g. CLI arguments passing script snippets, + * environment variables interpreted as script). However, any + * way to pass script files should only be restricted according + * to restrict_file. + */ + if (deny_interactive) { + fprintf(stderr, + "ERROR: Interactive interpretation denied.\n"); + return 1; + } + + return interpret_buffer(cmd, strlen(cmd)); + } + + if (interpret_stdin && !script_name) { + script_file = stdin; + /* + * As for any execve(2) call, this path may be logged by the + * kernel. + */ + script_name = "/proc/self/fd/0"; + /* + * When stdin is used, it can point to a regular file or a + * pipe. Restrict stdin execution according to + * SECBIT_EXEC_DENY_INTERACTIVE but always allow executable + * files (which are not considered as interactive inputs). + */ + return interpret_stream(script_file, script_name, envp, + deny_interactive); + } else if (script_name && !interpret_stdin) { + /* + * In this sample, we don't pass any argument to scripts, but + * otherwise we would have to forge an argv with such + * arguments. + */ + script_file = fopen(script_name, "r"); + if (!script_file) { + perror("ERROR: Failed to open script"); + return 1; + } + /* + * Restricts file execution according to + * SECBIT_EXEC_RESTRICT_FILE. + */ + return interpret_stream(script_file, script_name, envp, + restrict_file); + } + + print_usage(argv[0]); + return 1; +} diff --git a/samples/check-exec/run-script-ask.sh b/samples/check-exec/run-script-ask.sh new file mode 100755 index 000000000000..8ef0fdc37266 --- /dev/null +++ b/samples/check-exec/run-script-ask.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env sh +# SPDX-License-Identifier: BSD-3-Clause + +DIR="$(dirname -- "$0")" + +PATH="${PATH}:${DIR}" + +set -x +"${DIR}/script-ask.inc" diff --git a/samples/check-exec/script-ask.inc b/samples/check-exec/script-ask.inc new file mode 100755 index 000000000000..720a8e649225 --- /dev/null +++ b/samples/check-exec/script-ask.inc @@ -0,0 +1,5 @@ +#!/usr/bin/env inc +# SPDX-License-Identifier: BSD-3-Clause + +? ++ diff --git a/samples/check-exec/script-exec.inc b/samples/check-exec/script-exec.inc new file mode 100755 index 000000000000..3245cb9d8dd1 --- /dev/null +++ b/samples/check-exec/script-exec.inc @@ -0,0 +1,4 @@ +#!/usr/bin/env inc +# SPDX-License-Identifier: BSD-3-Clause + ++ diff --git a/samples/check-exec/script-noexec.inc b/samples/check-exec/script-noexec.inc new file mode 100644 index 000000000000..3245cb9d8dd1 --- /dev/null +++ b/samples/check-exec/script-noexec.inc @@ -0,0 +1,4 @@ +#!/usr/bin/env inc +# SPDX-License-Identifier: BSD-3-Clause + ++ diff --git a/samples/check-exec/set-exec.c b/samples/check-exec/set-exec.c new file mode 100644 index 000000000000..ba86a60a20dd --- /dev/null +++ b/samples/check-exec/set-exec.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * Simple tool to set SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, + * before executing a command. + * + * Copyright © 2024 Microsoft Corporation + */ + +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <errno.h> +#include <linux/prctl.h> +#include <linux/securebits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +static void print_usage(const char *argv0) +{ + fprintf(stderr, "usage: %s -f|-i -- <cmd> [args]...\n\n", argv0); + fprintf(stderr, "Execute a command with\n"); + fprintf(stderr, "- SECBIT_EXEC_RESTRICT_FILE set: -f\n"); + fprintf(stderr, "- SECBIT_EXEC_DENY_INTERACTIVE set: -i\n"); +} + +int main(const int argc, char *const argv[], char *const *const envp) +{ + const char *cmd_path; + char *const *cmd_argv; + int opt, secbits_cur, secbits_new; + bool has_policy = false; + + secbits_cur = prctl(PR_GET_SECUREBITS); + if (secbits_cur == -1) { + /* + * This should never happen, except with a buggy seccomp + * filter. + */ + perror("ERROR: Failed to get securebits"); + return 1; + } + + secbits_new = secbits_cur; + while ((opt = getopt(argc, argv, "fi")) != -1) { + switch (opt) { + case 'f': + secbits_new |= SECBIT_EXEC_RESTRICT_FILE | + SECBIT_EXEC_RESTRICT_FILE_LOCKED; + has_policy = true; + break; + case 'i': + secbits_new |= SECBIT_EXEC_DENY_INTERACTIVE | + SECBIT_EXEC_DENY_INTERACTIVE_LOCKED; + has_policy = true; + break; + default: + print_usage(argv[0]); + return 1; + } + } + + if (!argv[optind] || !has_policy) { + print_usage(argv[0]); + return 1; + } + + if (secbits_cur != secbits_new && + prctl(PR_SET_SECUREBITS, secbits_new)) { + perror("Failed to set secure bit(s)."); + fprintf(stderr, + "Hint: The running kernel may not support this feature.\n"); + return 1; + } + + cmd_path = argv[optind]; + cmd_argv = argv + optind; + fprintf(stderr, "Executing command...\n"); + execvpe(cmd_path, cmd_argv, envp); + fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path, + strerror(errno)); + return 1; +} diff --git a/samples/configfs/configfs_sample.c b/samples/configfs/configfs_sample.c index 37a657b25d58..fd5d163828c5 100644 --- a/samples/configfs/configfs_sample.c +++ b/samples/configfs/configfs_sample.c @@ -364,4 +364,5 @@ static void __exit configfs_example_exit(void) module_init(configfs_example_init); module_exit(configfs_example_exit); +MODULE_DESCRIPTION("Sample configfs module"); MODULE_LICENSE("GPL"); diff --git a/samples/connector/cn_test.c b/samples/connector/cn_test.c index 0958a171d048..73d50b4aebb6 100644 --- a/samples/connector/cn_test.c +++ b/samples/connector/cn_test.c @@ -172,7 +172,7 @@ static int cn_test_init(void) static void cn_test_fini(void) { - del_timer_sync(&cn_test_timer); + timer_delete_sync(&cn_test_timer); cn_del_callback(&cn_test_id); cn_test_id.val--; cn_del_callback(&cn_test_id); diff --git a/samples/damon/Kconfig b/samples/damon/Kconfig new file mode 100644 index 000000000000..cbf96fd8a8bf --- /dev/null +++ b/samples/damon/Kconfig @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "DAMON Samples" + +config SAMPLE_DAMON_WSSE + bool "DAMON sample module for working set size estimation" + depends on DAMON && DAMON_VADDR + help + This builds DAMON sample module for working set size estimation. + + The module receives a pid, monitor access to the virtual address + space of the process, estimate working set size of the process, and + repeatedly prints the size on the kernel log. + + If unsure, say N. + +config SAMPLE_DAMON_PRCL + bool "DAMON sample module for access-aware proactive reclamation" + depends on DAMON && DAMON_VADDR + help + This builds DAMON sample module for access-aware proactive + reclamation. + + The module receives a pid, monitor access to the virtual address + space of the process, find memory regions that not accessed, and + proactively reclaim the regions. + + If unsure, say N. + +config SAMPLE_DAMON_MTIER + bool "DAMON sample module for memory tiering" + depends on DAMON && DAMON_PADDR + help + Thps builds DAMON sample module for memory tierign. + + The module assumes the system is constructed with two NUMA nodes, + which seems as local and remote nodes to all CPUs. For example, + node0 is for DDR5 DRAMs connected via DIMM, while node1 is for DDR4 + DRAMs connected via CXL. + + If unsure, say N. + +endmenu diff --git a/samples/damon/Makefile b/samples/damon/Makefile new file mode 100644 index 000000000000..72f68cbf422a --- /dev/null +++ b/samples/damon/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_SAMPLE_DAMON_WSSE) += wsse.o +obj-$(CONFIG_SAMPLE_DAMON_PRCL) += prcl.o +obj-$(CONFIG_SAMPLE_DAMON_MTIER) += mtier.o diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c new file mode 100644 index 000000000000..775838a23d93 --- /dev/null +++ b/samples/damon/mtier.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * memory tiering: migrate cold pages in node 0 and hot pages in node 1 to node + * 1 and node 0, respectively. Adjust the hotness/coldness threshold aiming + * resulting 99.6 % node 0 utilization ratio. + */ + +#define pr_fmt(fmt) "damon_sample_mtier: " fmt + +#include <linux/damon.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_sample_mtier." + +static unsigned long node0_start_addr __read_mostly; +module_param(node0_start_addr, ulong, 0600); + +static unsigned long node0_end_addr __read_mostly; +module_param(node0_end_addr, ulong, 0600); + +static unsigned long node1_start_addr __read_mostly; +module_param(node1_start_addr, ulong, 0600); + +static unsigned long node1_end_addr __read_mostly; +module_param(node1_end_addr, ulong, 0600); + +static unsigned long node0_mem_used_bp __read_mostly = 9970; +module_param(node0_mem_used_bp, ulong, 0600); + +static unsigned long node0_mem_free_bp __read_mostly = 50; +module_param(node0_mem_free_bp, ulong, 0600); + +static int damon_sample_mtier_enable_store( + const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enabled_param_ops = { + .set = damon_sample_mtier_enable_store, + .get = param_get_bool, +}; + +static bool enabled __read_mostly; +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, "Enable or disable DAMON_SAMPLE_MTIER"); + +static bool detect_node_addresses __read_mostly; +module_param(detect_node_addresses, bool, 0600); + +static struct damon_ctx *ctxs[2]; + +struct region_range { + phys_addr_t start; + phys_addr_t end; +}; + +static int nid_to_phys(int target_node, struct region_range *range) +{ + if (!node_online(target_node)) { + pr_err("NUMA node %d is not online\n", target_node); + return -EINVAL; + } + + range->start = PFN_PHYS(node_start_pfn(target_node)); + range->end = PFN_PHYS(node_end_pfn(target_node)); + + return 0; +} + +static struct damon_ctx *damon_sample_mtier_build_ctx(bool promote) +{ + struct damon_ctx *ctx; + struct damon_attrs attrs; + struct damon_target *target; + struct damon_region *region; + struct damos *scheme; + struct damos_quota_goal *quota_goal; + struct damos_filter *filter; + struct region_range addr; + int ret; + + ctx = damon_new_ctx(); + if (!ctx) + return NULL; + attrs = (struct damon_attrs) { + .sample_interval = 5 * USEC_PER_MSEC, + .aggr_interval = 100 * USEC_PER_MSEC, + .ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC, + .min_nr_regions = 10, + .max_nr_regions = 1000, + }; + + /* + * auto-tune sampling and aggregation interval aiming 4% DAMON-observed + * accesses ratio, keeping sampling interval in [5ms, 10s] range. + */ + attrs.intervals_goal = (struct damon_intervals_goal) { + .access_bp = 400, .aggrs = 3, + .min_sample_us = 5000, .max_sample_us = 10000000, + }; + if (damon_set_attrs(ctx, &attrs)) + goto free_out; + if (damon_select_ops(ctx, DAMON_OPS_PADDR)) + goto free_out; + + target = damon_new_target(); + if (!target) + goto free_out; + damon_add_target(ctx, target); + + if (detect_node_addresses) { + ret = promote ? nid_to_phys(1, &addr) : nid_to_phys(0, &addr); + if (ret) + goto free_out; + } else { + addr.start = promote ? node1_start_addr : node0_start_addr; + addr.end = promote ? node1_end_addr : node0_end_addr; + } + + region = damon_new_region(addr.start, addr.end); + if (!region) + goto free_out; + damon_add_region(region, target); + + scheme = damon_new_scheme( + /* access pattern */ + &(struct damos_access_pattern) { + .min_sz_region = PAGE_SIZE, + .max_sz_region = ULONG_MAX, + .min_nr_accesses = promote ? 1 : 0, + .max_nr_accesses = promote ? UINT_MAX : 0, + .min_age_region = 0, + .max_age_region = UINT_MAX}, + /* action */ + promote ? DAMOS_MIGRATE_HOT : DAMOS_MIGRATE_COLD, + 1000000, /* apply interval (1s) */ + &(struct damos_quota){ + /* 200 MiB per sec by most */ + .reset_interval = 1000, + .sz = 200 * 1024 * 1024, + /* ignore size of region when prioritizing */ + .weight_sz = 0, + .weight_nr_accesses = 100, + .weight_age = 100, + }, + &(struct damos_watermarks){}, + promote ? 0 : 1); /* migrate target node id */ + if (!scheme) + goto free_out; + damon_set_schemes(ctx, &scheme, 1); + quota_goal = damos_new_quota_goal( + promote ? DAMOS_QUOTA_NODE_MEM_USED_BP : + DAMOS_QUOTA_NODE_MEM_FREE_BP, + promote ? node0_mem_used_bp : node0_mem_free_bp); + if (!quota_goal) + goto free_out; + quota_goal->nid = 0; + damos_add_quota_goal(&scheme->quota, quota_goal); + filter = damos_new_filter(DAMOS_FILTER_TYPE_YOUNG, true, promote); + if (!filter) + goto free_out; + damos_add_filter(scheme, filter); + return ctx; +free_out: + damon_destroy_ctx(ctx); + return NULL; +} + +static int damon_sample_mtier_start(void) +{ + struct damon_ctx *ctx; + + ctx = damon_sample_mtier_build_ctx(true); + if (!ctx) + return -ENOMEM; + ctxs[0] = ctx; + ctx = damon_sample_mtier_build_ctx(false); + if (!ctx) { + damon_destroy_ctx(ctxs[0]); + return -ENOMEM; + } + ctxs[1] = ctx; + return damon_start(ctxs, 2, true); +} + +static void damon_sample_mtier_stop(void) +{ + damon_stop(ctxs, 2); + damon_destroy_ctx(ctxs[0]); + damon_destroy_ctx(ctxs[1]); +} + +static int damon_sample_mtier_enable_store( + const char *val, const struct kernel_param *kp) +{ + bool is_enabled = enabled; + int err; + + err = kstrtobool(val, &enabled); + if (err) + return err; + + if (enabled == is_enabled) + return 0; + + if (!damon_initialized()) + return 0; + + if (enabled) { + err = damon_sample_mtier_start(); + if (err) + enabled = false; + return err; + } + damon_sample_mtier_stop(); + return 0; +} + +static int __init damon_sample_mtier_init(void) +{ + int err = 0; + + if (!damon_initialized()) { + if (enabled) + enabled = false; + return -ENOMEM; + } + + if (enabled) { + err = damon_sample_mtier_start(); + if (err) + enabled = false; + } + return 0; +} + +module_init(damon_sample_mtier_init); diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c new file mode 100644 index 000000000000..b7c50f2656ce --- /dev/null +++ b/samples/damon/prcl.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * proactive reclamation: monitor access pattern of a given process, find + * regions that seems not accessed, and proactively page out the regions. + */ + +#define pr_fmt(fmt) "damon_sample_prcl: " fmt + +#include <linux/damon.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_sample_prcl." + +static int target_pid __read_mostly; +module_param(target_pid, int, 0600); + +static int damon_sample_prcl_enable_store( + const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enabled_param_ops = { + .set = damon_sample_prcl_enable_store, + .get = param_get_bool, +}; + +static bool enabled __read_mostly; +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, "Enable or disable DAMON_SAMPLE_PRCL"); + +static struct damon_ctx *ctx; +static struct pid *target_pidp; + +static int damon_sample_prcl_repeat_call_fn(void *data) +{ + struct damon_ctx *c = data; + struct damon_target *t; + + damon_for_each_target(t, c) { + struct damon_region *r; + unsigned long wss = 0; + + damon_for_each_region(r, t) { + if (r->nr_accesses > 0) + wss += r->ar.end - r->ar.start; + } + pr_info("wss: %lu\n", wss); + } + return 0; +} + +static struct damon_call_control repeat_call_control = { + .fn = damon_sample_prcl_repeat_call_fn, + .repeat = true, +}; + +static int damon_sample_prcl_start(void) +{ + struct damon_target *target; + struct damos *scheme; + int err; + + pr_info("start\n"); + + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + if (damon_select_ops(ctx, DAMON_OPS_VADDR)) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + + target = damon_new_target(); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + target_pidp = find_get_pid(target_pid); + if (!target_pidp) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + target->pid = target_pidp; + + scheme = damon_new_scheme( + &(struct damos_access_pattern) { + .min_sz_region = PAGE_SIZE, + .max_sz_region = ULONG_MAX, + .min_nr_accesses = 0, + .max_nr_accesses = 0, + .min_age_region = 50, + .max_age_region = UINT_MAX}, + DAMOS_PAGEOUT, + 0, + &(struct damos_quota){}, + &(struct damos_watermarks){}, + NUMA_NO_NODE); + if (!scheme) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_set_schemes(ctx, &scheme, 1); + + err = damon_start(&ctx, 1, true); + if (err) + return err; + + repeat_call_control.data = ctx; + return damon_call(ctx, &repeat_call_control); +} + +static void damon_sample_prcl_stop(void) +{ + pr_info("stop\n"); + if (ctx) { + damon_stop(&ctx, 1); + damon_destroy_ctx(ctx); + } +} + +static int damon_sample_prcl_enable_store( + const char *val, const struct kernel_param *kp) +{ + bool is_enabled = enabled; + int err; + + err = kstrtobool(val, &enabled); + if (err) + return err; + + if (enabled == is_enabled) + return 0; + + if (!damon_initialized()) + return 0; + + if (enabled) { + err = damon_sample_prcl_start(); + if (err) + enabled = false; + return err; + } + damon_sample_prcl_stop(); + return 0; +} + +static int __init damon_sample_prcl_init(void) +{ + int err = 0; + + if (!damon_initialized()) { + if (enabled) + enabled = false; + return -ENOMEM; + } + + if (enabled) { + err = damon_sample_prcl_start(); + if (err) + enabled = false; + } + return 0; +} + +module_init(damon_sample_prcl_init); diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c new file mode 100644 index 000000000000..799ad4443943 --- /dev/null +++ b/samples/damon/wsse.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * working set size estimation: monitor access pattern of given process and + * print estimated working set size (total size of regions that showing some + * access). + */ + +#define pr_fmt(fmt) "damon_sample_wsse: " fmt + +#include <linux/damon.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_sample_wsse." + +static int target_pid __read_mostly; +module_param(target_pid, int, 0600); + +static int damon_sample_wsse_enable_store( + const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enabled_param_ops = { + .set = damon_sample_wsse_enable_store, + .get = param_get_bool, +}; + +static bool enabled __read_mostly; +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, "Enable or disable DAMON_SAMPLE_WSSE"); + +static struct damon_ctx *ctx; +static struct pid *target_pidp; + +static int damon_sample_wsse_repeat_call_fn(void *data) +{ + struct damon_ctx *c = data; + struct damon_target *t; + + damon_for_each_target(t, c) { + struct damon_region *r; + unsigned long wss = 0; + + damon_for_each_region(r, t) { + if (r->nr_accesses > 0) + wss += r->ar.end - r->ar.start; + } + pr_info("wss: %lu\n", wss); + } + return 0; +} + +static struct damon_call_control repeat_call_control = { + .fn = damon_sample_wsse_repeat_call_fn, + .repeat = true, +}; + +static int damon_sample_wsse_start(void) +{ + struct damon_target *target; + int err; + + pr_info("start\n"); + + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + if (damon_select_ops(ctx, DAMON_OPS_VADDR)) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + + target = damon_new_target(); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + target_pidp = find_get_pid(target_pid); + if (!target_pidp) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + target->pid = target_pidp; + + err = damon_start(&ctx, 1, true); + if (err) + return err; + repeat_call_control.data = ctx; + return damon_call(ctx, &repeat_call_control); +} + +static void damon_sample_wsse_stop(void) +{ + pr_info("stop\n"); + if (ctx) { + damon_stop(&ctx, 1); + damon_destroy_ctx(ctx); + } +} + +static int damon_sample_wsse_enable_store( + const char *val, const struct kernel_param *kp) +{ + bool is_enabled = enabled; + int err; + + err = kstrtobool(val, &enabled); + if (err) + return err; + + if (enabled == is_enabled) + return 0; + + if (!damon_initialized()) + return 0; + + if (enabled) { + err = damon_sample_wsse_start(); + if (err) + enabled = false; + return err; + } + damon_sample_wsse_stop(); + return 0; +} + +static int __init damon_sample_wsse_init(void) +{ + int err = 0; + + if (!damon_initialized()) { + err = -ENOMEM; + if (enabled) + enabled = false; + } + + if (enabled) { + err = damon_sample_wsse_start(); + if (err) + enabled = false; + } + return err; +} + +module_init(damon_sample_wsse_init); diff --git a/samples/fanotify/fs-monitor.c b/samples/fanotify/fs-monitor.c index 608db24c471e..28c0a652ffeb 100644 --- a/samples/fanotify/fs-monitor.c +++ b/samples/fanotify/fs-monitor.c @@ -12,6 +12,9 @@ #include <sys/fanotify.h> #include <sys/types.h> #include <unistd.h> +#ifndef __GLIBC__ +#include <asm-generic/int-ll64.h> +#endif #ifndef FAN_FS_ERROR #define FAN_FS_ERROR 0x00008000 @@ -95,7 +98,11 @@ static void handle_notifications(char *buffer, int len) fid = (struct fanotify_event_info_fid *) info; printf("\tfsid: %x%x\n", +#if defined(__GLIBC__) fid->fsid.val[0], fid->fsid.val[1]); +#else + fid->fsid.__val[0], fid->fsid.__val[1]); +#endif print_fh((struct file_handle *) &fid->handle); break; diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c index e22da8573116..bfe98ce826f3 100644 --- a/samples/fprobe/fprobe_example.c +++ b/samples/fprobe/fprobe_example.c @@ -48,7 +48,9 @@ static void show_backtrace(void) stack_trace_print(stacks, len, 24); } -static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) +static int sample_entry_handler(struct fprobe *fp, unsigned long ip, + unsigned long ret_ip, + struct ftrace_regs *fregs, void *data) { if (use_trace) /* @@ -61,11 +63,14 @@ static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_ nhit++; if (stackdump) show_backtrace(); + return 0; } -static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) +static void sample_exit_handler(struct fprobe *fp, unsigned long ip, + unsigned long ret_ip, struct ftrace_regs *regs, + void *data) { - unsigned long rip = instruction_pointer(regs); + unsigned long rip = ret_ip; if (use_trace) /* @@ -145,4 +150,5 @@ static void __exit fprobe_exit(void) module_init(fprobe_init) module_exit(fprobe_exit) +MODULE_DESCRIPTION("sample kernel module showing the use of fprobe"); MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile index faf8cdb79c5f..589baf2ec4e3 100644 --- a/samples/ftrace/Makefile +++ b/samples/ftrace/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o +obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace-ops.o CFLAGS_sample-trace-array.o := -I$(src) obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index de5a0f67f320..da3a9f2091f5 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -2,8 +2,9 @@ #include <linux/module.h> #include <linux/kthread.h> #include <linux/ftrace.h> +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include <asm/asm-offsets.h> -#include <asm/nospec-branch.h> +#endif extern void my_direct_func1(void); extern void my_direct_func2(void); @@ -23,9 +24,45 @@ extern void my_tramp2(void *); static unsigned long my_ip = (unsigned long)schedule; +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi sp,sp,-2*"SZREG"\n" +" "REG_S" t0,0*"SZREG"(sp)\n" +" "REG_S" ra,1*"SZREG"(sp)\n" +" call my_direct_func1\n" +" "REG_L" t0,0*"SZREG"(sp)\n" +" "REG_L" ra,1*"SZREG"(sp)\n" +" addi sp,sp,2*"SZREG"\n" +" jr t0\n" +" .size my_tramp1, .-my_tramp1\n" +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" + +" my_tramp2:\n" +" addi sp,sp,-2*"SZREG"\n" +" "REG_S" t0,0*"SZREG"(sp)\n" +" "REG_S" ra,1*"SZREG"(sp)\n" +" call my_direct_func2\n" +" "REG_L" t0,0*"SZREG"(sp)\n" +" "REG_L" ra,1*"SZREG"(sp)\n" +" addi sp,sp,2*"SZREG"\n" +" jr t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" @@ -38,8 +75,8 @@ asm ( CALL_DEPTH_ACCOUNT " call my_direct_func1\n" " leave\n" -" .size my_tramp1, .-my_tramp1\n" ASM_RET +" .size my_tramp1, .-my_tramp1\n" " .type my_tramp2, @function\n" " .globl my_tramp2\n" @@ -96,6 +133,157 @@ asm ( #endif /* CONFIG_S390 */ +#ifdef CONFIG_ARM64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:" +" hint 34\n" // bti c +" sub sp, sp, #16\n" +" stp x9, x30, [sp]\n" +" bl my_direct_func1\n" +" ldp x30, x9, [sp]\n" +" add sp, sp, #16\n" +" ret x9\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:" +" hint 34\n" // bti c +" sub sp, sp, #16\n" +" stp x9, x30, [sp]\n" +" bl my_direct_func2\n" +" ldp x30, x9, [sp]\n" +" add sp, sp, #16\n" +" ret x9\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_ARM64 */ + +#ifdef CONFIG_LOONGARCH + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi.d $sp, $sp, -16\n" +" st.d $t0, $sp, 0\n" +" st.d $ra, $sp, 8\n" +" bl my_direct_func1\n" +" ld.d $t0, $sp, 0\n" +" ld.d $ra, $sp, 8\n" +" addi.d $sp, $sp, 16\n" +" jr $t0\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" +" addi.d $sp, $sp, -16\n" +" st.d $t0, $sp, 0\n" +" st.d $ra, $sp, 8\n" +" bl my_direct_func2\n" +" ld.d $t0, $sp, 0\n" +" ld.d $ra, $sp, 8\n" +" addi.d $sp, $sp, 16\n" +" jr $t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_LOONGARCH */ + +#ifdef CONFIG_PPC +#include <asm/ppc_asm.h> + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE +" bl my_direct_func1\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE +" bl my_direct_func2\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + +static struct ftrace_ops direct; + static unsigned long my_tramp = (unsigned long)my_tramp1; static unsigned long tramps[2] = { (unsigned long)my_tramp1, @@ -114,7 +302,7 @@ static int simple_thread(void *arg) if (ret) continue; t ^= 1; - ret = modify_ftrace_direct(my_ip, my_tramp, tramps[t]); + ret = modify_ftrace_direct(&direct, tramps[t]); if (!ret) my_tramp = tramps[t]; WARN_ON_ONCE(ret); @@ -129,7 +317,9 @@ static int __init ftrace_direct_init(void) { int ret; - ret = register_ftrace_direct(my_ip, my_tramp); + ftrace_set_filter_ip(&direct, (unsigned long) my_ip, 0, 0); + ret = register_ftrace_direct(&direct, my_tramp); + if (!ret) simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn"); return ret; @@ -138,7 +328,7 @@ static int __init ftrace_direct_init(void) static void __exit ftrace_direct_exit(void) { kthread_stop(simple_tsk); - unregister_ftrace_direct(my_ip, my_tramp); + unregister_ftrace_direct(&direct, my_tramp, true); } module_init(ftrace_direct_init); diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index a825dbd2c9cf..8f7986d698d8 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -2,8 +2,9 @@ #include <linux/module.h> #include <linux/kthread.h> #include <linux/ftrace.h> +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include <asm/asm-offsets.h> -#include <asm/nospec-branch.h> +#endif extern void my_direct_func1(unsigned long ip); extern void my_direct_func2(unsigned long ip); @@ -21,9 +22,51 @@ void my_direct_func2(unsigned long ip) extern void my_tramp1(void *); extern void my_tramp2(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func1\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func2\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" @@ -103,6 +146,184 @@ asm ( #endif /* CONFIG_S390 */ +#ifdef CONFIG_ARM64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:" +" hint 34\n" // bti c +" sub sp, sp, #32\n" +" stp x9, x30, [sp]\n" +" str x0, [sp, #16]\n" +" mov x0, x30\n" +" bl my_direct_func1\n" +" ldp x30, x9, [sp]\n" +" ldr x0, [sp, #16]\n" +" add sp, sp, #32\n" +" ret x9\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:" +" hint 34\n" // bti c +" sub sp, sp, #32\n" +" stp x9, x30, [sp]\n" +" str x0, [sp, #16]\n" +" mov x0, x30\n" +" bl my_direct_func2\n" +" ldp x30, x9, [sp]\n" +" ldr x0, [sp, #16]\n" +" add sp, sp, #32\n" +" ret x9\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_ARM64 */ + +#ifdef CONFIG_LOONGARCH +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi.d $sp, $sp, -32\n" +" st.d $a0, $sp, 0\n" +" st.d $t0, $sp, 8\n" +" st.d $ra, $sp, 16\n" +" move $a0, $t0\n" +" bl my_direct_func1\n" +" ld.d $a0, $sp, 0\n" +" ld.d $t0, $sp, 8\n" +" ld.d $ra, $sp, 16\n" +" addi.d $sp, $sp, 32\n" +" jr $t0\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" +" addi.d $sp, $sp, -32\n" +" st.d $a0, $sp, 0\n" +" st.d $t0, $sp, 8\n" +" st.d $ra, $sp, 16\n" +" move $a0, $t0\n" +" bl my_direct_func2\n" +" ld.d $a0, $sp, 0\n" +" ld.d $t0, $sp, 8\n" +" ld.d $ra, $sp, 16\n" +" addi.d $sp, $sp, 32\n" +" jr $t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_LOONGARCH */ + +#ifdef CONFIG_PPC +#include <asm/ppc_asm.h> + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#define PPC_FTRACE_RECOVER_IP \ +" lwz 8, 4(3)\n" \ +" li 9, 6\n" \ +" slw 8, 8, 9\n" \ +" sraw 8, 8, 9\n" \ +" add 3, 3, 8\n" \ +" addi 3, 3, 4\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#define PPC_FTRACE_RECOVER_IP "" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func1\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func2\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC + " .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static unsigned long my_tramp = (unsigned long)my_tramp1; static unsigned long tramps[2] = { (unsigned long)my_tramp1, @@ -123,7 +344,7 @@ static int simple_thread(void *arg) if (ret) continue; t ^= 1; - ret = modify_ftrace_direct_multi(&direct, tramps[t]); + ret = modify_ftrace_direct(&direct, tramps[t]); if (!ret) my_tramp = tramps[t]; WARN_ON_ONCE(ret); @@ -141,7 +362,7 @@ static int __init ftrace_direct_multi_init(void) ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0); - ret = register_ftrace_direct_multi(&direct, my_tramp); + ret = register_ftrace_direct(&direct, my_tramp); if (!ret) simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn"); @@ -151,13 +372,12 @@ static int __init ftrace_direct_multi_init(void) static void __exit ftrace_direct_multi_exit(void) { kthread_stop(simple_tsk); - unregister_ftrace_direct_multi(&direct, my_tramp); - ftrace_free_filter(&direct); + unregister_ftrace_direct(&direct, my_tramp, true); } module_init(ftrace_direct_multi_init); module_exit(ftrace_direct_multi_exit); MODULE_AUTHOR("Jiri Olsa"); -MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct_multi()"); +MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct()"); MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index d955a2650605..db326c81a27d 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -4,8 +4,9 @@ #include <linux/mm.h> /* for handle_mm_fault() */ #include <linux/ftrace.h> #include <linux/sched/stat.h> +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include <asm/asm-offsets.h> -#include <asm/nospec-branch.h> +#endif extern void my_direct_func(unsigned long ip); @@ -16,9 +17,35 @@ void my_direct_func(unsigned long ip) extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" @@ -66,6 +93,131 @@ asm ( #endif /* CONFIG_S390 */ +#ifdef CONFIG_ARM64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:" +" hint 34\n" // bti c +" sub sp, sp, #32\n" +" stp x9, x30, [sp]\n" +" str x0, [sp, #16]\n" +" mov x0, x30\n" +" bl my_direct_func\n" +" ldp x30, x9, [sp]\n" +" ldr x0, [sp, #16]\n" +" add sp, sp, #32\n" +" ret x9\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_ARM64 */ + +#ifdef CONFIG_LOONGARCH + +#include <asm/asm.h> +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi.d $sp, $sp, -32\n" +" st.d $a0, $sp, 0\n" +" st.d $t0, $sp, 8\n" +" st.d $ra, $sp, 16\n" +" move $a0, $t0\n" +" bl my_direct_func\n" +" ld.d $a0, $sp, 0\n" +" ld.d $t0, $sp, 8\n" +" ld.d $ra, $sp, 16\n" +" addi.d $sp, $sp, 32\n" +" jr $t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_LOONGARCH */ + +#ifdef CONFIG_PPC +#include <asm/ppc_asm.h> + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#define PPC_FTRACE_RECOVER_IP \ +" lwz 8, 4(3)\n" \ +" li 9, 6\n" \ +" slw 8, 8, 9\n" \ +" sraw 8, 8, 9\n" \ +" add 3, 3, 8\n" \ +" addi 3, 3, 4\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#define PPC_FTRACE_RECOVER_IP "" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static int __init ftrace_direct_multi_init(void) @@ -73,13 +225,12 @@ static int __init ftrace_direct_multi_init(void) ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0); - return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp); + return register_ftrace_direct(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_multi_exit(void) { - unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp); - ftrace_free_filter(&direct); + unregister_ftrace_direct(&direct, (unsigned long) my_tramp, true); } module_init(ftrace_direct_multi_init); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index e13fb59a2b47..3d0fa260332d 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -3,24 +3,54 @@ #include <linux/mm.h> /* for handle_mm_fault() */ #include <linux/ftrace.h> +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include <asm/asm-offsets.h> -#include <asm/nospec-branch.h> +#endif -extern void my_direct_func(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); +extern void my_direct_func(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, struct pt_regs *regs); -void my_direct_func(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +void my_direct_func(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, struct pt_regs *regs) { - trace_printk("handle mm fault vma=%p address=%lx flags=%x\n", - vma, address, flags); + trace_printk("handle mm fault vma=%p address=%lx flags=%x regs=%p\n", + vma, address, flags, regs); } extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-5*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" a1,1*"SZREG"(sp)\n" +" "REG_S" a2,2*"SZREG"(sp)\n" +" "REG_S" t0,3*"SZREG"(sp)\n" +" "REG_S" ra,4*"SZREG"(sp)\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" a1,1*"SZREG"(sp)\n" +" "REG_L" a2,2*"SZREG"(sp)\n" +" "REG_L" t0,3*"SZREG"(sp)\n" +" "REG_L" ra,4*"SZREG"(sp)\n" +" addi sp,sp,5*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" @@ -34,7 +64,9 @@ asm ( " pushq %rdi\n" " pushq %rsi\n" " pushq %rdx\n" +" pushq %rcx\n" " call my_direct_func\n" +" popq %rcx\n" " popq %rdx\n" " popq %rsi\n" " popq %rdi\n" @@ -70,16 +102,150 @@ asm ( #endif /* CONFIG_S390 */ +#ifdef CONFIG_ARM64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:" +" hint 34\n" // bti c +" sub sp, sp, #48\n" +" stp x9, x30, [sp]\n" +" stp x0, x1, [sp, #16]\n" +" stp x2, x3, [sp, #32]\n" +" bl my_direct_func\n" +" ldp x30, x9, [sp]\n" +" ldp x0, x1, [sp, #16]\n" +" ldp x2, x3, [sp, #32]\n" +" add sp, sp, #48\n" +" ret x9\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_ARM64 */ + +#ifdef CONFIG_LOONGARCH + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi.d $sp, $sp, -48\n" +" st.d $a0, $sp, 0\n" +" st.d $a1, $sp, 8\n" +" st.d $a2, $sp, 16\n" +" st.d $t0, $sp, 24\n" +" st.d $ra, $sp, 32\n" +" bl my_direct_func\n" +" ld.d $a0, $sp, 0\n" +" ld.d $a1, $sp, 8\n" +" ld.d $a2, $sp, 16\n" +" ld.d $t0, $sp, 24\n" +" ld.d $ra, $sp, 32\n" +" addi.d $sp, $sp, 48\n" +" jr $t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_LOONGARCH */ + +#ifdef CONFIG_PPC +#include <asm/ppc_asm.h> + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 64 +#define STACK_FRAME_ARG1 32 +#define STACK_FRAME_ARG2 40 +#define STACK_FRAME_ARG3 48 +#define STACK_FRAME_ARG4 56 +#else +#define STACK_FRAME_SIZE 32 +#define STACK_FRAME_ARG1 16 +#define STACK_FRAME_ARG2 20 +#define STACK_FRAME_ARG3 24 +#define STACK_FRAME_ARG4 28 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n" + PPC_STL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n" + PPC_STL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n" + PPC_STL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n" +" bl my_direct_func\n" + PPC_LL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n" + PPC_LL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n" + PPC_LL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n" + PPC_LL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + +static struct ftrace_ops direct; + static int __init ftrace_direct_init(void) { - return register_ftrace_direct((unsigned long)handle_mm_fault, - (unsigned long)my_tramp); + ftrace_set_filter_ip(&direct, (unsigned long) handle_mm_fault, 0, 0); + + return register_ftrace_direct(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_exit(void) { - unregister_ftrace_direct((unsigned long)handle_mm_fault, - (unsigned long)my_tramp); + unregister_ftrace_direct(&direct, (unsigned long)my_tramp, true); } module_init(ftrace_direct_init); diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 1f769d0db20f..956834b0d19a 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -3,8 +3,9 @@ #include <linux/sched.h> /* for wake_up_process() */ #include <linux/ftrace.h> +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include <asm/asm-offsets.h> -#include <asm/nospec-branch.h> +#endif extern void my_direct_func(struct task_struct *p); @@ -15,9 +16,34 @@ void my_direct_func(struct task_struct *p) extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" @@ -63,16 +89,130 @@ asm ( #endif /* CONFIG_S390 */ +#ifdef CONFIG_ARM64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:" +" hint 34\n" // bti c +" sub sp, sp, #32\n" +" stp x9, x30, [sp]\n" +" str x0, [sp, #16]\n" +" bl my_direct_func\n" +" ldp x30, x9, [sp]\n" +" ldr x0, [sp, #16]\n" +" add sp, sp, #32\n" +" ret x9\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_ARM64 */ + +#ifdef CONFIG_LOONGARCH + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi.d $sp, $sp, -32\n" +" st.d $a0, $sp, 0\n" +" st.d $t0, $sp, 8\n" +" st.d $ra, $sp, 16\n" +" bl my_direct_func\n" +" ld.d $a0, $sp, 0\n" +" ld.d $t0, $sp, 8\n" +" ld.d $ra, $sp, 16\n" +" addi.d $sp, $sp, 32\n" +" jr $t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_LOONGARCH */ + +#ifdef CONFIG_PPC +#include <asm/ppc_asm.h> + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" bl my_direct_func\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + +static struct ftrace_ops direct; + static int __init ftrace_direct_init(void) { - return register_ftrace_direct((unsigned long)wake_up_process, - (unsigned long)my_tramp); + ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); + + return register_ftrace_direct(&direct, (unsigned long) my_tramp); } static void __exit ftrace_direct_exit(void) { - unregister_ftrace_direct((unsigned long)wake_up_process, - (unsigned long)my_tramp); + unregister_ftrace_direct(&direct, (unsigned long)my_tramp, true); } module_init(ftrace_direct_init); diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c new file mode 100644 index 000000000000..68d6685c80bd --- /dev/null +++ b/samples/ftrace/ftrace-ops.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/ftrace.h> +#include <linux/ktime.h> +#include <linux/module.h> + +#include <asm/barrier.h> + +/* + * Arbitrary large value chosen to be sufficiently large to minimize noise but + * sufficiently small to complete quickly. + */ +static unsigned int nr_function_calls = 100000; +module_param(nr_function_calls, uint, 0); +MODULE_PARM_DESC(nr_function_calls, "How many times to call the relevant tracee"); + +/* + * The number of ops associated with a call site affects whether a tracer can + * be called directly or whether it's necessary to go via the list func, which + * can be significantly more expensive. + */ +static unsigned int nr_ops_relevant = 1; +module_param(nr_ops_relevant, uint, 0); +MODULE_PARM_DESC(nr_ops_relevant, "How many ftrace_ops to associate with the relevant tracee"); + +/* + * On architectures where all call sites share the same trampoline, having + * tracers enabled for distinct functions can force the use of the list func + * and incur overhead for all call sites. + */ +static unsigned int nr_ops_irrelevant; +module_param(nr_ops_irrelevant, uint, 0); +MODULE_PARM_DESC(nr_ops_irrelevant, "How many ftrace_ops to associate with the irrelevant tracee"); + +/* + * On architectures with DYNAMIC_FTRACE_WITH_REGS, saving the full pt_regs can + * be more expensive than only saving the minimal necessary regs. + */ +static bool save_regs; +module_param(save_regs, bool, 0); +MODULE_PARM_DESC(save_regs, "Register ops with FTRACE_OPS_FL_SAVE_REGS (save all registers in the trampoline)"); + +static bool assist_recursion; +module_param(assist_recursion, bool, 0); +MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RECURSION"); + +static bool assist_rcu; +module_param(assist_rcu, bool, 0); +MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RCU"); + +/* + * By default, a trivial tracer is used which immediately returns to mimimize + * overhead. Sometimes a consistency check using a more expensive tracer is + * desireable. + */ +static bool check_count; +module_param(check_count, bool, 0); +MODULE_PARM_DESC(check_count, "Check that tracers are called the expected number of times\n"); + +/* + * Usually it's not interesting to leave the ops registered after the test + * runs, but sometimes it can be useful to leave them registered so that they + * can be inspected through the tracefs 'enabled_functions' file. + */ +static bool persist; +module_param(persist, bool, 0); +MODULE_PARM_DESC(persist, "Successfully load module and leave ftrace ops registered after test completes\n"); + +/* + * Marked as noinline to ensure that an out-of-line traceable copy is + * generated by the compiler. + * + * The barrier() ensures the compiler won't elide calls by determining there + * are no side-effects. + */ +static noinline void tracee_relevant(void) +{ + barrier(); +} + +/* + * Marked as noinline to ensure that an out-of-line traceable copy is + * generated by the compiler. + * + * The barrier() ensures the compiler won't elide calls by determining there + * are no side-effects. + */ +static noinline void tracee_irrelevant(void) +{ + barrier(); +} + +struct sample_ops { + struct ftrace_ops ops; + unsigned int count; +}; + +static void ops_func_nop(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + /* do nothing */ +} + +static void ops_func_count(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + struct sample_ops *self; + + self = container_of(op, struct sample_ops, ops); + self->count++; +} + +static struct sample_ops *ops_relevant; +static struct sample_ops *ops_irrelevant; + +static struct sample_ops *ops_alloc_init(void *tracee, ftrace_func_t func, + unsigned long flags, int nr) +{ + struct sample_ops *ops; + + ops = kcalloc(nr, sizeof(*ops), GFP_KERNEL); + if (WARN_ON_ONCE(!ops)) + return NULL; + + for (unsigned int i = 0; i < nr; i++) { + ops[i].ops.func = func; + ops[i].ops.flags = flags; + WARN_ON_ONCE(ftrace_set_filter_ip(&ops[i].ops, (unsigned long)tracee, 0, 0)); + WARN_ON_ONCE(register_ftrace_function(&ops[i].ops)); + } + + return ops; +} + +static void ops_destroy(struct sample_ops *ops, int nr) +{ + if (!ops) + return; + + for (unsigned int i = 0; i < nr; i++) { + WARN_ON_ONCE(unregister_ftrace_function(&ops[i].ops)); + ftrace_free_filter(&ops[i].ops); + } + + kfree(ops); +} + +static void ops_check(struct sample_ops *ops, int nr, + unsigned int expected_count) +{ + if (!ops || !check_count) + return; + + for (unsigned int i = 0; i < nr; i++) { + if (ops->count == expected_count) + continue; + pr_warn("Counter called %u times (expected %u)\n", + ops->count, expected_count); + } +} + +static ftrace_func_t tracer_relevant = ops_func_nop; +static ftrace_func_t tracer_irrelevant = ops_func_nop; + +static int __init ftrace_ops_sample_init(void) +{ + unsigned long flags = 0; + ktime_t start, end; + u64 period; + + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && save_regs) { + pr_info("this kernel does not support saving registers\n"); + save_regs = false; + } else if (save_regs) { + flags |= FTRACE_OPS_FL_SAVE_REGS; + } + + if (assist_recursion) + flags |= FTRACE_OPS_FL_RECURSION; + + if (assist_rcu) + flags |= FTRACE_OPS_FL_RCU; + + if (check_count) { + tracer_relevant = ops_func_count; + tracer_irrelevant = ops_func_count; + } + + pr_info("registering:\n" + " relevant ops: %u\n" + " tracee: %ps\n" + " tracer: %ps\n" + " irrelevant ops: %u\n" + " tracee: %ps\n" + " tracer: %ps\n" + " saving registers: %s\n" + " assist recursion: %s\n" + " assist RCU: %s\n", + nr_ops_relevant, tracee_relevant, tracer_relevant, + nr_ops_irrelevant, tracee_irrelevant, tracer_irrelevant, + save_regs ? "YES" : "NO", + assist_recursion ? "YES" : "NO", + assist_rcu ? "YES" : "NO"); + + ops_relevant = ops_alloc_init(tracee_relevant, tracer_relevant, + flags, nr_ops_relevant); + ops_irrelevant = ops_alloc_init(tracee_irrelevant, tracer_irrelevant, + flags, nr_ops_irrelevant); + + start = ktime_get(); + for (unsigned int i = 0; i < nr_function_calls; i++) + tracee_relevant(); + end = ktime_get(); + + ops_check(ops_relevant, nr_ops_relevant, nr_function_calls); + ops_check(ops_irrelevant, nr_ops_irrelevant, 0); + + period = ktime_to_ns(ktime_sub(end, start)); + + pr_info("Attempted %u calls to %ps in %lluns (%lluns / call)\n", + nr_function_calls, tracee_relevant, + period, div_u64(period, nr_function_calls)); + + if (persist) + return 0; + + ops_destroy(ops_relevant, nr_ops_relevant); + ops_destroy(ops_irrelevant, nr_ops_irrelevant); + + /* + * The benchmark completed sucessfully, but there's no reason to keep + * the module around. Return an error do the user doesn't have to + * manually unload the module. + */ + return -EINVAL; +} +module_init(ftrace_ops_sample_init); + +static void __exit ftrace_ops_sample_exit(void) +{ + ops_destroy(ops_relevant, nr_ops_relevant); + ops_destroy(ops_irrelevant, nr_ops_irrelevant); +} +module_exit(ftrace_ops_sample_exit); + +MODULE_AUTHOR("Mark Rutland"); +MODULE_DESCRIPTION("Example of using custom ftrace_ops"); +MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index 6aba02a31c96..4147616102f9 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -82,7 +82,7 @@ static int simple_thread(void *arg) while (!kthread_should_stop()) simple_thread_func(count++); - del_timer(&mytimer); + timer_delete(&mytimer); cancel_work_sync(&trace_work); /* @@ -105,14 +105,14 @@ static int __init sample_trace_array_init(void) * NOTE: This function increments the reference counter * associated with the trace array - "tr". */ - tr = trace_array_get_by_name("sample-instance"); + tr = trace_array_get_by_name("sample-instance", "sched,timer,kprobes"); if (!tr) return -1; /* * If context specific per-cpu buffers havent already been allocated. */ - trace_printk_init_buffers(); + trace_array_init_printk(tr); simple_tsk = kthread_run(simple_thread, NULL, "sample-instance"); if (IS_ERR(simple_tsk)) { diff --git a/samples/hid/.gitignore b/samples/hid/.gitignore new file mode 100644 index 000000000000..3ea0fed3bbad --- /dev/null +++ b/samples/hid/.gitignore @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +hid_mouse +hid_surface_dial +*.out +*.skel.h +/vmlinux.h +/bpftool/ +/libbpf/ diff --git a/samples/hid/Makefile b/samples/hid/Makefile new file mode 100644 index 000000000000..db5a077c77fc --- /dev/null +++ b/samples/hid/Makefile @@ -0,0 +1,250 @@ +# SPDX-License-Identifier: GPL-2.0 + +HID_SAMPLES_PATH ?= $(abspath $(src)) +TOOLS_PATH := $(HID_SAMPLES_PATH)/../../tools + +pound := \# + +# List of programs to build +tprogs-y += hid_mouse +tprogs-y += hid_surface_dial + +# Libbpf dependencies +LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf +LIBBPF_OUTPUT = $(abspath $(HID_SAMPLES_PATH))/libbpf +LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include +LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a + +EXTRA_BPF_HEADERS := hid_bpf_helpers.h + +hid_mouse-objs := hid_mouse.o +hid_surface_dial-objs := hid_surface_dial.o + +# Tell kbuild to always build the programs +always-y := $(tprogs-y) + +ifeq ($(ARCH), arm) +# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux +# headers when arm instruction set identification is requested. +ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS)) +BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR) +TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR) +endif + +ifeq ($(ARCH), mips) +TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__ +ifdef CONFIG_MACH_LOONGSON64 +BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-loongson64 +BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic +endif +endif + +COMMON_CFLAGS += -Wall -O2 +COMMON_CFLAGS += -Wmissing-prototypes +COMMON_CFLAGS += -Wstrict-prototypes + +TPROGS_CFLAGS += $(COMMON_CFLAGS) +TPROGS_CFLAGS += -I$(objtree)/usr/include +TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) +TPROGS_CFLAGS += -I$(srctree)/tools/include + +ifdef SYSROOT +COMMON_CFLAGS += --sysroot=$(SYSROOT) +TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib +endif + +TPROGS_LDLIBS += $(LIBBPF) -lelf -lz + +# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: +# make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang +LLC ?= llc +CLANG ?= clang +OPT ?= opt +LLVM_DIS ?= llvm-dis +LLVM_OBJCOPY ?= llvm-objcopy +LLVM_READELF ?= llvm-readelf +BTF_PAHOLE ?= pahole + +# Detect that we're cross compiling and use the cross compiler +ifdef CROSS_COMPILE +CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +# Don't evaluate probes and warnings if we need to run make recursively +ifneq ($(src),) +HDR_PROBE := $(shell printf "$(pound)include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \ + $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \ + -o /dev/null 2>/dev/null && echo okay) + +ifeq ($(HDR_PROBE),) +$(warning WARNING: Detected possible issues with include path.) +$(warning WARNING: Please install kernel headers locally (make headers_install).) +endif + +BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) +BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) +BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') +BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ + $(CLANG) --target=bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ + /bin/rm -f ./llvm_btf_verify.o) + +BPF_EXTRA_CFLAGS += -fno-stack-protector +ifneq ($(BTF_LLVM_PROBE),) + BPF_EXTRA_CFLAGS += -g +else +ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),) + BPF_EXTRA_CFLAGS += -g + LLC_FLAGS += -mattr=dwarfris + DWARF2BTF = y +endif +endif +endif + +# Trick to allow make to be run from this directory +all: + $(MAKE) -C ../../ M=$(CURDIR) HID_SAMPLES_PATH=$(CURDIR) + +clean: + $(MAKE) -C ../../ M=$(CURDIR) clean + @find $(CURDIR) -type f -name '*~' -delete + @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool + +$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) +# Fix up variables inherited from Kbuild that tools/ build system won't like + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \ + LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(HID_SAMPLES_PATH)/../../ \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $@ install_headers + +BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool +BPFTOOL_OUTPUT := $(abspath $(HID_SAMPLES_PATH))/bpftool +BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool +$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) + $(MAKE) -C $(BPFTOOLDIR) srctree=$(HID_SAMPLES_PATH)/../../ \ + OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap + +$(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ + +FORCE: + + +# Verify LLVM compiler tools are available and bpf target is supported by llc +.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) + +verify_cmds: $(CLANG) $(LLC) + @for TOOL in $^ ; do \ + if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \ + echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\ + exit 1; \ + else true; fi; \ + done + +verify_target_bpf: verify_cmds + @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \ + echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\ + echo " NOTICE: LLVM version >= 3.7.1 required" ;\ + exit 2; \ + else true; fi + +$(HID_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) +$(src)/*.c: verify_target_bpf $(LIBBPF) + +libbpf_hdrs: $(LIBBPF) + +.PHONY: libbpf_hdrs + +$(obj)/hid_mouse.o: $(obj)/hid_mouse.skel.h +$(obj)/hid_surface_dial.o: $(obj)/hid_surface_dial.skel.h + +-include $(HID_SAMPLES_PATH)/Makefile.target + +VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ + $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ + $(abspath $(objtree)/vmlinux) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + +$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) +ifeq ($(VMLINUX_H),) +ifeq ($(VMLINUX_BTF),) + $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ + build the kernel or set VMLINUX_BTF or VMLINUX_H variable) +endif + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ +else + $(Q)cp "$(VMLINUX_H)" $@ +endif + +clean-files += vmlinux.h + +# Get Clang's default includes on this system, as opposed to those seen by +# '--target=bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/') +endef + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) + +EXTRA_BPF_HEADERS_SRC := $(addprefix $(src)/,$(EXTRA_BPF_HEADERS)) + +$(obj)/%.bpf.o: $(src)/%.bpf.c $(EXTRA_BPF_HEADERS_SRC) $(obj)/vmlinux.h + @echo " CLANG-BPF " $@ + $(Q)$(CLANG) -g -O2 --target=bpf -D__TARGET_ARCH_$(SRCARCH) \ + -Wno-compare-distinct-pointer-types -I$(srctree)/include \ + -I$(srctree)/samples/bpf -I$(srctree)/tools/include \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ + -c $(filter %.bpf.c,$^) -o $@ + +LINKED_SKELS := hid_mouse.skel.h hid_surface_dial.skel.h +clean-files += $(LINKED_SKELS) + +hid_mouse.skel.h-deps := hid_mouse.bpf.o +hid_surface_dial.skel.h-deps := hid_surface_dial.bpf.o + +LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) + +BPF_SRCS_LINKED := $(notdir $(wildcard $(src)/*.bpf.c)) +BPF_OBJS_LINKED := $(patsubst %.bpf.c,$(obj)/%.bpf.o, $(BPF_SRCS_LINKED)) +BPF_SKELS_LINKED := $(addprefix $(obj)/,$(LINKED_SKELS)) + +$(BPF_SKELS_LINKED): $(BPF_OBJS_LINKED) $(BPFTOOL) + @echo " BPF GEN-OBJ " $(@:.skel.h=) + $(Q)$(BPFTOOL) gen object $(@:.skel.h=.lbpf.o) $(addprefix $(obj)/,$($(@F)-deps)) + @echo " BPF GEN-SKEL" $(@:.skel.h=) + $(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=.lbpf.o) name $(notdir $(@:.skel.h=)) > $@ + +# asm/sysreg.h - inline assembly used by it is incompatible with llvm. +# But, there is no easy way to fix it, so just exclude it since it is +# useless for BPF samples. +# below we use long chain of commands, clang | opt | llvm-dis | llc, +# to generate final object file. 'clang' compiles the source into IR +# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin +# processing (llvm12) and IR optimizations. 'llvm-dis' converts +# 'opt' output to IR, and finally 'llc' generates bpf byte code. +$(obj)/%.o: $(src)/%.c + @echo " CLANG-bpf " $@ + $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ + -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ + -I$(LIBBPF_INCLUDE) \ + -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ + -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ + -Wno-gnu-variable-sized-type-not-at-end \ + -Wno-address-of-packed-member -Wno-tautological-compare \ + -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ + -fno-asynchronous-unwind-tables \ + -I$(srctree)/samples/hid/ \ + -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ + $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ + $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif diff --git a/samples/hid/Makefile.target b/samples/hid/Makefile.target new file mode 100644 index 000000000000..7621f55e2947 --- /dev/null +++ b/samples/hid/Makefile.target @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0 +# ========================================================================== +# Building binaries on the host system +# Binaries are not used during the compilation of the kernel, and intended +# to be build for target board, target board can be host of course. Added to +# build binaries to run not on host system. +# +# Sample syntax +# tprogs-y := xsk_example +# Will compile xsk_example.c and create an executable named xsk_example +# +# tprogs-y := xdpsock +# xdpsock-objs := xdpsock_1.o xdpsock_2.o +# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable +# xdpsock, based on xdpsock_1.o and xdpsock_2.o +# +# Derived from scripts/Makefile.host +# +__tprogs := $(sort $(tprogs-y)) + +# C code +# Executables compiled from a single .c file +tprog-csingle := $(foreach m,$(__tprogs), \ + $(if $($(m)-objs),,$(m))) + +# C executables linked based on several .o files +tprog-cmulti := $(foreach m,$(__tprogs),\ + $(if $($(m)-objs),$(m))) + +# Object (.o) files compiled from .c files +tprog-cobjs := $(sort $(foreach m,$(__tprogs),$($(m)-objs))) + +tprog-csingle := $(addprefix $(obj)/,$(tprog-csingle)) +tprog-cmulti := $(addprefix $(obj)/,$(tprog-cmulti)) +tprog-cobjs := $(addprefix $(obj)/,$(tprog-cobjs)) + +##### +# Handle options to gcc. Support building with separate output directory + +_tprogc_flags = $(TPROGS_CFLAGS) \ + $(TPROGCFLAGS_$(basetarget).o) + +# $(objtree)/$(obj) for including generated headers from checkin source files +ifeq ($(KBUILD_EXTMOD),) +ifdef building_out_of_srctree +_tprogc_flags += -I $(objtree)/$(obj) +endif +endif + +tprogc_flags = -Wp,-MD,$(depfile) $(_tprogc_flags) + +# Create executable from a single .c file +# tprog-csingle -> Executable +quiet_cmd_tprog-csingle = CC $@ + cmd_tprog-csingle = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \ + $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F)) +$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE + $(call if_changed_dep,tprog-csingle) + +# Link an executable based on list of .o files, all plain c +# tprog-cmulti -> executable +quiet_cmd_tprog-cmulti = LD $@ + cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \ + $(addprefix $(obj)/,$($(@F)-objs)) \ + $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F)) +$(tprog-cmulti): $(tprog-cobjs) FORCE + $(call if_changed,tprog-cmulti) +$(call multi_depend, $(tprog-cmulti), , -objs) + +# Create .o file from a single .c file +# tprog-cobjs -> .o +quiet_cmd_tprog-cobjs = CC $@ + cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $< +$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE + $(call if_changed_dep,tprog-cobjs) diff --git a/samples/hid/hid_bpf_helpers.h b/samples/hid/hid_bpf_helpers.h new file mode 100644 index 000000000000..4fff31dbe0e7 --- /dev/null +++ b/samples/hid/hid_bpf_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2022 Benjamin Tissoires + */ + +#ifndef __HID_BPF_HELPERS_H +#define __HID_BPF_HELPERS_H + +/* following are kfuncs exported by HID for HID-BPF */ +extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, + unsigned int offset, + const size_t __sz) __ksym; +extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym; +extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym; +extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym; +extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, + __u8 *data, + size_t buf__sz, + enum hid_report_type type, + enum hid_class_request reqtype) __ksym; + +#endif /* __HID_BPF_HELPERS_H */ diff --git a/samples/hid/hid_mouse.bpf.c b/samples/hid/hid_mouse.bpf.c new file mode 100644 index 000000000000..f7f722dcf56d --- /dev/null +++ b/samples/hid/hid_mouse.bpf.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "hid_bpf_helpers.h" + +static int hid_y_event(struct hid_bpf_ctx *hctx) +{ + s16 y; + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 9 /* size */); + + if (!data) + return 0; /* EPERM check */ + + bpf_printk("event: size: %d", hctx->size); + bpf_printk("incoming event: %02x %02x %02x", + data[0], + data[1], + data[2]); + bpf_printk(" %02x %02x %02x", + data[3], + data[4], + data[5]); + bpf_printk(" %02x %02x %02x", + data[6], + data[7], + data[8]); + + y = data[3] | (data[4] << 8); + + y = -y; + + data[3] = y & 0xFF; + data[4] = (y >> 8) & 0xFF; + + bpf_printk("modified event: %02x %02x %02x", + data[0], + data[1], + data[2]); + bpf_printk(" %02x %02x %02x", + data[3], + data[4], + data[5]); + bpf_printk(" %02x %02x %02x", + data[6], + data[7], + data[8]); + + return 0; +} + +static int hid_x_event(struct hid_bpf_ctx *hctx) +{ + s16 x; + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 9 /* size */); + + if (!data) + return 0; /* EPERM check */ + + x = data[1] | (data[2] << 8); + + x = -x; + + data[1] = x & 0xFF; + data[2] = (x >> 8) & 0xFF; + return 0; +} + +SEC("struct_ops/hid_device_event") +int BPF_PROG(hid_event, struct hid_bpf_ctx *hctx, enum hid_report_type type) +{ + int ret = hid_y_event(hctx); + + if (ret) + return ret; + + return hid_x_event(hctx); +} + + +SEC("struct_ops/hid_rdesc_fixup") +int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 4096 /* size */); + + if (!data) + return 0; /* EPERM check */ + + bpf_printk("rdesc: %02x %02x %02x", + data[0], + data[1], + data[2]); + bpf_printk(" %02x %02x %02x", + data[3], + data[4], + data[5]); + bpf_printk(" %02x %02x %02x ...", + data[6], + data[7], + data[8]); + + /* + * The original report descriptor contains: + * + * 0x05, 0x01, // Usage Page (Generic Desktop) 30 + * 0x16, 0x01, 0x80, // Logical Minimum (-32767) 32 + * 0x26, 0xff, 0x7f, // Logical Maximum (32767) 35 + * 0x09, 0x30, // Usage (X) 38 + * 0x09, 0x31, // Usage (Y) 40 + * + * So byte 39 contains Usage X and byte 41 Usage Y. + * + * We simply swap the axes here. + */ + data[39] = 0x31; + data[41] = 0x30; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops mouse_invert = { + .hid_rdesc_fixup = (void *)hid_rdesc_fixup, + .hid_device_event = (void *)hid_event, +}; + +char _license[] SEC("license") = "GPL"; diff --git a/samples/hid/hid_mouse.c b/samples/hid/hid_mouse.c new file mode 100644 index 000000000000..4b80d4e4c154 --- /dev/null +++ b/samples/hid/hid_mouse.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Benjamin Tissoires + * + * This is a pure HID-BPF example, and should be considered as such: + * on the Etekcity Scroll 6E, the X and Y axes will be swapped and + * inverted. On any other device... Not sure what this will do. + * + * This C main file is generic though. To adapt the code and test, users + * must amend only the .bpf.c file, which this program will load any + * eBPF program it finds. + */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <unistd.h> + +#include <linux/bpf.h> +#include <linux/errno.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "hid_mouse.skel.h" + +static bool running = true; + +static void int_exit(int sig) +{ + running = false; + exit(0); +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s /sys/bus/hid/devices/0BUS:0VID:0PID:00ID\n\n", + __func__, prog); + fprintf(stderr, + "This program will upload and attach a HID-BPF program to the given device.\n" + "On the Etekcity Scroll 6E, the X and Y axis will be inverted, but on any other\n" + "device, chances are high that the device will not be working anymore\n\n" + "consider this as a demo and adapt the eBPF program to your needs\n" + "Hit Ctrl-C to unbind the program and reset the device\n"); +} + +static int get_hid_id(const char *path) +{ + const char *str_id, *dir; + char uevent[1024]; + int fd; + + memset(uevent, 0, sizeof(uevent)); + snprintf(uevent, sizeof(uevent) - 1, "%s/uevent", path); + + fd = open(uevent, O_RDONLY | O_NONBLOCK); + if (fd < 0) + return -ENOENT; + + close(fd); + + dir = basename((char *)path); + + str_id = dir + sizeof("0003:0001:0A37."); + return (int)strtol(str_id, NULL, 16); +} + +int main(int argc, char **argv) +{ + struct hid_mouse *skel; + struct bpf_link *link; + int err; + const char *optstr = ""; + const char *sysfs_path; + int opt, hid_id; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + usage(basename(argv[0])); + return 1; + } + + sysfs_path = argv[optind]; + if (!sysfs_path) { + perror("sysfs"); + return 1; + } + + skel = hid_mouse__open(); + if (!skel) { + fprintf(stderr, "%s %s:%d", __func__, __FILE__, __LINE__); + return -1; + } + + hid_id = get_hid_id(sysfs_path); + + if (hid_id < 0) { + fprintf(stderr, "can not open HID device: %m\n"); + return 1; + } + skel->struct_ops.mouse_invert->hid_id = hid_id; + + err = hid_mouse__load(skel); + if (err < 0) { + fprintf(stderr, "can not load HID-BPF program: %m\n"); + return 1; + } + + link = bpf_map__attach_struct_ops(skel->maps.mouse_invert); + if (!link) { + fprintf(stderr, "can not attach HID-BPF program: %m\n"); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + while (running) + sleep(1); + + hid_mouse__destroy(skel); + + return 0; +} diff --git a/samples/hid/hid_surface_dial.bpf.c b/samples/hid/hid_surface_dial.bpf.c new file mode 100644 index 000000000000..527d584812ab --- /dev/null +++ b/samples/hid/hid_surface_dial.bpf.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Benjamin Tissoires + */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "hid_bpf_helpers.h" + +#define HID_UP_BUTTON 0x0009 +#define HID_GD_WHEEL 0x0038 + +SEC("struct_ops/hid_device_event") +int BPF_PROG(hid_event, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 9 /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* Touch */ + data[1] &= 0xfd; + + /* X */ + data[4] = 0; + data[5] = 0; + + /* Y */ + data[6] = 0; + data[7] = 0; + + return 0; +} + +/* 72 == 360 / 5 -> 1 report every 5 degrees */ +int resolution = 72; +int physical = 5; + +struct haptic_syscall_args { + unsigned int hid; + int retval; +}; + +static __u8 haptic_data[8]; + +SEC("syscall") +int set_haptic(struct haptic_syscall_args *args) +{ + struct hid_bpf_ctx *ctx; + const size_t size = sizeof(haptic_data); + u16 *res; + int ret; + + if (size > sizeof(haptic_data)) + return -7; /* -E2BIG */ + + ctx = hid_bpf_allocate_context(args->hid); + if (!ctx) + return -1; /* EPERM check */ + + haptic_data[0] = 1; /* report ID */ + + ret = hid_bpf_hw_request(ctx, haptic_data, size, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + + bpf_printk("probed/remove event ret value: %d", ret); + bpf_printk("buf: %02x %02x %02x", + haptic_data[0], + haptic_data[1], + haptic_data[2]); + bpf_printk(" %02x %02x %02x", + haptic_data[3], + haptic_data[4], + haptic_data[5]); + bpf_printk(" %02x %02x", + haptic_data[6], + haptic_data[7]); + + /* whenever resolution multiplier is not 3600, we have the fixed report descriptor */ + res = (u16 *)&haptic_data[1]; + if (*res != 3600) { +// haptic_data[1] = 72; /* resolution multiplier */ +// haptic_data[2] = 0; /* resolution multiplier */ +// haptic_data[3] = 0; /* Repeat Count */ + haptic_data[4] = 3; /* haptic Auto Trigger */ +// haptic_data[5] = 5; /* Waveform Cutoff Time */ +// haptic_data[6] = 80; /* Retrigger Period */ +// haptic_data[7] = 0; /* Retrigger Period */ + } else { + haptic_data[4] = 0; + } + + ret = hid_bpf_hw_request(ctx, haptic_data, size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + + bpf_printk("set haptic ret value: %d -> %d", ret, haptic_data[4]); + + args->retval = ret; + + hid_bpf_release_context(ctx); + + return 0; +} + +/* Convert REL_DIAL into REL_WHEEL */ +SEC("struct_ops/hid_rdesc_fixup") +int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 4096 /* size */); + __u16 *res, *phys; + + if (!data) + return 0; /* EPERM check */ + + /* Convert TOUCH into a button */ + data[31] = HID_UP_BUTTON; + data[33] = 2; + + /* Convert REL_DIAL into REL_WHEEL */ + data[45] = HID_GD_WHEEL; + + /* Change Resolution Multiplier */ + phys = (__u16 *)&data[61]; + *phys = physical; + res = (__u16 *)&data[66]; + *res = resolution; + + /* Convert X,Y from Abs to Rel */ + data[88] = 0x06; + data[98] = 0x06; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops surface_dial = { + .hid_rdesc_fixup = (void *)hid_rdesc_fixup, + .hid_device_event = (void *)hid_event, +}; + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = 1; diff --git a/samples/hid/hid_surface_dial.c b/samples/hid/hid_surface_dial.c new file mode 100644 index 000000000000..9dd363845a85 --- /dev/null +++ b/samples/hid/hid_surface_dial.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Benjamin Tissoires + * + * This program will morph the Microsoft Surface Dial into a mouse, + * and depending on the chosen resolution enable or not the haptic feedback: + * - a resolution (-r) of 3600 will report 3600 "ticks" in one full rotation + * without haptic feedback + * - any other resolution will report N "ticks" in a full rotation with haptic + * feedback + * + * A good default for low resolution haptic scrolling is 72 (1 "tick" every 5 + * degrees), and set to 3600 for smooth scrolling. + */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <unistd.h> + +#include <linux/bpf.h> +#include <linux/errno.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "hid_surface_dial.skel.h" + +static bool running = true; + +struct haptic_syscall_args { + unsigned int hid; + int retval; +}; + +static void int_exit(int sig) +{ + running = false; + exit(0); +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s [OPTIONS] /sys/bus/hid/devices/0BUS:0VID:0PID:00ID\n\n" + " OPTIONS:\n" + " -r N\t set the given resolution to the device (number of ticks per 360°)\n\n", + __func__, prog); + fprintf(stderr, + "This program will morph the Microsoft Surface Dial into a mouse,\n" + "and depending on the chosen resolution enable or not the haptic feedback:\n" + "- a resolution (-r) of 3600 will report 3600 'ticks' in one full rotation\n" + " without haptic feedback\n" + "- any other resolution will report N 'ticks' in a full rotation with haptic\n" + " feedback\n" + "\n" + "A good default for low resolution haptic scrolling is 72 (1 'tick' every 5\n" + "degrees), and set to 3600 for smooth scrolling.\n"); +} + +static int get_hid_id(const char *path) +{ + const char *str_id, *dir; + char uevent[1024]; + int fd; + + memset(uevent, 0, sizeof(uevent)); + snprintf(uevent, sizeof(uevent) - 1, "%s/uevent", path); + + fd = open(uevent, O_RDONLY | O_NONBLOCK); + if (fd < 0) + return -ENOENT; + + close(fd); + + dir = basename((char *)path); + + str_id = dir + sizeof("0003:0001:0A37."); + return (int)strtol(str_id, NULL, 16); +} + +static int set_haptic(struct hid_surface_dial *skel, int hid_id) +{ + struct haptic_syscall_args args = { + .hid = hid_id, + .retval = -1, + }; + int haptic_fd, err; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr, + .ctx_in = &args, + .ctx_size_in = sizeof(args), + ); + + haptic_fd = bpf_program__fd(skel->progs.set_haptic); + if (haptic_fd < 0) { + fprintf(stderr, "can't locate haptic prog: %m\n"); + return 1; + } + + err = bpf_prog_test_run_opts(haptic_fd, &tattr); + if (err) { + fprintf(stderr, "can't set haptic configuration to hid device %d: %m (err: %d)\n", + hid_id, err); + return 1; + } + return 0; +} + +int main(int argc, char **argv) +{ + struct hid_surface_dial *skel; + const char *optstr = "r:"; + struct bpf_link *link; + const char *sysfs_path; + int err, opt, hid_id, resolution = 72; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'r': + { + char *endp = NULL; + long l = -1; + + if (optarg) { + l = strtol(optarg, &endp, 10); + if (endp && *endp) + l = -1; + } + + if (l < 0) { + fprintf(stderr, + "invalid r option %s - expecting a number\n", + optarg ? optarg : ""); + exit(EXIT_FAILURE); + }; + + resolution = (int) l; + break; + } + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + usage(basename(argv[0])); + return 1; + } + + sysfs_path = argv[optind]; + if (!sysfs_path) { + perror("sysfs"); + return 1; + } + + skel = hid_surface_dial__open(); + if (!skel) { + fprintf(stderr, "%s %s:%d", __func__, __FILE__, __LINE__); + return -1; + } + + hid_id = get_hid_id(sysfs_path); + if (hid_id < 0) { + fprintf(stderr, "can not open HID device: %m\n"); + return 1; + } + + skel->struct_ops.surface_dial->hid_id = hid_id; + + err = hid_surface_dial__load(skel); + if (err < 0) { + fprintf(stderr, "can not load HID-BPF program: %m\n"); + return 1; + } + + skel->data->resolution = resolution; + skel->data->physical = (int)(resolution / 72); + + link = bpf_map__attach_struct_ops(skel->maps.surface_dial); + if (!link) { + fprintf(stderr, "can not attach HID-BPF program: %m\n"); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + set_haptic(skel, hid_id); + + while (running) + sleep(1); + + hid_surface_dial__destroy(skel); + + return 0; +} diff --git a/samples/hung_task/Makefile b/samples/hung_task/Makefile new file mode 100644 index 000000000000..86036f1a204d --- /dev/null +++ b/samples/hung_task/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task_tests.o diff --git a/samples/hung_task/hung_task_tests.c b/samples/hung_task/hung_task_tests.c new file mode 100644 index 000000000000..0360ec916890 --- /dev/null +++ b/samples/hung_task/hung_task_tests.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * hung_task_tests.c - Sample code for testing hung tasks with mutex, + * semaphore, etc. + * + * Usage: Load this module and read `<debugfs>/hung_task/mutex`, + * `<debugfs>/hung_task/semaphore`, `<debugfs>/hung_task/rw_semaphore_read`, + * `<debugfs>/hung_task/rw_semaphore_write`, etc., with 2 or more processes. + * + * This is for testing kernel hung_task error messages with various locking + * mechanisms (e.g., mutex, semaphore, rw_semaphore_read, rw_semaphore_write, etc.). + * Note that this may freeze your system or cause a panic. Use only for testing purposes. + */ + +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/semaphore.h> +#include <linux/rwsem.h> + +#define HUNG_TASK_DIR "hung_task" +#define HUNG_TASK_MUTEX_FILE "mutex" +#define HUNG_TASK_SEM_FILE "semaphore" +#define HUNG_TASK_RWSEM_READ_FILE "rw_semaphore_read" +#define HUNG_TASK_RWSEM_WRITE_FILE "rw_semaphore_write" +#define SLEEP_SECOND 256 + +static const char dummy_string[] = "This is a dummy string."; +static DEFINE_MUTEX(dummy_mutex); +static DEFINE_SEMAPHORE(dummy_sem, 1); +static DECLARE_RWSEM(dummy_rwsem); +static struct dentry *hung_task_dir; + +/* Mutex-based read function */ +static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Second task waits on mutex, entering uninterruptible sleep */ + guard(mutex)(&dummy_mutex); + + /* First task sleeps here, interruptible */ + msleep_interruptible(SLEEP_SECOND * 1000); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* Semaphore-based read function */ +static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Second task waits on semaphore, entering uninterruptible sleep */ + down(&dummy_sem); + + /* First task sleeps here, interruptible */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up(&dummy_sem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* Read-write semaphore read function */ +static ssize_t read_dummy_rwsem_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Acquires read lock, allowing concurrent readers but blocks if write lock is held */ + down_read(&dummy_rwsem); + + /* Sleeps here, potentially triggering hung task detection if lock is held too long */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up_read(&dummy_rwsem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* Read-write semaphore write function */ +static ssize_t read_dummy_rwsem_write(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Check if data is already read */ + if (*ppos >= sizeof(dummy_string)) + return 0; + + /* Acquires exclusive write lock, blocking all other readers and writers */ + down_write(&dummy_rwsem); + + /* Sleeps here, potentially triggering hung task detection if lock is held too long */ + msleep_interruptible(SLEEP_SECOND * 1000); + + up_write(&dummy_rwsem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* File operations for mutex */ +static const struct file_operations hung_task_mutex_fops = { + .read = read_dummy_mutex, +}; + +/* File operations for semaphore */ +static const struct file_operations hung_task_sem_fops = { + .read = read_dummy_semaphore, +}; + +/* File operations for rw_semaphore read */ +static const struct file_operations hung_task_rwsem_read_fops = { + .read = read_dummy_rwsem_read, +}; + +/* File operations for rw_semaphore write */ +static const struct file_operations hung_task_rwsem_write_fops = { + .read = read_dummy_rwsem_write, +}; + +static int __init hung_task_tests_init(void) +{ + hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, NULL); + if (IS_ERR(hung_task_dir)) + return PTR_ERR(hung_task_dir); + + /* Create debugfs files for mutex and semaphore tests */ + debugfs_create_file(HUNG_TASK_MUTEX_FILE, 0400, hung_task_dir, NULL, + &hung_task_mutex_fops); + debugfs_create_file(HUNG_TASK_SEM_FILE, 0400, hung_task_dir, NULL, + &hung_task_sem_fops); + debugfs_create_file(HUNG_TASK_RWSEM_READ_FILE, 0400, hung_task_dir, NULL, + &hung_task_rwsem_read_fops); + debugfs_create_file(HUNG_TASK_RWSEM_WRITE_FILE, 0400, hung_task_dir, NULL, + &hung_task_rwsem_write_fops); + + return 0; +} + +static void __exit hung_task_tests_exit(void) +{ + debugfs_remove_recursive(hung_task_dir); +} + +module_init(hung_task_tests_init); +module_exit(hung_task_tests_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Masami Hiramatsu <mhiramat@kernel.org>"); +MODULE_AUTHOR("Zi Li <amaindex@outlook.com>"); +MODULE_DESCRIPTION("Simple sleep under lock files for testing hung task"); diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 418c46fe5ffc..fbb03b66dcbd 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -21,7 +21,7 @@ #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> -struct perf_event * __percpu *sample_hbp; +static struct perf_event * __percpu *sample_hbp; static char ksym_name[KSYM_NAME_LEN] = "jiffies"; module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); @@ -52,8 +52,8 @@ static int __init hw_break_module_init(void) attr.bp_type = HW_BREAKPOINT_W; sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL); - if (IS_ERR((void __force *)sample_hbp)) { - ret = PTR_ERR((void __force *)sample_hbp); + if (IS_ERR_PCPU(sample_hbp)) { + ret = PTR_ERR_PCPU(sample_hbp); goto fail; } @@ -70,7 +70,9 @@ fail: static void __exit hw_break_module_exit(void) { unregister_wide_hw_breakpoint(sample_hbp); - symbol_put(ksym_name); +#ifdef CONFIG_MODULE_UNLOAD + __symbol_put(ksym_name); +#endif printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); } diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index 642d0748c169..4ae29a12cc8a 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -191,5 +191,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample kfifo byte stream implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 0cf27483cb36..8076ac410161 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -6,8 +6,10 @@ */ #include <linux/init.h> -#include <linux/module.h> #include <linux/kfifo.h> +#include <linux/module.h> +#include <linux/scatterlist.h> +#include <linux/dma-mapping.h> /* * This module shows how to handle fifo dma operations. @@ -137,5 +139,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample fifo dma implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c index c61482ba94f4..e4f93317c5d0 100644 --- a/samples/kfifo/inttype-example.c +++ b/samples/kfifo/inttype-example.c @@ -182,5 +182,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample kfifo int type implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c index e4087b2d3fc4..e4d1a2d7983c 100644 --- a/samples/kfifo/record-example.c +++ b/samples/kfifo/record-example.c @@ -198,5 +198,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample dynamic sized record fifo implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/samples/kmemleak/Makefile b/samples/kmemleak/Makefile index 16b6132c540c..8a999ab43b6d 100644 --- a/samples/kmemleak/Makefile +++ b/samples/kmemleak/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o +obj-$(CONFIG_SAMPLE_KMEMLEAK) += kmemleak-test.o diff --git a/samples/kmemleak/kmemleak-test.c b/samples/kmemleak/kmemleak-test.c index 7b476eb8285f..8609812a37eb 100644 --- a/samples/kmemleak/kmemleak-test.c +++ b/samples/kmemleak/kmemleak-test.c @@ -32,7 +32,7 @@ static DEFINE_PER_CPU(void *, kmemleak_test_pointer); * Some very simple testing. This function needs to be extended for * proper testing. */ -static int __init kmemleak_test_init(void) +static int kmemleak_test_init(void) { struct test_node *elem; int i; @@ -40,25 +40,25 @@ static int __init kmemleak_test_init(void) pr_info("Kmemleak testing\n"); /* make some orphan objects */ - pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); - pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); + pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL)); + pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL)); + pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL)); + pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL)); + pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL)); + pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL)); + pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL)); + pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL)); #ifndef CONFIG_MODULES - pr_info("kmem_cache_alloc(files_cachep) = %p\n", + pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n", kmem_cache_alloc(files_cachep, GFP_KERNEL)); - pr_info("kmem_cache_alloc(files_cachep) = %p\n", + pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n", kmem_cache_alloc(files_cachep, GFP_KERNEL)); #endif - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); /* * Add elements to a list. They should only appear as orphan @@ -66,7 +66,7 @@ static int __init kmemleak_test_init(void) */ for (i = 0; i < 10; i++) { elem = kzalloc(sizeof(*elem), GFP_KERNEL); - pr_info("kzalloc(sizeof(*elem)) = %p\n", elem); + pr_info("kzalloc(sizeof(*elem)) = 0x%px\n", elem); if (!elem) return -ENOMEM; INIT_LIST_HEAD(&elem->list); @@ -75,10 +75,12 @@ static int __init kmemleak_test_init(void) for_each_possible_cpu(i) { per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL); - pr_info("kmalloc(129) = %p\n", + pr_info("kmalloc(129) = 0x%px\n", per_cpu(kmemleak_test_pointer, i)); } + pr_info("__alloc_percpu(64, 4) = 0x%px\n", __alloc_percpu(64, 4)); + return 0; } module_init(kmemleak_test_init); @@ -96,4 +98,5 @@ static void __exit kmemleak_test_exit(void) } module_exit(kmemleak_test_exit); +MODULE_DESCRIPTION("Sample module to leak memory for kmemleak testing"); MODULE_LICENSE("GPL"); diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c index 96678ed73216..36d87ca0bee2 100644 --- a/samples/kobject/kobject-example.c +++ b/samples/kobject/kobject-example.c @@ -13,7 +13,7 @@ /* * This module shows how to create a simple subdirectory in sysfs called - * /sys/kernel/kobject-example In that directory, 3 files are created: + * /sys/kernel/kobject_example In that directory, 3 files are created: * "foo", "baz", and "bar". If an integer is written to these files, it can be * later read out of it. */ @@ -102,7 +102,7 @@ static struct attribute *attrs[] = { * created for the attributes with the directory being the name of the * attribute group. */ -static struct attribute_group attr_group = { +static const struct attribute_group attr_group = { .attrs = attrs, }; @@ -140,5 +140,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample kobject implementation"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>"); diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c index 52f1acabd479..d0103904e5dd 100644 --- a/samples/kobject/kset-example.c +++ b/samples/kobject/kset-example.c @@ -14,8 +14,8 @@ /* * This module shows how to create a kset in sysfs called - * /sys/kernel/kset-example - * Then tree kobjects are created and assigned to this kset, "foo", "baz", + * /sys/kernel/kset_example + * Then three kobjects are created and assigned to this kset, "foo", "baz", * and "bar". In those kobjects, attributes of the same name are also * created and if an integer is written to these files, it can be later * read out of it. @@ -37,10 +37,11 @@ struct foo_obj { /* a custom attribute that works just for a struct foo_obj. */ struct foo_attribute { struct attribute attr; - ssize_t (*show)(struct foo_obj *foo, struct foo_attribute *attr, char *buf); - ssize_t (*store)(struct foo_obj *foo, struct foo_attribute *attr, const char *buf, size_t count); + ssize_t (*show)(struct foo_obj *foo, const struct foo_attribute *attr, char *buf); + ssize_t (*store)(struct foo_obj *foo, const struct foo_attribute *attr, + const char *buf, size_t count); }; -#define to_foo_attr(x) container_of(x, struct foo_attribute, attr) +#define to_foo_attr(x) container_of_const(x, struct foo_attribute, attr) /* * The default show function that must be passed to sysfs. This will be @@ -53,7 +54,7 @@ static ssize_t foo_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { - struct foo_attribute *attribute; + const struct foo_attribute *attribute; struct foo_obj *foo; attribute = to_foo_attr(attr); @@ -73,7 +74,7 @@ static ssize_t foo_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len) { - struct foo_attribute *attribute; + const struct foo_attribute *attribute; struct foo_obj *foo; attribute = to_foo_attr(attr); @@ -109,13 +110,13 @@ static void foo_release(struct kobject *kobj) /* * The "foo" file where the .foo variable is read from and written to. */ -static ssize_t foo_show(struct foo_obj *foo_obj, struct foo_attribute *attr, +static ssize_t foo_show(struct foo_obj *foo_obj, const struct foo_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", foo_obj->foo); } -static ssize_t foo_store(struct foo_obj *foo_obj, struct foo_attribute *attr, +static ssize_t foo_store(struct foo_obj *foo_obj, const struct foo_attribute *attr, const char *buf, size_t count) { int ret; @@ -128,14 +129,14 @@ static ssize_t foo_store(struct foo_obj *foo_obj, struct foo_attribute *attr, } /* Sysfs attributes cannot be world-writable. */ -static struct foo_attribute foo_attribute = +static const struct foo_attribute foo_attribute = __ATTR(foo, 0664, foo_show, foo_store); /* * More complex function where we determine which variable is being accessed by * looking at the attribute for the "baz" and "bar" files. */ -static ssize_t b_show(struct foo_obj *foo_obj, struct foo_attribute *attr, +static ssize_t b_show(struct foo_obj *foo_obj, const struct foo_attribute *attr, char *buf) { int var; @@ -147,7 +148,7 @@ static ssize_t b_show(struct foo_obj *foo_obj, struct foo_attribute *attr, return sysfs_emit(buf, "%d\n", var); } -static ssize_t b_store(struct foo_obj *foo_obj, struct foo_attribute *attr, +static ssize_t b_store(struct foo_obj *foo_obj, const struct foo_attribute *attr, const char *buf, size_t count) { int var, ret; @@ -163,29 +164,44 @@ static ssize_t b_store(struct foo_obj *foo_obj, struct foo_attribute *attr, return count; } -static struct foo_attribute baz_attribute = +static const struct foo_attribute baz_attribute = __ATTR(baz, 0664, b_show, b_store); -static struct foo_attribute bar_attribute = +static const struct foo_attribute bar_attribute = __ATTR(bar, 0664, b_show, b_store); /* * Create a group of attributes so that we can create and destroy them all * at once. */ -static struct attribute *foo_default_attrs[] = { +static const struct attribute *const foo_default_attrs[] = { &foo_attribute.attr, &baz_attribute.attr, &bar_attribute.attr, NULL, /* need to NULL terminate the list of attributes */ }; -ATTRIBUTE_GROUPS(foo_default); + +static umode_t foo_default_attrs_is_visible(struct kobject *kobj, + const struct attribute *attr, + int n) +{ + /* Hide attributes with the same name as the kobject. */ + if (strcmp(kobject_name(kobj), attr->name) == 0) + return 0; + return attr->mode; +} + +static const struct attribute_group foo_default_group = { + .attrs_const = foo_default_attrs, + .is_visible_const = foo_default_attrs_is_visible, +}; +__ATTRIBUTE_GROUPS(foo_default); /* * Our own ktype for our kobjects. Here we specify our sysfs ops, the * release function, and the set of default attributes we want created * whenever a kobject of this type is registered with the kernel. */ -static struct kobj_type foo_ktype = { +static const struct kobj_type foo_ktype = { .sysfs_ops = &foo_sysfs_ops, .release = foo_release, .default_groups = foo_default_groups, @@ -284,5 +300,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample kset and ktype implementation"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>"); diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index fd346f58ddba..53ec6c8b8c40 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -55,6 +55,10 @@ static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) pr_info("<%s> p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n", p->symbol_name, p->addr, regs->psw.addr, regs->flags); #endif +#ifdef CONFIG_LOONGARCH + pr_info("<%s> p->addr = 0x%p, era = 0x%lx, estat = 0x%lx\n", + p->symbol_name, p->addr, regs->csr_era, regs->csr_estat); +#endif /* A dump_stack() here will give a stack backtrace */ return 0; @@ -92,6 +96,10 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, pr_info("<%s> p->addr, 0x%p, flags = 0x%lx\n", p->symbol_name, p->addr, regs->flags); #endif +#ifdef CONFIG_LOONGARCH + pr_info("<%s> p->addr = 0x%p, estat = 0x%lx\n", + p->symbol_name, p->addr, regs->csr_estat); +#endif } static int __init kprobe_init(void) @@ -117,4 +125,5 @@ static void __exit kprobe_exit(void) module_init(kprobe_init) module_exit(kprobe_exit) +MODULE_DESCRIPTION("sample kernel module showing the use of kprobes"); MODULE_LICENSE("GPL"); diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index cbf16542d84e..65d6dcafd742 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -35,7 +35,7 @@ struct my_data { ktime_t entry_stamp; }; -/* Here we use the entry_hanlder to timestamp function entry */ +/* Here we use the entry_handler to timestamp function entry */ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { struct my_data *data; @@ -104,4 +104,5 @@ static void __exit kretprobe_exit(void) module_init(kretprobe_init) module_exit(kretprobe_exit) +MODULE_DESCRIPTION("sample kernel module showing the use of return probes"); MODULE_LICENSE("GPL"); diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index e2056c8b902c..e7af02f98208 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -1,17 +1,19 @@ // SPDX-License-Identifier: BSD-3-Clause /* - * Simple Landlock sandbox manager able to launch a process restricted by a - * user-defined filesystem access control policy. + * Simple Landlock sandbox manager able to execute a process restricted by + * user-defined file system and network access control policies. * * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2020 ANSSI */ #define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <arpa/inet.h> #include <errno.h> #include <fcntl.h> #include <linux/landlock.h> -#include <linux/prctl.h> +#include <linux/socket.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> @@ -20,6 +22,11 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <unistd.h> +#include <stdbool.h> + +#if defined(__GLIBC__) +#include <linux/prctl.h> +#endif #ifndef landlock_create_ruleset static inline int @@ -51,7 +58,30 @@ static inline int landlock_restrict_self(const int ruleset_fd, #define ENV_FS_RO_NAME "LL_FS_RO" #define ENV_FS_RW_NAME "LL_FS_RW" -#define ENV_PATH_TOKEN ":" +#define ENV_TCP_BIND_NAME "LL_TCP_BIND" +#define ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT" +#define ENV_SCOPED_NAME "LL_SCOPED" +#define ENV_FORCE_LOG_NAME "LL_FORCE_LOG" +#define ENV_DELIMITER ":" + +static int str2num(const char *numstr, __u64 *num_dst) +{ + char *endptr = NULL; + int err = 0; + __u64 num; + + errno = 0; + num = strtoull(numstr, &endptr, 10); + if (errno != 0) + err = errno; + /* Was the string empty, or not entirely parsed successfully? */ + else if ((*numstr == '\0') || (*endptr != '\0')) + err = EINVAL; + else + *num_dst = num; + + return err; +} static int parse_path(char *env_path, const char ***const path_list) { @@ -60,13 +90,16 @@ static int parse_path(char *env_path, const char ***const path_list) if (env_path) { num_paths++; for (i = 0; env_path[i]; i++) { - if (env_path[i] == ENV_PATH_TOKEN[0]) + if (env_path[i] == ENV_DELIMITER[0]) num_paths++; } } *path_list = malloc(num_paths * sizeof(**path_list)); + if (!*path_list) + return -1; + for (i = 0; i < num_paths; i++) - (*path_list)[i] = strsep(&env_path, ENV_PATH_TOKEN); + (*path_list)[i] = strsep(&env_path, ENV_DELIMITER); return num_paths; } @@ -77,12 +110,13 @@ static int parse_path(char *env_path, const char ***const path_list) LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ LANDLOCK_ACCESS_FS_READ_FILE | \ - LANDLOCK_ACCESS_FS_TRUNCATE) + LANDLOCK_ACCESS_FS_TRUNCATE | \ + LANDLOCK_ACCESS_FS_IOCTL_DEV) /* clang-format on */ -static int populate_ruleset(const char *const env_var, const int ruleset_fd, - const __u64 allowed_access) +static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd, + const __u64 allowed_access) { int num_paths, i, ret = 1; char *env_path_name; @@ -100,6 +134,10 @@ static int populate_ruleset(const char *const env_var, const int ruleset_fd, env_path_name = strdup(env_path_name); unsetenv(env_var); num_paths = parse_path(env_path_name, &path_list); + if (num_paths < 0) { + fprintf(stderr, "Failed to allocate memory\n"); + goto out_free_name; + } if (num_paths == 1 && path_list[0][0] == '\0') { /* * Allows to not use all possible restrictions (e.g. use @@ -116,9 +154,11 @@ static int populate_ruleset(const char *const env_var, const int ruleset_fd, if (path_beneath.parent_fd < 0) { fprintf(stderr, "Failed to open \"%s\": %s\n", path_list[i], strerror(errno)); - goto out_free_name; + continue; } if (fstat(path_beneath.parent_fd, &statbuf)) { + fprintf(stderr, "Failed to stat \"%s\": %s\n", + path_list[i], strerror(errno)); close(path_beneath.parent_fd); goto out_free_name; } @@ -143,6 +183,98 @@ out_free_name: return ret; } +static int populate_ruleset_net(const char *const env_var, const int ruleset_fd, + const __u64 allowed_access) +{ + int ret = 1; + char *env_port_name, *env_port_name_next, *strport; + struct landlock_net_port_attr net_port = { + .allowed_access = allowed_access, + }; + + env_port_name = getenv(env_var); + if (!env_port_name) + return 0; + env_port_name = strdup(env_port_name); + unsetenv(env_var); + + env_port_name_next = env_port_name; + while ((strport = strsep(&env_port_name_next, ENV_DELIMITER))) { + __u64 port; + + if (strcmp(strport, "") == 0) + continue; + + if (str2num(strport, &port)) { + fprintf(stderr, "Failed to parse port at \"%s\"\n", + strport); + goto out_free_name; + } + net_port.port = port; + if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &net_port, 0)) { + fprintf(stderr, + "Failed to update the ruleset with port \"%llu\": %s\n", + net_port.port, strerror(errno)); + goto out_free_name; + } + } + ret = 0; + +out_free_name: + free(env_port_name); + return ret; +} + +/* Returns true on error, false otherwise. */ +static bool check_ruleset_scope(const char *const env_var, + struct landlock_ruleset_attr *ruleset_attr) +{ + char *env_type_scope, *env_type_scope_next, *ipc_scoping_name; + bool error = false; + bool abstract_scoping = false; + bool signal_scoping = false; + + /* Scoping is not supported by Landlock ABI */ + if (!(ruleset_attr->scoped & + (LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL))) + goto out_unset; + + env_type_scope = getenv(env_var); + /* Scoping is not supported by the user */ + if (!env_type_scope || strcmp("", env_type_scope) == 0) + goto out_unset; + + env_type_scope = strdup(env_type_scope); + env_type_scope_next = env_type_scope; + while ((ipc_scoping_name = + strsep(&env_type_scope_next, ENV_DELIMITER))) { + if (strcmp("a", ipc_scoping_name) == 0 && !abstract_scoping) { + abstract_scoping = true; + } else if (strcmp("s", ipc_scoping_name) == 0 && + !signal_scoping) { + signal_scoping = true; + } else { + fprintf(stderr, "Unknown or duplicate scope \"%s\"\n", + ipc_scoping_name); + error = true; + goto out_free_name; + } + } + +out_free_name: + free(env_type_scope); + +out_unset: + if (!abstract_scoping) + ruleset_attr->scoped &= ~LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET; + if (!signal_scoping) + ruleset_attr->scoped &= ~LANDLOCK_SCOPE_SIGNAL; + + unsetenv(env_var); + return error; +} + /* clang-format off */ #define ACCESS_FS_ROUGHLY_READ ( \ @@ -162,47 +294,75 @@ out_free_name: LANDLOCK_ACCESS_FS_MAKE_BLOCK | \ LANDLOCK_ACCESS_FS_MAKE_SYM | \ LANDLOCK_ACCESS_FS_REFER | \ - LANDLOCK_ACCESS_FS_TRUNCATE) + LANDLOCK_ACCESS_FS_TRUNCATE | \ + LANDLOCK_ACCESS_FS_IOCTL_DEV) /* clang-format on */ -#define LANDLOCK_ABI_LAST 3 +#define LANDLOCK_ABI_LAST 7 + +#define XSTR(s) #s +#define STR(s) XSTR(s) + +/* clang-format off */ + +static const char help[] = + "usage: " ENV_FS_RO_NAME "=\"...\" " ENV_FS_RW_NAME "=\"...\" " + "[other environment variables] %1$s <cmd> [args]...\n" + "\n" + "Execute the given command in a restricted environment.\n" + "Multi-valued settings (lists of ports, paths, scopes) are colon-delimited.\n" + "\n" + "Mandatory settings:\n" + "* " ENV_FS_RO_NAME ": paths allowed to be used in a read-only way\n" + "* " ENV_FS_RW_NAME ": paths allowed to be used in a read-write way\n" + "\n" + "Optional settings (when not set, their associated access check " + "is always allowed, which is different from an empty string which " + "means an empty list):\n" + "* " ENV_TCP_BIND_NAME ": ports allowed to bind (server)\n" + "* " ENV_TCP_CONNECT_NAME ": ports allowed to connect (client)\n" + "* " ENV_SCOPED_NAME ": actions denied on the outside of the landlock domain\n" + " - \"a\" to restrict opening abstract unix sockets\n" + " - \"s\" to restrict sending signals\n" + "\n" + "A sandboxer should not log denied access requests to avoid spamming logs, " + "but to test audit we can set " ENV_FORCE_LOG_NAME "=1\n" + "\n" + "Example:\n" + ENV_FS_RO_NAME "=\"${PATH}:/lib:/usr:/proc:/etc:/dev/urandom\" " + ENV_FS_RW_NAME "=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" " + ENV_TCP_BIND_NAME "=\"9418\" " + ENV_TCP_CONNECT_NAME "=\"80:443\" " + ENV_SCOPED_NAME "=\"a:s\" " + "%1$s bash -i\n" + "\n" + "This sandboxer can use Landlock features up to ABI version " + STR(LANDLOCK_ABI_LAST) ".\n"; + +/* clang-format on */ int main(const int argc, char *const argv[], char *const *const envp) { const char *cmd_path; char *const *cmd_argv; int ruleset_fd, abi; + char *env_port_name, *env_force_log; __u64 access_fs_ro = ACCESS_FS_ROUGHLY_READ, access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE; + struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = access_fs_rw, + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | + LANDLOCK_SCOPE_SIGNAL, }; + int supported_restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; + int set_restrict_flags = 0; if (argc < 2) { - fprintf(stderr, - "usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n", - ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]); - fprintf(stderr, - "Launch a command in a restricted environment.\n\n"); - fprintf(stderr, "Environment variables containing paths, " - "each separated by a colon:\n"); - fprintf(stderr, - "* %s: list of paths allowed to be used in a read-only way.\n", - ENV_FS_RO_NAME); - fprintf(stderr, - "* %s: list of paths allowed to be used in a read-write way.\n", - ENV_FS_RW_NAME); - fprintf(stderr, - "\nexample:\n" - "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" " - "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" " - "%s bash -i\n\n", - ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]); - fprintf(stderr, - "This sandboxer can use Landlock features " - "up to ABI version %d.\n", - LANDLOCK_ABI_LAST); + fprintf(stderr, help, argv[0]); return 1; } @@ -255,7 +415,29 @@ int main(const int argc, char *const argv[], char *const *const envp) case 2: /* Removes LANDLOCK_ACCESS_FS_TRUNCATE for ABI < 3 */ ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_TRUNCATE; + __attribute__((fallthrough)); + case 3: + /* Removes network support for ABI < 4 */ + ruleset_attr.handled_access_net &= + ~(LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP); + __attribute__((fallthrough)); + case 4: + /* Removes LANDLOCK_ACCESS_FS_IOCTL_DEV for ABI < 5 */ + ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_IOCTL_DEV; + __attribute__((fallthrough)); + case 5: + /* Removes LANDLOCK_SCOPE_* for ABI < 6 */ + ruleset_attr.scoped &= ~(LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | + LANDLOCK_SCOPE_SIGNAL); + __attribute__((fallthrough)); + case 6: + /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */ + supported_restrict_flags &= + ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; + + /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */ fprintf(stderr, "Hint: You should update the running kernel " "to leverage Landlock features " @@ -274,23 +456,68 @@ int main(const int argc, char *const argv[], char *const *const envp) access_fs_ro &= ruleset_attr.handled_access_fs; access_fs_rw &= ruleset_attr.handled_access_fs; + /* Removes bind access attribute if not supported by a user. */ + env_port_name = getenv(ENV_TCP_BIND_NAME); + if (!env_port_name) { + ruleset_attr.handled_access_net &= + ~LANDLOCK_ACCESS_NET_BIND_TCP; + } + /* Removes connect access attribute if not supported by a user. */ + env_port_name = getenv(ENV_TCP_CONNECT_NAME); + if (!env_port_name) { + ruleset_attr.handled_access_net &= + ~LANDLOCK_ACCESS_NET_CONNECT_TCP; + } + + if (check_ruleset_scope(ENV_SCOPED_NAME, &ruleset_attr)) + return 1; + + /* Enables optional logs. */ + env_force_log = getenv(ENV_FORCE_LOG_NAME); + if (env_force_log) { + if (strcmp(env_force_log, "1") != 0) { + fprintf(stderr, "Unknown value for " ENV_FORCE_LOG_NAME + " (only \"1\" is handled)\n"); + return 1; + } + if (!(supported_restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON)) { + fprintf(stderr, + "Audit logs not supported by current kernel\n"); + return 1; + } + set_restrict_flags |= LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; + unsetenv(ENV_FORCE_LOG_NAME); + } + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); if (ruleset_fd < 0) { perror("Failed to create a ruleset"); return 1; } - if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) { + + if (populate_ruleset_fs(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) { + goto err_close_ruleset; + } + if (populate_ruleset_fs(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) { goto err_close_ruleset; } - if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) { + + if (populate_ruleset_net(ENV_TCP_BIND_NAME, ruleset_fd, + LANDLOCK_ACCESS_NET_BIND_TCP)) { goto err_close_ruleset; } + if (populate_ruleset_net(ENV_TCP_CONNECT_NAME, ruleset_fd, + LANDLOCK_ACCESS_NET_CONNECT_TCP)) { + goto err_close_ruleset; + } + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("Failed to restrict privileges"); goto err_close_ruleset; } - if (landlock_restrict_self(ruleset_fd, 0)) { + if (landlock_restrict_self(ruleset_fd, set_restrict_flags)) { perror("Failed to enforce ruleset"); goto err_close_ruleset; } @@ -298,6 +525,7 @@ int main(const int argc, char *const argv[], char *const *const envp) cmd_path = argv[1]; cmd_argv = argv + 1; + fprintf(stderr, "Executing the sandboxed command...\n"); execvpe(cmd_path, cmd_argv, envp); fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path, strerror(errno)); diff --git a/samples/livepatch/livepatch-callbacks-busymod.c b/samples/livepatch/livepatch-callbacks-busymod.c index 378e2d40271a..fadc2a85cb35 100644 --- a/samples/livepatch/livepatch-callbacks-busymod.c +++ b/samples/livepatch/livepatch-callbacks-busymod.c @@ -44,8 +44,7 @@ static void busymod_work_func(struct work_struct *work) static int livepatch_callbacks_mod_init(void) { pr_info("%s\n", __func__); - schedule_delayed_work(&work, - msecs_to_jiffies(1000 * 0)); + schedule_delayed_work(&work, 0); return 0; } @@ -57,4 +56,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c index 11c3f4357812..9e69d9caed25 100644 --- a/samples/livepatch/livepatch-callbacks-demo.c +++ b/samples/livepatch/livepatch-callbacks-demo.c @@ -192,5 +192,6 @@ static void livepatch_callbacks_demo_exit(void) module_init(livepatch_callbacks_demo_init); module_exit(livepatch_callbacks_demo_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-callbacks-mod.c b/samples/livepatch/livepatch-callbacks-mod.c index 2a074f422a51..d1851b471ad9 100644 --- a/samples/livepatch/livepatch-callbacks-mod.c +++ b/samples/livepatch/livepatch-callbacks-mod.c @@ -38,4 +38,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index cd76d7ebe598..5263a2f31c48 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -66,5 +66,6 @@ static void livepatch_exit(void) module_init(livepatch_init); module_exit(livepatch_exit); +MODULE_DESCRIPTION("Kernel Live Patching Sample Module"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index 6701641bf12d..cbf68ca40097 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -72,8 +72,7 @@ static struct dummy *livepatch_fix1_dummy_alloc(void) if (!d) return NULL; - d->jiffies_expire = jiffies + - msecs_to_jiffies(1000 * EXPIRE_PERIOD); + d->jiffies_expire = jiffies + secs_to_jiffies(EXPIRE_PERIOD); /* * Patch: save the extra memory location into a SV_LEAK shadow @@ -169,5 +168,6 @@ static void livepatch_shadow_fix1_exit(void) module_init(livepatch_shadow_fix1_init); module_exit(livepatch_shadow_fix1_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index 361046a4f10c..b99122cb221f 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -128,5 +128,6 @@ static void livepatch_shadow_fix2_exit(void) module_init(livepatch_shadow_fix2_init); module_exit(livepatch_shadow_fix2_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-mod.c b/samples/livepatch/livepatch-shadow-mod.c index 7e753b0d2fa6..5d83ad5a8118 100644 --- a/samples/livepatch/livepatch-shadow-mod.c +++ b/samples/livepatch/livepatch-shadow-mod.c @@ -101,8 +101,7 @@ static __used noinline struct dummy *dummy_alloc(void) if (!d) return NULL; - d->jiffies_expire = jiffies + - msecs_to_jiffies(1000 * EXPIRE_PERIOD); + d->jiffies_expire = jiffies + secs_to_jiffies(EXPIRE_PERIOD); /* Oops, forgot to save leak! */ leak = kzalloc(sizeof(*leak), GFP_KERNEL); @@ -152,8 +151,7 @@ static void alloc_work_func(struct work_struct *work) list_add(&d->list, &dummy_list); mutex_unlock(&dummy_list_mutex); - schedule_delayed_work(&alloc_dwork, - msecs_to_jiffies(1000 * ALLOC_PERIOD)); + schedule_delayed_work(&alloc_dwork, secs_to_jiffies(ALLOC_PERIOD)); } /* @@ -184,16 +182,13 @@ static void cleanup_work_func(struct work_struct *work) } mutex_unlock(&dummy_list_mutex); - schedule_delayed_work(&cleanup_dwork, - msecs_to_jiffies(1000 * CLEANUP_PERIOD)); + schedule_delayed_work(&cleanup_dwork, secs_to_jiffies(CLEANUP_PERIOD)); } static int livepatch_shadow_mod_init(void) { - schedule_delayed_work(&alloc_dwork, - msecs_to_jiffies(1000 * ALLOC_PERIOD)); - schedule_delayed_work(&cleanup_dwork, - msecs_to_jiffies(1000 * CLEANUP_PERIOD)); + schedule_delayed_work(&alloc_dwork, secs_to_jiffies(ALLOC_PERIOD)); + schedule_delayed_work(&cleanup_dwork, secs_to_jiffies(CLEANUP_PERIOD)); return 0; } diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c index 867debd3b912..1d7254bcb44c 100644 --- a/samples/mei/mei-amt-version.c +++ b/samples/mei/mei-amt-version.c @@ -69,11 +69,11 @@ #include <string.h> #include <fcntl.h> #include <sys/ioctl.h> +#include <sys/time.h> #include <unistd.h> #include <errno.h> #include <stdint.h> #include <stdbool.h> -#include <bits/wordsize.h> #include <linux/mei.h> /***************************************************************************** diff --git a/samples/pfsm/.gitignore b/samples/pfsm/.gitignore new file mode 100644 index 000000000000..f350a030a060 --- /dev/null +++ b/samples/pfsm/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +/pfsm-wakeup diff --git a/samples/pfsm/Makefile b/samples/pfsm/Makefile new file mode 100644 index 000000000000..213e8d9f5dbc --- /dev/null +++ b/samples/pfsm/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +userprogs-always-y += pfsm-wakeup + +userccflags += -I usr/include diff --git a/samples/pfsm/pfsm-wakeup.c b/samples/pfsm/pfsm-wakeup.c new file mode 100644 index 000000000000..299dd9e1f607 --- /dev/null +++ b/samples/pfsm/pfsm-wakeup.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * TPS6594 PFSM userspace example + * + * Copyright (C) 2023 BayLibre Incorporated - https://www.baylibre.com/ + * + * This example shows how to use PFSMs from a userspace application, + * on TI j721s2 platform. The PMIC is armed to be triggered by a RTC + * alarm to execute state transition (RETENTION to ACTIVE). + */ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include <linux/rtc.h> +#include <linux/tps6594_pfsm.h> + +#define ALARM_DELTA_SEC 30 + +#define RTC_A "/dev/rtc0" + +#define PMIC_NB 3 +#define PMIC_A "/dev/pfsm-0-0x48" +#define PMIC_B "/dev/pfsm-0-0x4c" +#define PMIC_C "/dev/pfsm-2-0x58" + +static const char * const dev_pfsm[] = {PMIC_A, PMIC_B, PMIC_C}; + +int main(int argc, char *argv[]) +{ + int i, ret, fd_rtc, fd_pfsm[PMIC_NB] = { 0 }; + struct rtc_time rtc_tm; + struct pmic_state_opt pmic_opt = { 0 }; + unsigned long data; + + fd_rtc = open(RTC_A, O_RDONLY); + if (fd_rtc < 0) { + perror("Failed to open RTC device."); + goto out; + } + + for (i = 0 ; i < PMIC_NB ; i++) { + fd_pfsm[i] = open(dev_pfsm[i], O_RDWR); + if (fd_pfsm[i] < 0) { + perror("Failed to open PFSM device."); + goto out; + } + } + + /* Read RTC date/time */ + ret = ioctl(fd_rtc, RTC_RD_TIME, &rtc_tm); + if (ret < 0) { + perror("Failed to read RTC date/time."); + goto out; + } + printf("Current RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", + rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Set RTC alarm to ALARM_DELTA_SEC sec in the future, and check for rollover */ + rtc_tm.tm_sec += ALARM_DELTA_SEC; + if (rtc_tm.tm_sec >= 60) { + rtc_tm.tm_sec %= 60; + rtc_tm.tm_min++; + } + if (rtc_tm.tm_min == 60) { + rtc_tm.tm_min = 0; + rtc_tm.tm_hour++; + } + if (rtc_tm.tm_hour == 24) + rtc_tm.tm_hour = 0; + ret = ioctl(fd_rtc, RTC_ALM_SET, &rtc_tm); + if (ret < 0) { + perror("Failed to set RTC alarm."); + goto out; + } + + /* Enable alarm interrupts */ + ret = ioctl(fd_rtc, RTC_AIE_ON, 0); + if (ret < 0) { + perror("Failed to enable alarm interrupts."); + goto out; + } + printf("Waiting %d seconds for alarm...\n", ALARM_DELTA_SEC); + + /* + * Set RETENTION state with options for PMIC_C/B/A respectively. + * Since PMIC_A is master, it should be the last one to be configured. + */ + pmic_opt.ddr_retention = 1; + for (i = PMIC_NB - 1 ; i >= 0 ; i--) { + printf("Set RETENTION state for PMIC_%d.\n", i); + sleep(1); + ret = ioctl(fd_pfsm[i], PMIC_SET_RETENTION_STATE, &pmic_opt); + if (ret < 0) { + perror("Failed to set RETENTION state."); + goto out_reset; + } + } + + /* This blocks until the alarm ring causes an interrupt */ + ret = read(fd_rtc, &data, sizeof(unsigned long)); + if (ret < 0) + perror("Failed to get RTC alarm."); + else + puts("Alarm rang.\n"); + +out_reset: + ioctl(fd_rtc, RTC_AIE_OFF, 0); + + /* Set ACTIVE state for PMIC_A */ + ioctl(fd_pfsm[0], PMIC_SET_ACTIVE_STATE, 0); + +out: + for (i = 0 ; i < PMIC_NB ; i++) + if (fd_pfsm[i]) + close(fd_pfsm[i]); + + if (fd_rtc) + close(fd_rtc); + + return 0; +} diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index dd4e53ae9b73..c08cefb8eb1f 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -108,12 +108,13 @@ function pgset() { fi } -if [[ -z "$APPEND" ]]; then - if [[ $EUID -eq 0 ]]; then - # Cleanup pktgen setup on exit if thats not "append mode" - trap 'pg_ctrl "reset"' EXIT - fi -fi +function trap_exit() +{ + # Cleanup pktgen setup on exit if thats not "append mode" + if [[ -z "$APPEND" ]] && [[ $EUID -eq 0 ]]; then + trap 'pg_ctrl "reset"' EXIT + fi +} ## -- General shell tricks -- diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index 99ec0688b044..b4328db4a164 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -33,6 +33,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Using invalid DST_MAC will cause the packets to get dropped in # ip_rcv() which is part of the test if [ -z "$DEST_IP" ]; then diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh index 04b0dd0c36d6..f2beb512c5cd 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -14,6 +14,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" fi diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index 09a92ea963f9..66cb707479e6 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -13,6 +13,10 @@ root_check_run_with_sudo "$@" # - go look in parameters.sh to see which setting are avail # - required param is the interface "-i" stored in $DEV source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then @@ -72,7 +76,7 @@ if [ -n "$DST_PORT" ]; then pg_set $DEV "udp_dst_max $UDP_DST_MAX" fi -[ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM" +[ ! -z "$UDP_CSUM" ] && pg_set $DEV "flag UDPCSUM" # Setup random UDP port src range pg_set $DEV "flag UDPSRC_RND" diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index 7fa41c84c32f..93f33d7d0a81 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -14,6 +14,9 @@ root_check_run_with_sudo "$@" # Required param: -i dev in $DEV source ${basedir}/parameters.sh +# Trap EXIT first +trap_exit + [ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely # Base Config diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index 8bf2fdffba16..8f8ed1ac46a0 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -25,6 +25,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index cff51f861506..65ed486ce4f1 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -12,6 +12,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 3578d0aa4ac5..bcbc386b2284 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -16,6 +16,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh index 264cc5db9c49..0c5409cb5bab 100755 --- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh +++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh @@ -14,6 +14,9 @@ root_check_run_with_sudo "$@" # Required param: -i dev in $DEV source ${basedir}/parameters.sh +# Trap EXIT first +trap_exit + # Base Config [ -z "$COUNT" ] && COUNT="20000000" # Zero means indefinitely [ -z "$CLONE_SKB" ] && CLONE_SKB="0" diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c index c045e3d24326..d1814582319b 100644 --- a/samples/qmi/qmi_sample_client.c +++ b/samples/qmi/qmi_sample_client.c @@ -468,7 +468,7 @@ static int qmi_sample_probe(struct platform_device *pdev) return ret; sq = dev_get_platdata(&pdev->dev); - ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq, + ret = kernel_connect(sample->qmi.sock, (struct sockaddr_unsized *)sq, sizeof(*sq), 0); if (ret < 0) { pr_err("failed to connect to remote service port\n"); @@ -511,7 +511,7 @@ err_release_qmi_handle: return ret; } -static int qmi_sample_remove(struct platform_device *pdev) +static void qmi_sample_remove(struct platform_device *pdev) { struct qmi_sample *sample = platform_get_drvdata(pdev); @@ -520,8 +520,6 @@ static int qmi_sample_remove(struct platform_device *pdev) debugfs_remove(sample->de_dir); qmi_handle_release(&sample->qmi); - - return 0; } static struct platform_driver qmi_sample_driver = { diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig index b0f74a81c8f9..3efa51bfc8ef 100644 --- a/samples/rust/Kconfig +++ b/samples/rust/Kconfig @@ -10,6 +10,17 @@ menuconfig SAMPLES_RUST if SAMPLES_RUST +config SAMPLE_RUST_CONFIGFS + tristate "Configfs sample" + depends on CONFIGFS_FS + help + This option builds the Rust configfs sample. + + To compile this as a module, choose M here: + the module will be called rust_configfs. + + If unsure, say N. + config SAMPLE_RUST_MINIMAL tristate "Minimal" help @@ -20,6 +31,16 @@ config SAMPLE_RUST_MINIMAL If unsure, say N. +config SAMPLE_RUST_MISC_DEVICE + tristate "Misc device" + help + This option builds the Rust misc device. + + To compile this as a module, choose M here: + the module will be called rust_misc_device. + + If unsure, say N. + config SAMPLE_RUST_PRINT tristate "Printing macros" help @@ -30,6 +51,116 @@ config SAMPLE_RUST_PRINT If unsure, say N. +config SAMPLE_RUST_DMA + tristate "DMA Test Driver" + depends on PCI + help + This option builds the Rust DMA Test driver sample. + + To compile this as a module, choose M here: + the module will be called rust_dma. + + If unsure, say N. + +config SAMPLE_RUST_DEBUGFS + tristate "DebugFS Test Module" + depends on DEBUG_FS + help + This option builds the Rust DebugFS Test module sample. + + To compile this as a module, choose M here: + the module will be called rust_debugfs. + + If unsure, say N. + +config SAMPLE_RUST_DEBUGFS_SCOPED + tristate "Scoped DebugFS Test Module" + depends on DEBUG_FS + help + This option builds the Rust Scoped DebugFS Test module sample. + + To compile this as a module, choose M here: + the module will be called rust_debugfs_scoped. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_I2C + tristate "I2C Driver" + depends on I2C=y + help + This option builds the Rust I2C driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_i2c. + + If unsure, say N. + +config SAMPLE_RUST_I2C_CLIENT + tristate "I2C Client Registration" + depends on I2C=y + help + This option builds the Rust I2C client manual creation + sample. + + To compile this as a module, choose M here: + the module will be called rust_i2c_client. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_PCI + tristate "PCI Driver" + depends on PCI + help + This option builds the Rust PCI driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_pci. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_PLATFORM + tristate "Platform Driver" + help + This option builds the Rust Platform driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_platform. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_USB + tristate "USB Driver" + depends on USB = y + help + This option builds the Rust USB driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_usb. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_FAUX + tristate "Faux Driver" + help + This option builds the Rust Faux driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_faux. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_AUXILIARY + tristate "Auxiliary Driver" + depends on PCI + select AUXILIARY_BUS + help + This option builds the Rust auxiliary driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_auxiliary. + + If unsure, say N. + config SAMPLE_RUST_HOSTPROGS bool "Host programs" help diff --git a/samples/rust/Makefile b/samples/rust/Makefile index 03086dabbea4..f65885d1d62b 100644 --- a/samples/rust/Makefile +++ b/samples/rust/Makefile @@ -1,6 +1,21 @@ # SPDX-License-Identifier: GPL-2.0 +ccflags-y += -I$(src) # needed for trace events obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o +obj-$(CONFIG_SAMPLE_RUST_MISC_DEVICE) += rust_misc_device.o obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o +obj-$(CONFIG_SAMPLE_RUST_DEBUGFS) += rust_debugfs.o +obj-$(CONFIG_SAMPLE_RUST_DEBUGFS_SCOPED) += rust_debugfs_scoped.o +obj-$(CONFIG_SAMPLE_RUST_DMA) += rust_dma.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_I2C) += rust_driver_i2c.o +obj-$(CONFIG_SAMPLE_RUST_I2C_CLIENT) += rust_i2c_client.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_USB) += rust_driver_usb.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_FAUX) += rust_driver_faux.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_AUXILIARY) += rust_driver_auxiliary.o +obj-$(CONFIG_SAMPLE_RUST_CONFIGFS) += rust_configfs.o + +rust_print-y := rust_print_main.o rust_print_events.o subdir-$(CONFIG_SAMPLE_RUST_HOSTPROGS) += hostprogs diff --git a/samples/rust/rust_configfs.rs b/samples/rust/rust_configfs.rs new file mode 100644 index 000000000000..0ccc7553ef39 --- /dev/null +++ b/samples/rust/rust_configfs.rs @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust configfs sample. + +use kernel::alloc::flags; +use kernel::c_str; +use kernel::configfs; +use kernel::configfs::configfs_attrs; +use kernel::new_mutex; +use kernel::page::PAGE_SIZE; +use kernel::prelude::*; +use kernel::sync::Mutex; + +module! { + type: RustConfigfs, + name: "rust_configfs", + authors: ["Rust for Linux Contributors"], + description: "Rust configfs sample", + license: "GPL", +} + +#[pin_data] +struct RustConfigfs { + #[pin] + config: configfs::Subsystem<Configuration>, +} + +#[pin_data] +struct Configuration { + message: &'static CStr, + #[pin] + bar: Mutex<(KBox<[u8; PAGE_SIZE]>, usize)>, +} + +impl Configuration { + fn new() -> impl PinInit<Self, Error> { + try_pin_init!(Self { + message: c_str!("Hello World\n"), + bar <- new_mutex!((KBox::new([0; PAGE_SIZE], flags::GFP_KERNEL)?, 0)), + }) + } +} + +impl kernel::InPlaceModule for RustConfigfs { + fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { + pr_info!("Rust configfs sample (init)\n"); + + // Define a subsystem with the data type `Configuration`, two + // attributes, `message` and `bar` and child group type `Child`. `mkdir` + // in the directory representing this subsystem will create directories + // backed by the `Child` type. + let item_type = configfs_attrs! { + container: configfs::Subsystem<Configuration>, + data: Configuration, + child: Child, + attributes: [ + message: 0, + bar: 1, + ], + }; + + try_pin_init!(Self { + config <- configfs::Subsystem::new( + c_str!("rust_configfs"), item_type, Configuration::new() + ), + }) + } +} + +#[vtable] +impl configfs::GroupOperations for Configuration { + type Child = Child; + + fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<Child>, Error>> { + // Define a group with data type `Child`, one attribute `baz` and child + // group type `GrandChild`. `mkdir` in the directory representing this + // group will create directories backed by the `GrandChild` type. + let tpe = configfs_attrs! { + container: configfs::Group<Child>, + data: Child, + child: GrandChild, + attributes: [ + baz: 0, + ], + }; + + Ok(configfs::Group::new(name.try_into()?, tpe, Child::new())) + } +} + +#[vtable] +impl configfs::AttributeOperations<0> for Configuration { + type Data = Configuration; + + fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show message\n"); + let data = container.message.to_bytes(); + page[0..data.len()].copy_from_slice(data); + Ok(data.len()) + } +} + +#[vtable] +impl configfs::AttributeOperations<1> for Configuration { + type Data = Configuration; + + fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show bar\n"); + let guard = container.bar.lock(); + let data = guard.0.as_slice(); + let len = guard.1; + page[0..len].copy_from_slice(&data[0..len]); + Ok(len) + } + + fn store(container: &Configuration, page: &[u8]) -> Result { + pr_info!("Store bar\n"); + let mut guard = container.bar.lock(); + guard.0[0..page.len()].copy_from_slice(page); + guard.1 = page.len(); + Ok(()) + } +} + +// `pin_data` cannot handle structs without braces. +#[pin_data] +struct Child {} + +impl Child { + fn new() -> impl PinInit<Self, Error> { + try_pin_init!(Self {}) + } +} + +#[vtable] +impl configfs::GroupOperations for Child { + type Child = GrandChild; + + fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<GrandChild>, Error>> { + // Define a group with data type `GrandChild`, one attribute `gc`. As no + // child type is specified, it will not be possible to create subgroups + // in this group, and `mkdir`in the directory representing this group + // will return an error. + let tpe = configfs_attrs! { + container: configfs::Group<GrandChild>, + data: GrandChild, + attributes: [ + gc: 0, + ], + }; + + Ok(configfs::Group::new( + name.try_into()?, + tpe, + GrandChild::new(), + )) + } +} + +#[vtable] +impl configfs::AttributeOperations<0> for Child { + type Data = Child; + + fn show(_container: &Child, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show baz\n"); + let data = c"Hello Baz\n".to_bytes(); + page[0..data.len()].copy_from_slice(data); + Ok(data.len()) + } +} + +// `pin_data` cannot handle structs without braces. +#[pin_data] +struct GrandChild {} + +impl GrandChild { + fn new() -> impl PinInit<Self, Error> { + try_pin_init!(Self {}) + } +} + +#[vtable] +impl configfs::AttributeOperations<0> for GrandChild { + type Data = GrandChild; + + fn show(_container: &GrandChild, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show grand child\n"); + let data = c"Hello GC\n".to_bytes(); + page[0..data.len()].copy_from_slice(data); + Ok(data.len()) + } +} diff --git a/samples/rust/rust_debugfs.rs b/samples/rust/rust_debugfs.rs new file mode 100644 index 000000000000..025e8f9d12de --- /dev/null +++ b/samples/rust/rust_debugfs.rs @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! Sample DebugFS exporting platform driver +//! +//! To successfully probe this driver with ACPI, use an ssdt that looks like +//! +//! ```dsl +//! DefinitionBlock ("", "SSDT", 2, "TEST", "VIRTACPI", 0x00000001) +//! { +//! Scope (\_SB) +//! { +//! Device (T432) +//! { +//! Name (_HID, "LNUXBEEF") // ACPI hardware ID to match +//! Name (_UID, 1) +//! Name (_STA, 0x0F) // Device present, enabled +//! Name (_DSD, Package () { // Sample attribute +//! ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), +//! Package() { +//! Package(2) {"compatible", "sample-debugfs"} +//! } +//! }) +//! Name (_CRS, ResourceTemplate () +//! { +//! Memory32Fixed (ReadWrite, 0xFED00000, 0x1000) +//! }) +//! } +//! } +//! } +//! ``` + +use core::str::FromStr; +use kernel::c_str; +use kernel::debugfs::{Dir, File}; +use kernel::new_mutex; +use kernel::prelude::*; +use kernel::sizes::*; +use kernel::sync::atomic::{Atomic, Relaxed}; +use kernel::sync::Mutex; +use kernel::{acpi, device::Core, of, platform, str::CString, types::ARef}; + +kernel::module_platform_driver! { + type: RustDebugFs, + name: "rust_debugfs", + authors: ["Matthew Maurer"], + description: "Rust DebugFS usage sample", + license: "GPL", +} + +#[pin_data] +struct RustDebugFs { + pdev: ARef<platform::Device>, + // As we only hold these for drop effect (to remove the directory/files) we have a leading + // underscore to indicate to the compiler that we don't expect to use this field directly. + _debugfs: Dir, + #[pin] + _compatible: File<CString>, + #[pin] + counter: File<Atomic<usize>>, + #[pin] + inner: File<Mutex<Inner>>, + #[pin] + array_blob: File<Mutex<[u8; 4]>>, + #[pin] + vector_blob: File<Mutex<KVec<u8>>>, +} + +#[derive(Debug)] +struct Inner { + x: u32, + y: u32, +} + +impl FromStr for Inner { + type Err = Error; + fn from_str(s: &str) -> Result<Self> { + let mut parts = s.split_whitespace(); + let x = parts + .next() + .ok_or(EINVAL)? + .parse::<u32>() + .map_err(|_| EINVAL)?; + let y = parts + .next() + .ok_or(EINVAL)? + .parse::<u32>() + .map_err(|_| EINVAL)?; + if parts.next().is_some() { + return Err(EINVAL); + } + Ok(Inner { x, y }) + } +} + +kernel::acpi_device_table!( + ACPI_TABLE, + MODULE_ACPI_TABLE, + <RustDebugFs as platform::Driver>::IdInfo, + [(acpi::DeviceId::new(c_str!("LNUXBEEF")), ())] +); + +impl platform::Driver for RustDebugFs { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None; + const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE); + + fn probe( + pdev: &platform::Device<Core>, + _info: Option<&Self::IdInfo>, + ) -> impl PinInit<Self, Error> { + RustDebugFs::new(pdev).pin_chain(|this| { + this.counter.store(91, Relaxed); + { + let mut guard = this.inner.lock(); + guard.x = guard.y; + guard.y = 42; + } + + Ok(()) + }) + } +} + +impl RustDebugFs { + fn build_counter(dir: &Dir) -> impl PinInit<File<Atomic<usize>>> + '_ { + dir.read_write_file(c_str!("counter"), Atomic::<usize>::new(0)) + } + + fn build_inner(dir: &Dir) -> impl PinInit<File<Mutex<Inner>>> + '_ { + dir.read_write_file(c_str!("pair"), new_mutex!(Inner { x: 3, y: 10 })) + } + + fn new(pdev: &platform::Device<Core>) -> impl PinInit<Self, Error> + '_ { + let debugfs = Dir::new(c_str!("sample_debugfs")); + let dev = pdev.as_ref(); + + try_pin_init! { + Self { + _compatible <- debugfs.read_only_file( + c_str!("compatible"), + dev.fwnode() + .ok_or(ENOENT)? + .property_read::<CString>(c_str!("compatible")) + .required_by(dev)?, + ), + counter <- Self::build_counter(&debugfs), + inner <- Self::build_inner(&debugfs), + array_blob <- debugfs.read_write_binary_file( + c_str!("array_blob"), + new_mutex!([0x62, 0x6c, 0x6f, 0x62]), + ), + vector_blob <- debugfs.read_write_binary_file( + c_str!("vector_blob"), + new_mutex!(kernel::kvec!(0x42; SZ_4K)?), + ), + _debugfs: debugfs, + pdev: pdev.into(), + } + } + } +} diff --git a/samples/rust/rust_debugfs_scoped.rs b/samples/rust/rust_debugfs_scoped.rs new file mode 100644 index 000000000000..702a6546d3fb --- /dev/null +++ b/samples/rust/rust_debugfs_scoped.rs @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! Sample DebugFS exporting platform driver that demonstrates the use of +//! `Scope::dir` to create a variety of files without the need to separately +//! track them all. + +use kernel::debugfs::{Dir, Scope}; +use kernel::prelude::*; +use kernel::sizes::*; +use kernel::sync::atomic::Atomic; +use kernel::sync::Mutex; +use kernel::{c_str, new_mutex, str::CString}; + +module! { + type: RustScopedDebugFs, + name: "rust_debugfs_scoped", + authors: ["Matthew Maurer"], + description: "Rust Scoped DebugFS usage sample", + license: "GPL", +} + +fn remove_file_write( + mod_data: &ModuleData, + reader: &mut kernel::uaccess::UserSliceReader, +) -> Result { + let mut buf = [0u8; 128]; + if reader.len() >= buf.len() { + return Err(EINVAL); + } + let n = reader.len(); + reader.read_slice(&mut buf[..n])?; + + let s = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?.trim(); + let nul_idx = s.len(); + buf[nul_idx] = 0; + let to_remove = CStr::from_bytes_with_nul(&buf[..nul_idx + 1]).map_err(|_| EINVAL)?; + mod_data + .devices + .lock() + .retain(|device| device.name.to_bytes() != to_remove.to_bytes()); + Ok(()) +} + +fn create_file_write( + mod_data: &ModuleData, + reader: &mut kernel::uaccess::UserSliceReader, +) -> Result { + let mut buf = [0u8; 128]; + if reader.len() > buf.len() { + return Err(EINVAL); + } + let n = reader.len(); + reader.read_slice(&mut buf[..n])?; + + let mut nums = KVec::new(); + + let s = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?.trim(); + let mut items = s.split_whitespace(); + let name_str = items.next().ok_or(EINVAL)?; + let name = CString::try_from_fmt(fmt!("{name_str}"))?; + let file_name = CString::try_from_fmt(fmt!("{name_str}"))?; + for sub in items { + nums.push( + Atomic::<usize>::new(sub.parse().map_err(|_| EINVAL)?), + GFP_KERNEL, + )?; + } + let blob = KBox::pin_init(new_mutex!([0x42; SZ_4K]), GFP_KERNEL)?; + + let scope = KBox::pin_init( + mod_data.device_dir.scope( + DeviceData { name, nums, blob }, + &file_name, + |dev_data, dir| { + for (idx, val) in dev_data.nums.iter().enumerate() { + let Ok(name) = CString::try_from_fmt(fmt!("{idx}")) else { + return; + }; + dir.read_write_file(&name, val); + } + dir.read_write_binary_file(c_str!("blob"), &dev_data.blob); + }, + ), + GFP_KERNEL, + )?; + (*mod_data.devices.lock()).push(scope, GFP_KERNEL)?; + + Ok(()) +} + +struct RustScopedDebugFs { + _data: Pin<KBox<Scope<ModuleData>>>, +} + +#[pin_data] +struct ModuleData { + device_dir: Dir, + #[pin] + devices: Mutex<KVec<Pin<KBox<Scope<DeviceData>>>>>, +} + +impl ModuleData { + fn init(device_dir: Dir) -> impl PinInit<Self> { + pin_init! { + Self { + device_dir: device_dir, + devices <- new_mutex!(KVec::new()) + } + } + } +} + +struct DeviceData { + name: CString, + nums: KVec<Atomic<usize>>, + blob: Pin<KBox<Mutex<[u8; SZ_4K]>>>, +} + +fn init_control(base_dir: &Dir, dyn_dirs: Dir) -> impl PinInit<Scope<ModuleData>> + '_ { + base_dir.scope( + ModuleData::init(dyn_dirs), + c_str!("control"), + |data, dir| { + dir.write_only_callback_file(c_str!("create"), data, &create_file_write); + dir.write_only_callback_file(c_str!("remove"), data, &remove_file_write); + }, + ) +} + +impl kernel::Module for RustScopedDebugFs { + fn init(_module: &'static kernel::ThisModule) -> Result<Self> { + let base_dir = Dir::new(c_str!("rust_scoped_debugfs")); + let dyn_dirs = base_dir.subdir(c_str!("dynamic")); + Ok(Self { + _data: KBox::pin_init(init_control(&base_dir, dyn_dirs), GFP_KERNEL)?, + }) + } +} diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs new file mode 100644 index 000000000000..f53bce2a73e3 --- /dev/null +++ b/samples/rust/rust_dma.rs @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust DMA api test (based on QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. + +use kernel::{ + device::Core, + dma::{CoherentAllocation, DataDirection, Device, DmaMask}, + page, pci, + prelude::*, + scatterlist::{Owned, SGTable}, + sync::aref::ARef, +}; + +#[pin_data(PinnedDrop)] +struct DmaSampleDriver { + pdev: ARef<pci::Device>, + ca: CoherentAllocation<MyStruct>, + #[pin] + sgt: SGTable<Owned<VVec<u8>>>, +} + +const TEST_VALUES: [(u32, u32); 5] = [ + (0xa, 0xb), + (0xc, 0xd), + (0xe, 0xf), + (0xab, 0xba), + (0xcd, 0xef), +]; + +struct MyStruct { + h: u32, + b: u32, +} + +impl MyStruct { + fn new(h: u32, b: u32) -> Self { + Self { h, b } + } +} +// SAFETY: All bit patterns are acceptable values for `MyStruct`. +unsafe impl kernel::transmute::AsBytes for MyStruct {} +// SAFETY: Instances of `MyStruct` have no uninitialized portions. +unsafe impl kernel::transmute::FromBytes for MyStruct {} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <DmaSampleDriver as pci::Driver>::IdInfo, + [(pci::DeviceId::from_id(pci::Vendor::REDHAT, 0x5), ())] +); + +impl pci::Driver for DmaSampleDriver { + type IdInfo = (); + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> { + pin_init::pin_init_scope(move || { + dev_info!(pdev.as_ref(), "Probe DMA test driver.\n"); + + let mask = DmaMask::new::<64>(); + + // SAFETY: There are no concurrent calls to DMA allocation and mapping primitives. + unsafe { pdev.dma_set_mask_and_coherent(mask)? }; + + let ca: CoherentAllocation<MyStruct> = + CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; + + for (i, value) in TEST_VALUES.into_iter().enumerate() { + kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1))?; + } + + let size = 4 * page::PAGE_SIZE; + let pages = VVec::with_capacity(size, GFP_KERNEL)?; + + let sgt = SGTable::new(pdev.as_ref(), pages, DataDirection::ToDevice, GFP_KERNEL); + + Ok(try_pin_init!(Self { + pdev: pdev.into(), + ca, + sgt <- sgt, + })) + }) + } +} + +#[pinned_drop] +impl PinnedDrop for DmaSampleDriver { + fn drop(self: Pin<&mut Self>) { + let dev = self.pdev.as_ref(); + + dev_info!(dev, "Unload DMA test driver.\n"); + + for (i, value) in TEST_VALUES.into_iter().enumerate() { + let val0 = kernel::dma_read!(self.ca[i].h); + let val1 = kernel::dma_read!(self.ca[i].b); + assert!(val0.is_ok()); + assert!(val1.is_ok()); + + if let Ok(val0) = val0 { + assert_eq!(val0, value.0); + } + if let Ok(val1) = val1 { + assert_eq!(val1, value.1); + } + } + + for (i, entry) in self.sgt.iter().enumerate() { + dev_info!(dev, "Entry[{}]: DMA address: {:#x}", i, entry.dma_address()); + } + } +} + +kernel::module_pci_driver! { + type: DmaSampleDriver, + name: "rust_dma", + authors: ["Abdiel Janulgue"], + description: "Rust DMA test", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_auxiliary.rs b/samples/rust/rust_driver_auxiliary.rs new file mode 100644 index 000000000000..5761ea314f44 --- /dev/null +++ b/samples/rust/rust_driver_auxiliary.rs @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust auxiliary driver sample (based on a PCI driver for QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. + +use kernel::{ + auxiliary, c_str, + device::{Bound, Core}, + devres::Devres, + driver, + error::Error, + pci, + prelude::*, + InPlaceModule, +}; + +use core::any::TypeId; +use pin_init::PinInit; + +const MODULE_NAME: &CStr = <LocalModule as kernel::ModuleMetadata>::NAME; +const AUXILIARY_NAME: &CStr = c_str!("auxiliary"); + +struct AuxiliaryDriver; + +kernel::auxiliary_device_table!( + AUX_TABLE, + MODULE_AUX_TABLE, + <AuxiliaryDriver as auxiliary::Driver>::IdInfo, + [(auxiliary::DeviceId::new(MODULE_NAME, AUXILIARY_NAME), ())] +); + +impl auxiliary::Driver for AuxiliaryDriver { + type IdInfo = (); + + const ID_TABLE: auxiliary::IdTable<Self::IdInfo> = &AUX_TABLE; + + fn probe(adev: &auxiliary::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> { + dev_info!( + adev.as_ref(), + "Probing auxiliary driver for auxiliary device with id={}\n", + adev.id() + ); + + ParentDriver::connect(adev)?; + + Ok(Self) + } +} + +#[pin_data] +struct ParentDriver { + private: TypeId, + #[pin] + _reg0: Devres<auxiliary::Registration>, + #[pin] + _reg1: Devres<auxiliary::Registration>, +} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <ParentDriver as pci::Driver>::IdInfo, + [(pci::DeviceId::from_id(pci::Vendor::REDHAT, 0x5), ())] +); + +impl pci::Driver for ParentDriver { + type IdInfo = (); + + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> { + try_pin_init!(Self { + private: TypeId::of::<Self>(), + _reg0 <- auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 0, MODULE_NAME), + _reg1 <- auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 1, MODULE_NAME), + }) + } +} + +impl ParentDriver { + fn connect(adev: &auxiliary::Device<Bound>) -> Result { + let dev = adev.parent(); + let pdev: &pci::Device<Bound> = dev.try_into()?; + let drvdata = dev.drvdata::<Self>()?; + + dev_info!( + dev, + "Connect auxiliary {} with parent: VendorID={}, DeviceID={:#x}\n", + adev.id(), + pdev.vendor_id(), + pdev.device_id() + ); + + dev_info!( + dev, + "We have access to the private data of {:?}.\n", + drvdata.private + ); + + Ok(()) + } +} + +#[pin_data] +struct SampleModule { + #[pin] + _pci_driver: driver::Registration<pci::Adapter<ParentDriver>>, + #[pin] + _aux_driver: driver::Registration<auxiliary::Adapter<AuxiliaryDriver>>, +} + +impl InPlaceModule for SampleModule { + fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> { + try_pin_init!(Self { + _pci_driver <- driver::Registration::new(MODULE_NAME, module), + _aux_driver <- driver::Registration::new(MODULE_NAME, module), + }) + } +} + +module! { + type: SampleModule, + name: "rust_driver_auxiliary", + authors: ["Danilo Krummrich"], + description: "Rust auxiliary driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_faux.rs b/samples/rust/rust_driver_faux.rs new file mode 100644 index 000000000000..ecc9fd378cbd --- /dev/null +++ b/samples/rust/rust_driver_faux.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +//! Rust faux device sample. + +use kernel::{c_str, faux, prelude::*, Module}; + +module! { + type: SampleModule, + name: "rust_faux_driver", + authors: ["Lyude Paul"], + description: "Rust faux device sample", + license: "GPL", +} + +struct SampleModule { + _reg: faux::Registration, +} + +impl Module for SampleModule { + fn init(_module: &'static ThisModule) -> Result<Self> { + pr_info!("Initialising Rust Faux Device Sample\n"); + + let reg = faux::Registration::new(c_str!("rust-faux-sample-device"), None)?; + + dev_info!(reg.as_ref(), "Hello from faux device!\n"); + + Ok(Self { _reg: reg }) + } +} diff --git a/samples/rust/rust_driver_i2c.rs b/samples/rust/rust_driver_i2c.rs new file mode 100644 index 000000000000..ecefeca3e22f --- /dev/null +++ b/samples/rust/rust_driver_i2c.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust I2C driver sample. + +use kernel::{ + acpi, + c_str, + device::Core, + i2c, + of, + prelude::*, // +}; + +struct SampleDriver; + +kernel::acpi_device_table! { + ACPI_TABLE, + MODULE_ACPI_TABLE, + <SampleDriver as i2c::Driver>::IdInfo, + [(acpi::DeviceId::new(c_str!("LNUXBEEF")), 0)] +} + +kernel::i2c_device_table! { + I2C_TABLE, + MODULE_I2C_TABLE, + <SampleDriver as i2c::Driver>::IdInfo, + [(i2c::DeviceId::new(c_str!("rust_driver_i2c")), 0)] +} + +kernel::of_device_table! { + OF_TABLE, + MODULE_OF_TABLE, + <SampleDriver as i2c::Driver>::IdInfo, + [(of::DeviceId::new(c_str!("test,rust_driver_i2c")), 0)] +} + +impl i2c::Driver for SampleDriver { + type IdInfo = u32; + + const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE); + const I2C_ID_TABLE: Option<i2c::IdTable<Self::IdInfo>> = Some(&I2C_TABLE); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe( + idev: &i2c::I2cClient<Core>, + info: Option<&Self::IdInfo>, + ) -> impl PinInit<Self, Error> { + let dev = idev.as_ref(); + + dev_info!(dev, "Probe Rust I2C driver sample.\n"); + + if let Some(info) = info { + dev_info!(dev, "Probed with info: '{}'.\n", info); + } + + Ok(Self) + } + + fn shutdown(idev: &i2c::I2cClient<Core>, _this: Pin<&Self>) { + dev_info!(idev.as_ref(), "Shutdown Rust I2C driver sample.\n"); + } + + fn unbind(idev: &i2c::I2cClient<Core>, _this: Pin<&Self>) { + dev_info!(idev.as_ref(), "Unbind Rust I2C driver sample.\n"); + } +} + +kernel::module_i2c_driver! { + type: SampleDriver, + name: "rust_driver_i2c", + authors: ["Igor Korotin"], + description: "Rust I2C driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs new file mode 100644 index 000000000000..5823787bea8e --- /dev/null +++ b/samples/rust/rust_driver_pci.rs @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust PCI driver sample (based on QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. + +use kernel::{c_str, device::Core, devres::Devres, pci, prelude::*, sync::aref::ARef}; + +struct Regs; + +impl Regs { + const TEST: usize = 0x0; + const OFFSET: usize = 0x4; + const DATA: usize = 0x8; + const COUNT: usize = 0xC; + const END: usize = 0x10; +} + +type Bar0 = pci::Bar<{ Regs::END }>; + +#[derive(Copy, Clone, Debug)] +struct TestIndex(u8); + +impl TestIndex { + const NO_EVENTFD: Self = Self(0); +} + +#[pin_data(PinnedDrop)] +struct SampleDriver { + pdev: ARef<pci::Device>, + #[pin] + bar: Devres<Bar0>, + index: TestIndex, +} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <SampleDriver as pci::Driver>::IdInfo, + [( + pci::DeviceId::from_id(pci::Vendor::REDHAT, 0x5), + TestIndex::NO_EVENTFD + )] +); + +impl SampleDriver { + fn testdev(index: &TestIndex, bar: &Bar0) -> Result<u32> { + // Select the test. + bar.write8(index.0, Regs::TEST); + + let offset = u32::from_le(bar.read32(Regs::OFFSET)) as usize; + let data = bar.read8(Regs::DATA); + + // Write `data` to `offset` to increase `count` by one. + // + // Note that we need `try_write8`, since `offset` can't be checked at compile-time. + bar.try_write8(data, offset)?; + + Ok(bar.read32(Regs::COUNT)) + } +} + +impl pci::Driver for SampleDriver { + type IdInfo = TestIndex; + + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &pci::Device<Core>, info: &Self::IdInfo) -> impl PinInit<Self, Error> { + pin_init::pin_init_scope(move || { + let vendor = pdev.vendor_id(); + dev_dbg!( + pdev.as_ref(), + "Probe Rust PCI driver sample (PCI ID: {}, 0x{:x}).\n", + vendor, + pdev.device_id() + ); + + pdev.enable_device_mem()?; + pdev.set_master(); + + Ok(try_pin_init!(Self { + bar <- pdev.iomap_region_sized::<{ Regs::END }>(0, c_str!("rust_driver_pci")), + index: *info, + _: { + let bar = bar.access(pdev.as_ref())?; + + dev_info!( + pdev.as_ref(), + "pci-testdev data-match count: {}\n", + Self::testdev(info, bar)? + ); + }, + pdev: pdev.into(), + })) + }) + } + + fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) { + if let Ok(bar) = this.bar.access(pdev.as_ref()) { + // Reset pci-testdev by writing a new test index. + bar.write8(this.index.0, Regs::TEST); + } + } +} + +#[pinned_drop] +impl PinnedDrop for SampleDriver { + fn drop(self: Pin<&mut Self>) { + dev_dbg!(self.pdev.as_ref(), "Remove Rust PCI driver sample.\n"); + } +} + +kernel::module_pci_driver! { + type: SampleDriver, + name: "rust_driver_pci", + authors: ["Danilo Krummrich"], + description: "Rust PCI driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_platform.rs b/samples/rust/rust_driver_platform.rs new file mode 100644 index 000000000000..6bf4f0c9633d --- /dev/null +++ b/samples/rust/rust_driver_platform.rs @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust Platform driver sample. + +//! ACPI match table test +//! +//! This demonstrates how to test an ACPI-based Rust platform driver using QEMU +//! with a custom SSDT. +//! +//! Steps: +//! +//! 1. **Create an SSDT source file** (`ssdt.dsl`) with the following content: +//! +//! ```asl +//! DefinitionBlock ("", "SSDT", 2, "TEST", "VIRTACPI", 0x00000001) +//! { +//! Scope (\_SB) +//! { +//! Device (T432) +//! { +//! Name (_HID, "LNUXBEEF") // ACPI hardware ID to match +//! Name (_UID, 1) +//! Name (_STA, 0x0F) // Device present, enabled +//! Name (_CRS, ResourceTemplate () +//! { +//! Memory32Fixed (ReadWrite, 0xFED00000, 0x1000) +//! }) +//! } +//! } +//! } +//! ``` +//! +//! 2. **Compile the table**: +//! +//! ```sh +//! iasl -tc ssdt.dsl +//! ``` +//! +//! This generates `ssdt.aml` +//! +//! 3. **Run QEMU** with the compiled AML file: +//! +//! ```sh +//! qemu-system-x86_64 -m 512M \ +//! -enable-kvm \ +//! -kernel path/to/bzImage \ +//! -append "root=/dev/sda console=ttyS0" \ +//! -hda rootfs.img \ +//! -serial stdio \ +//! -acpitable file=ssdt.aml +//! ``` +//! +//! Requirements: +//! - The `rust_driver_platform` must be present either: +//! - built directly into the kernel (`bzImage`), or +//! - available as a `.ko` file and loadable from `rootfs.img` +//! +//! 4. **Verify it worked** by checking `dmesg`: +//! +//! ``` +//! rust_driver_platform LNUXBEEF:00: Probed with info: '0'. +//! ``` +//! + +use kernel::{ + acpi, c_str, + device::{ + self, + property::{FwNodeReferenceArgs, NArgs}, + Core, + }, + of, platform, + prelude::*, + str::CString, + sync::aref::ARef, +}; + +struct SampleDriver { + pdev: ARef<platform::Device>, +} + +struct Info(u32); + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <SampleDriver as platform::Driver>::IdInfo, + [(of::DeviceId::new(c_str!("test,rust-device")), Info(42))] +); + +kernel::acpi_device_table!( + ACPI_TABLE, + MODULE_ACPI_TABLE, + <SampleDriver as platform::Driver>::IdInfo, + [(acpi::DeviceId::new(c_str!("LNUXBEEF")), Info(0))] +); + +impl platform::Driver for SampleDriver { + type IdInfo = Info; + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE); + + fn probe( + pdev: &platform::Device<Core>, + info: Option<&Self::IdInfo>, + ) -> impl PinInit<Self, Error> { + let dev = pdev.as_ref(); + + dev_dbg!(dev, "Probe Rust Platform driver sample.\n"); + + if let Some(info) = info { + dev_info!(dev, "Probed with info: '{}'.\n", info.0); + } + + if dev.fwnode().is_some_and(|node| node.is_of_node()) { + Self::properties_parse(dev)?; + } + + Ok(Self { pdev: pdev.into() }) + } +} + +impl SampleDriver { + fn properties_parse(dev: &device::Device) -> Result { + let fwnode = dev.fwnode().ok_or(ENOENT)?; + + if let Ok(idx) = + fwnode.property_match_string(c_str!("compatible"), c_str!("test,rust-device")) + { + dev_info!(dev, "matched compatible string idx = {}\n", idx); + } + + let name = c_str!("compatible"); + let prop = fwnode.property_read::<CString>(name).required_by(dev)?; + dev_info!(dev, "'{name}'='{prop:?}'\n"); + + let name = c_str!("test,bool-prop"); + let prop = fwnode.property_read_bool(c_str!("test,bool-prop")); + dev_info!(dev, "'{name}'='{prop}'\n"); + + if fwnode.property_present(c_str!("test,u32-prop")) { + dev_info!(dev, "'test,u32-prop' is present\n"); + } + + let name = c_str!("test,u32-optional-prop"); + let prop = fwnode.property_read::<u32>(name).or(0x12); + dev_info!(dev, "'{name}'='{prop:#x}' (default = 0x12)\n"); + + // A missing required property will print an error. Discard the error to + // prevent properties_parse from failing in that case. + let name = c_str!("test,u32-required-prop"); + let _ = fwnode.property_read::<u32>(name).required_by(dev); + + let name = c_str!("test,u32-prop"); + let prop: u32 = fwnode.property_read(name).required_by(dev)?; + dev_info!(dev, "'{name}'='{prop:#x}'\n"); + + let name = c_str!("test,i16-array"); + let prop: [i16; 4] = fwnode.property_read(name).required_by(dev)?; + dev_info!(dev, "'{name}'='{prop:?}'\n"); + let len = fwnode.property_count_elem::<u16>(name)?; + dev_info!(dev, "'{name}' length is {len}\n"); + + let name = c_str!("test,i16-array"); + let prop: KVec<i16> = fwnode.property_read_array_vec(name, 4)?.required_by(dev)?; + dev_info!(dev, "'{name}'='{prop:?}' (KVec)\n"); + + for child in fwnode.children() { + let name = c_str!("test,ref-arg"); + let nargs = NArgs::N(2); + let prop: FwNodeReferenceArgs = child.property_get_reference_args(name, nargs, 0)?; + dev_info!(dev, "'{name}'='{prop:?}'\n"); + } + + Ok(()) + } +} + +impl Drop for SampleDriver { + fn drop(&mut self) { + dev_dbg!(self.pdev.as_ref(), "Remove Rust Platform driver sample.\n"); + } +} + +kernel::module_platform_driver! { + type: SampleDriver, + name: "rust_driver_platform", + authors: ["Danilo Krummrich"], + description: "Rust Platform driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_usb.rs b/samples/rust/rust_driver_usb.rs new file mode 100644 index 000000000000..4eaad14867b2 --- /dev/null +++ b/samples/rust/rust_driver_usb.rs @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: Copyright (C) 2025 Collabora Ltd. + +//! Rust USB driver sample. + +use kernel::{device, device::Core, prelude::*, sync::aref::ARef, usb}; + +struct SampleDriver { + _intf: ARef<usb::Interface>, +} + +kernel::usb_device_table!( + USB_TABLE, + MODULE_USB_TABLE, + <SampleDriver as usb::Driver>::IdInfo, + [(usb::DeviceId::from_id(0x1234, 0x5678), ()),] +); + +impl usb::Driver for SampleDriver { + type IdInfo = (); + const ID_TABLE: usb::IdTable<Self::IdInfo> = &USB_TABLE; + + fn probe( + intf: &usb::Interface<Core>, + _id: &usb::DeviceId, + _info: &Self::IdInfo, + ) -> impl PinInit<Self, Error> { + let dev: &device::Device<Core> = intf.as_ref(); + dev_info!(dev, "Rust USB driver sample probed\n"); + + Ok(Self { _intf: intf.into() }) + } + + fn disconnect(intf: &usb::Interface<Core>, _data: Pin<&Self>) { + let dev: &device::Device<Core> = intf.as_ref(); + dev_info!(dev, "Rust USB driver sample disconnected\n"); + } +} + +kernel::module_usb_driver! { + type: SampleDriver, + name: "rust_driver_usb", + authors: ["Daniel Almeida"], + description: "Rust USB driver sample", + license: "GPL v2", +} diff --git a/samples/rust/rust_i2c_client.rs b/samples/rust/rust_i2c_client.rs new file mode 100644 index 000000000000..f67938396dce --- /dev/null +++ b/samples/rust/rust_i2c_client.rs @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust I2C client registration sample. +//! +//! An I2C client in Rust cannot exist on its own. To register a new I2C client, +//! it must be bound to a parent device. In this sample driver, a platform device +//! is used as the parent. +//! + +//! ACPI match table test +//! +//! This demonstrates how to test an ACPI-based Rust I2C client registration driver +//! using QEMU with a custom SSDT. +//! +//! Steps: +//! +//! 1. **Create an SSDT source file** (`ssdt.dsl`) with the following content: +//! +//! ```asl +//! DefinitionBlock ("", "SSDT", 2, "TEST", "VIRTACPI", 0x00000001) +//! { +//! Scope (\_SB) +//! { +//! Device (T432) +//! { +//! Name (_HID, "LNUXBEEF") // ACPI hardware ID to match +//! Name (_UID, 1) +//! Name (_STA, 0x0F) // Device present, enabled +//! Name (_CRS, ResourceTemplate () +//! { +//! Memory32Fixed (ReadWrite, 0xFED00000, 0x1000) +//! }) +//! } +//! } +//! } +//! ``` +//! +//! 2. **Compile the table**: +//! +//! ```sh +//! iasl -tc ssdt.dsl +//! ``` +//! +//! This generates `ssdt.aml` +//! +//! 3. **Run QEMU** with the compiled AML file: +//! +//! ```sh +//! qemu-system-x86_64 -m 512M \ +//! -enable-kvm \ +//! -kernel path/to/bzImage \ +//! -append "root=/dev/sda console=ttyS0" \ +//! -hda rootfs.img \ +//! -serial stdio \ +//! -acpitable file=ssdt.aml +//! ``` +//! +//! Requirements: +//! - The `rust_driver_platform` must be present either: +//! - built directly into the kernel (`bzImage`), or +//! - available as a `.ko` file and loadable from `rootfs.img` +//! +//! 4. **Verify it worked** by checking `dmesg`: +//! +//! ``` +//! rust_driver_platform LNUXBEEF:00: Probed with info: '0'. +//! ``` +//! + +use kernel::{ + acpi, + c_str, + device, + devres::Devres, + i2c, + of, + platform, + prelude::*, + sync::aref::ARef, // +}; + +#[pin_data] +struct SampleDriver { + parent_dev: ARef<platform::Device>, + #[pin] + _reg: Devres<i2c::Registration>, +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <SampleDriver as platform::Driver>::IdInfo, + [(of::DeviceId::new(c_str!("test,rust-device")), ())] +); + +kernel::acpi_device_table!( + ACPI_TABLE, + MODULE_ACPI_TABLE, + <SampleDriver as platform::Driver>::IdInfo, + [(acpi::DeviceId::new(c_str!("LNUXBEEF")), ())] +); + +const SAMPLE_I2C_CLIENT_ADDR: u16 = 0x30; +const SAMPLE_I2C_ADAPTER_INDEX: i32 = 0; +const BOARD_INFO: i2c::I2cBoardInfo = + i2c::I2cBoardInfo::new(c_str!("rust_driver_i2c"), SAMPLE_I2C_CLIENT_ADDR); + +impl platform::Driver for SampleDriver { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE); + + fn probe( + pdev: &platform::Device<device::Core>, + _info: Option<&Self::IdInfo>, + ) -> impl PinInit<Self, Error> { + dev_info!( + pdev.as_ref(), + "Probe Rust I2C Client registration sample.\n" + ); + + kernel::try_pin_init!( Self { + parent_dev: pdev.into(), + + _reg <- { + let adapter = i2c::I2cAdapter::get(SAMPLE_I2C_ADAPTER_INDEX)?; + + i2c::Registration::new(&adapter, &BOARD_INFO, pdev.as_ref()) + } + }) + } + + fn unbind(pdev: &platform::Device<device::Core>, _this: Pin<&Self>) { + dev_info!( + pdev.as_ref(), + "Unbind Rust I2C Client registration sample.\n" + ); + } +} + +kernel::module_platform_driver! { + type: SampleDriver, + name: "rust_device_i2c", + authors: ["Danilo Krummrich", "Igor Korotin"], + description: "Rust I2C client registration", + license: "GPL v2", +} diff --git a/samples/rust/rust_minimal.rs b/samples/rust/rust_minimal.rs index dc05f4bbe27e..8eb9583571d7 100644 --- a/samples/rust/rust_minimal.rs +++ b/samples/rust/rust_minimal.rs @@ -7,24 +7,34 @@ use kernel::prelude::*; module! { type: RustMinimal, name: "rust_minimal", - author: "Rust for Linux Contributors", + authors: ["Rust for Linux Contributors"], description: "Rust minimal sample", license: "GPL", + params: { + test_parameter: i64 { + default: 1, + description: "This parameter has a default of 1", + }, + }, } struct RustMinimal { - numbers: Vec<i32>, + numbers: KVec<i32>, } impl kernel::Module for RustMinimal { fn init(_module: &'static ThisModule) -> Result<Self> { pr_info!("Rust minimal sample (init)\n"); pr_info!("Am I built-in? {}\n", !cfg!(MODULE)); - - let mut numbers = Vec::new(); - numbers.try_push(72)?; - numbers.try_push(108)?; - numbers.try_push(200)?; + pr_info!( + "test_parameter: {}\n", + *module_parameters::test_parameter.value() + ); + + let mut numbers = KVec::new(); + numbers.push(72, GFP_KERNEL)?; + numbers.push(108, GFP_KERNEL)?; + numbers.push(200, GFP_KERNEL)?; Ok(RustMinimal { numbers }) } diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs new file mode 100644 index 000000000000..d69bc33dbd99 --- /dev/null +++ b/samples/rust/rust_misc_device.rs @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Rust misc device sample. +//! +//! Below is an example userspace C program that exercises this sample's functionality. +//! +//! ```c +//! #include <stdio.h> +//! #include <stdlib.h> +//! #include <errno.h> +//! #include <fcntl.h> +//! #include <unistd.h> +//! #include <sys/ioctl.h> +//! +//! #define RUST_MISC_DEV_FAIL _IO('|', 0) +//! #define RUST_MISC_DEV_HELLO _IO('|', 0x80) +//! #define RUST_MISC_DEV_GET_VALUE _IOR('|', 0x81, int) +//! #define RUST_MISC_DEV_SET_VALUE _IOW('|', 0x82, int) +//! +//! int main() { +//! int value, new_value; +//! int fd, ret; +//! +//! // Open the device file +//! printf("Opening /dev/rust-misc-device for reading and writing\n"); +//! fd = open("/dev/rust-misc-device", O_RDWR); +//! if (fd < 0) { +//! perror("open"); +//! return errno; +//! } +//! +//! // Make call into driver to say "hello" +//! printf("Calling Hello\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_HELLO, NULL); +//! if (ret < 0) { +//! perror("ioctl: Failed to call into Hello"); +//! close(fd); +//! return errno; +//! } +//! +//! // Get initial value +//! printf("Fetching initial value\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &value); +//! if (ret < 0) { +//! perror("ioctl: Failed to fetch the initial value"); +//! close(fd); +//! return errno; +//! } +//! +//! value++; +//! +//! // Set value to something different +//! printf("Submitting new value (%d)\n", value); +//! ret = ioctl(fd, RUST_MISC_DEV_SET_VALUE, &value); +//! if (ret < 0) { +//! perror("ioctl: Failed to submit new value"); +//! close(fd); +//! return errno; +//! } +//! +//! // Ensure new value was applied +//! printf("Fetching new value\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &new_value); +//! if (ret < 0) { +//! perror("ioctl: Failed to fetch the new value"); +//! close(fd); +//! return errno; +//! } +//! +//! if (value != new_value) { +//! printf("Failed: Committed and retrieved values are different (%d - %d)\n", value, new_value); +//! close(fd); +//! return -1; +//! } +//! +//! // Call the unsuccessful ioctl +//! printf("Attempting to call in to an non-existent IOCTL\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_FAIL, NULL); +//! if (ret < 0) { +//! perror("ioctl: Succeeded to fail - this was expected"); +//! } else { +//! printf("ioctl: Failed to fail\n"); +//! close(fd); +//! return -1; +//! } +//! +//! // Close the device file +//! printf("Closing /dev/rust-misc-device\n"); +//! close(fd); +//! +//! printf("Success\n"); +//! return 0; +//! } +//! ``` + +use core::pin::Pin; + +use kernel::{ + c_str, + device::Device, + fs::{File, Kiocb}, + ioctl::{_IO, _IOC_SIZE, _IOR, _IOW}, + iov::{IovIterDest, IovIterSource}, + miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration}, + new_mutex, + prelude::*, + sync::{aref::ARef, Mutex}, + uaccess::{UserSlice, UserSliceReader, UserSliceWriter}, +}; + +const RUST_MISC_DEV_HELLO: u32 = _IO('|' as u32, 0x80); +const RUST_MISC_DEV_GET_VALUE: u32 = _IOR::<i32>('|' as u32, 0x81); +const RUST_MISC_DEV_SET_VALUE: u32 = _IOW::<i32>('|' as u32, 0x82); + +module! { + type: RustMiscDeviceModule, + name: "rust_misc_device", + authors: ["Lee Jones"], + description: "Rust misc device sample", + license: "GPL", +} + +#[pin_data] +struct RustMiscDeviceModule { + #[pin] + _miscdev: MiscDeviceRegistration<RustMiscDevice>, +} + +impl kernel::InPlaceModule for RustMiscDeviceModule { + fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { + pr_info!("Initialising Rust Misc Device Sample\n"); + + let options = MiscDeviceOptions { + name: c_str!("rust-misc-device"), + }; + + try_pin_init!(Self { + _miscdev <- MiscDeviceRegistration::register(options), + }) + } +} + +struct Inner { + value: i32, + buffer: KVVec<u8>, +} + +#[pin_data(PinnedDrop)] +struct RustMiscDevice { + #[pin] + inner: Mutex<Inner>, + dev: ARef<Device>, +} + +#[vtable] +impl MiscDevice for RustMiscDevice { + type Ptr = Pin<KBox<Self>>; + + fn open(_file: &File, misc: &MiscDeviceRegistration<Self>) -> Result<Pin<KBox<Self>>> { + let dev = ARef::from(misc.device()); + + dev_info!(dev, "Opening Rust Misc Device Sample\n"); + + KBox::try_pin_init( + try_pin_init! { + RustMiscDevice { + inner <- new_mutex!(Inner { + value: 0_i32, + buffer: KVVec::new(), + }), + dev: dev, + } + }, + GFP_KERNEL, + ) + } + + fn read_iter(mut kiocb: Kiocb<'_, Self::Ptr>, iov: &mut IovIterDest<'_>) -> Result<usize> { + let me = kiocb.file(); + dev_info!(me.dev, "Reading from Rust Misc Device Sample\n"); + + let inner = me.inner.lock(); + // Read the buffer contents, taking the file position into account. + let read = iov.simple_read_from_buffer(kiocb.ki_pos_mut(), &inner.buffer)?; + + Ok(read) + } + + fn write_iter(mut kiocb: Kiocb<'_, Self::Ptr>, iov: &mut IovIterSource<'_>) -> Result<usize> { + let me = kiocb.file(); + dev_info!(me.dev, "Writing to Rust Misc Device Sample\n"); + + let mut inner = me.inner.lock(); + + // Replace buffer contents. + inner.buffer.clear(); + let len = iov.copy_from_iter_vec(&mut inner.buffer, GFP_KERNEL)?; + + // Set position to zero so that future `read` calls will see the new contents. + *kiocb.ki_pos_mut() = 0; + + Ok(len) + } + + fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: u32, arg: usize) -> Result<isize> { + dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n"); + + // Treat the ioctl argument as a user pointer. + let arg = UserPtr::from_addr(arg); + let size = _IOC_SIZE(cmd); + + match cmd { + RUST_MISC_DEV_GET_VALUE => me.get_value(UserSlice::new(arg, size).writer())?, + RUST_MISC_DEV_SET_VALUE => me.set_value(UserSlice::new(arg, size).reader())?, + RUST_MISC_DEV_HELLO => me.hello()?, + _ => { + dev_err!(me.dev, "-> IOCTL not recognised: {}\n", cmd); + return Err(ENOTTY); + } + }; + + Ok(0) + } +} + +#[pinned_drop] +impl PinnedDrop for RustMiscDevice { + fn drop(self: Pin<&mut Self>) { + dev_info!(self.dev, "Exiting the Rust Misc Device Sample\n"); + } +} + +impl RustMiscDevice { + fn set_value(&self, mut reader: UserSliceReader) -> Result<isize> { + let new_value = reader.read::<i32>()?; + let mut guard = self.inner.lock(); + + dev_info!( + self.dev, + "-> Copying data from userspace (value: {})\n", + new_value + ); + + guard.value = new_value; + Ok(0) + } + + fn get_value(&self, mut writer: UserSliceWriter) -> Result<isize> { + let guard = self.inner.lock(); + let value = guard.value; + + // Free-up the lock and use our locally cached instance from here + drop(guard); + + dev_info!( + self.dev, + "-> Copying data to userspace (value: {})\n", + &value + ); + + writer.write::<i32>(&value)?; + Ok(0) + } + + fn hello(&self) -> Result<isize> { + dev_info!(self.dev, "-> Hello from the Rust Misc Device\n"); + + Ok(0) + } +} diff --git a/samples/rust/rust_print.rs b/samples/rust/rust_print.rs deleted file mode 100644 index 8b39d9cef6d1..000000000000 --- a/samples/rust/rust_print.rs +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -//! Rust printing macros sample. - -use kernel::pr_cont; -use kernel::prelude::*; - -module! { - type: RustPrint, - name: "rust_print", - author: "Rust for Linux Contributors", - description: "Rust printing macros sample", - license: "GPL", -} - -struct RustPrint; - -impl kernel::Module for RustPrint { - fn init(_module: &'static ThisModule) -> Result<Self> { - pr_info!("Rust printing macros sample (init)\n"); - - pr_emerg!("Emergency message (level 0) without args\n"); - pr_alert!("Alert message (level 1) without args\n"); - pr_crit!("Critical message (level 2) without args\n"); - pr_err!("Error message (level 3) without args\n"); - pr_warn!("Warning message (level 4) without args\n"); - pr_notice!("Notice message (level 5) without args\n"); - pr_info!("Info message (level 6) without args\n"); - - pr_info!("A line that"); - pr_cont!(" is continued"); - pr_cont!(" without args\n"); - - pr_emerg!("{} message (level {}) with args\n", "Emergency", 0); - pr_alert!("{} message (level {}) with args\n", "Alert", 1); - pr_crit!("{} message (level {}) with args\n", "Critical", 2); - pr_err!("{} message (level {}) with args\n", "Error", 3); - pr_warn!("{} message (level {}) with args\n", "Warning", 4); - pr_notice!("{} message (level {}) with args\n", "Notice", 5); - pr_info!("{} message (level {}) with args\n", "Info", 6); - - pr_info!("A {} that", "line"); - pr_cont!(" is {}", "continued"); - pr_cont!(" with {}\n", "args"); - - Ok(RustPrint) - } -} - -impl Drop for RustPrint { - fn drop(&mut self) { - pr_info!("Rust printing macros sample (exit)\n"); - } -} diff --git a/samples/rust/rust_print_events.c b/samples/rust/rust_print_events.c new file mode 100644 index 000000000000..a9169ff0edf1 --- /dev/null +++ b/samples/rust/rust_print_events.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Google LLC + */ + +#define CREATE_TRACE_POINTS +#define CREATE_RUST_TRACE_POINTS +#include <trace/events/rust_sample.h> diff --git a/samples/rust/rust_print_main.rs b/samples/rust/rust_print_main.rs new file mode 100644 index 000000000000..4095c72afeab --- /dev/null +++ b/samples/rust/rust_print_main.rs @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust printing macros sample. + +use kernel::pr_cont; +use kernel::prelude::*; + +module! { + type: RustPrint, + name: "rust_print", + authors: ["Rust for Linux Contributors"], + description: "Rust printing macros sample", + license: "GPL", +} + +struct RustPrint; + +#[expect(clippy::disallowed_macros)] +fn arc_print() -> Result { + use kernel::sync::*; + + let a = Arc::new(1, GFP_KERNEL)?; + let b = UniqueArc::new("hello, world", GFP_KERNEL)?; + + // Prints the value of data in `a`. + pr_info!("{}", a); + + // Uses ":?" to print debug fmt of `b`. + pr_info!("{:?}", b); + + let a: Arc<&str> = b.into(); + let c = a.clone(); + + // Uses `dbg` to print, will move `c` (for temporary debugging purposes). + dbg!(c); + + { + // `Arc` can be used to delegate dynamic dispatch and the following is an example. + // Both `i32` and `&str` implement `Display`. This enables us to express a unified + // behaviour, contract or protocol on both `i32` and `&str` into a single `Arc` of + // type `Arc<dyn Display>`. + + use kernel::fmt::Display; + fn arc_dyn_print(arc: &Arc<dyn Display>) { + pr_info!("Arc<dyn Display> says {arc}"); + } + + let a_i32_display: Arc<dyn Display> = Arc::new(42i32, GFP_KERNEL)?; + let a_str_display: Arc<dyn Display> = a.clone(); + + arc_dyn_print(&a_i32_display); + arc_dyn_print(&a_str_display); + } + + // Pretty-prints the debug formatting with lower-case hexadecimal integers. + pr_info!("{:#x?}", a); + + Ok(()) +} + +impl kernel::Module for RustPrint { + fn init(_module: &'static ThisModule) -> Result<Self> { + pr_info!("Rust printing macros sample (init)\n"); + + pr_emerg!("Emergency message (level 0) without args\n"); + pr_alert!("Alert message (level 1) without args\n"); + pr_crit!("Critical message (level 2) without args\n"); + pr_err!("Error message (level 3) without args\n"); + pr_warn!("Warning message (level 4) without args\n"); + pr_notice!("Notice message (level 5) without args\n"); + pr_info!("Info message (level 6) without args\n"); + + pr_info!("A line that"); + pr_cont!(" is continued"); + pr_cont!(" without args\n"); + + pr_emerg!("{} message (level {}) with args\n", "Emergency", 0); + pr_alert!("{} message (level {}) with args\n", "Alert", 1); + pr_crit!("{} message (level {}) with args\n", "Critical", 2); + pr_err!("{} message (level {}) with args\n", "Error", 3); + pr_warn!("{} message (level {}) with args\n", "Warning", 4); + pr_notice!("{} message (level {}) with args\n", "Notice", 5); + pr_info!("{} message (level {}) with args\n", "Info", 6); + + pr_info!("A {} that", "line"); + pr_cont!(" is {}", "continued"); + pr_cont!(" with {}\n", "args"); + + arc_print()?; + + trace::trace_rust_sample_loaded(42); + + Ok(RustPrint) + } +} + +impl Drop for RustPrint { + fn drop(&mut self) { + pr_info!("Rust printing macros sample (exit)\n"); + } +} + +mod trace { + use kernel::ffi::c_int; + + kernel::declare_trace! { + /// # Safety + /// + /// Always safe to call. + unsafe fn rust_sample_loaded(magic: c_int); + } + + pub(crate) fn trace_rust_sample_loaded(magic: i32) { + // SAFETY: Always safe to call. + unsafe { rust_sample_loaded(magic as c_int) } + } +} diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c index 20291ec6489f..a23fec357b5d 100644 --- a/samples/seccomp/user-trap.c +++ b/samples/seccomp/user-trap.c @@ -33,6 +33,7 @@ static int send_fd(int sock, int fd) { struct msghdr msg = {}; struct cmsghdr *cmsg; + int *fd_ptr; char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c'; struct iovec io = { .iov_base = &c, @@ -47,7 +48,8 @@ static int send_fd(int sock, int fd) cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN(sizeof(int)); - *((int *)CMSG_DATA(cmsg)) = fd; + fd_ptr = (int *)CMSG_DATA(cmsg); + *fd_ptr = fd; msg.msg_controllen = cmsg->cmsg_len; if (sendmsg(sock, &msg, 0) < 0) { @@ -62,6 +64,7 @@ static int recv_fd(int sock) { struct msghdr msg = {}; struct cmsghdr *cmsg; + int *fd_ptr; char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c'; struct iovec io = { .iov_base = &c, @@ -79,8 +82,9 @@ static int recv_fd(int sock) } cmsg = CMSG_FIRSTHDR(&msg); + fd_ptr = (int *)CMSG_DATA(cmsg); - return *((int *)CMSG_DATA(cmsg)); + return *fd_ptr; } static int user_trap_syscall(int nr, unsigned int flags) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 1c6b843b8c4e..1a05fc153353 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -136,10 +136,11 @@ * * To assign a string, use the helper macro __assign_str(). * - * __assign_str(foo, bar); + * __assign_str(foo); * - * In most cases, the __assign_str() macro will take the same - * parameters as the __string() macro had to declare the string. + * The __string() macro saves off the string that is passed into + * the second parameter, and the __assign_str() will store than + * saved string into the "foo" field. * * __vstring: This is similar to __string() but instead of taking a * dynamic length, it takes a variable list va_list 'va' variable. @@ -163,8 +164,7 @@ * __string(). * * __string_len: This is a helper to a __dynamic_array, but it understands - * that the array has characters in it, and with the combined - * use of __assign_str_len(), it will allocate 'len' + 1 bytes + * that the array has characters in it, it will allocate 'len' + 1 bytes * in the ring buffer and add a '\0' to the string. This is * useful if the string being saved has no terminating '\0' byte. * It requires that the length of the string is known as it acts @@ -174,9 +174,11 @@ * * __string_len(foo, bar, len) * - * To assign this string, use the helper macro __assign_str_len(). + * To assign this string, use the helper macro __assign_str(). + * The length is saved via the __string_len() and is retrieved in + * __assign_str(). * - * __assign_str_len(foo, bar, len); + * __assign_str(foo); * * Then len + 1 is allocated to the ring buffer, and a nul terminating * byte is added. This is similar to: @@ -302,20 +304,23 @@ TRACE_EVENT(foo_bar, __bitmask( cpus, num_possible_cpus() ) __cpumask( cpum ) __vstring( vstr, fmt, va ) + __string_len( lstr, foo, bar / 2 < strlen(foo) ? bar / 2 : strlen(foo) ) ), TP_fast_assign( - strlcpy(__entry->foo, foo, 10); + strscpy(__entry->foo, foo, 10); __entry->bar = bar; memcpy(__get_dynamic_array(list), lst, __length_of(lst) * sizeof(int)); - __assign_str(str, string); + __assign_str(str); + __assign_str(lstr); __assign_vstr(vstr, fmt, va); __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); __assign_cpumask(cpum, cpumask_bits(mask)); ), - TP_printk("foo %s %d %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar, + TP_printk("foo %s %d %s %s %s %s %s %s (%s) (%s) %s [%d] %*pbl", + __entry->foo, __entry->bar, /* * Notice here the use of some helper functions. This includes: @@ -359,8 +364,17 @@ TRACE_EVENT(foo_bar, __print_array(__get_dynamic_array(list), __get_dynamic_array_len(list) / sizeof(int), sizeof(int)), - __get_str(str), __get_bitmask(cpus), __get_cpumask(cpum), - __get_str(vstr)) + +/* A shortcut is to use __print_dynamic_array for dynamic arrays */ + + __print_dynamic_array(list, sizeof(int)), + + __get_str(str), __get_str(lstr), + __get_bitmask(cpus), __get_cpumask(cpum), + __get_str(vstr), + __get_dynamic_array_len(cpus), + __get_dynamic_array_len(cpus), + __get_dynamic_array(cpus)) ); /* @@ -414,7 +428,7 @@ TRACE_EVENT_CONDITION(foo_bar_with_cond, ), TP_fast_assign( - __assign_str(foo, foo); + __assign_str(foo); __entry->bar = bar; ), @@ -455,7 +469,7 @@ TRACE_EVENT_FN(foo_bar_with_fn, ), TP_fast_assign( - __assign_str(foo, foo); + __assign_str(foo); __entry->bar = bar; ), @@ -502,7 +516,7 @@ DECLARE_EVENT_CLASS(foo_template, ), TP_fast_assign( - __assign_str(foo, foo); + __assign_str(foo); __entry->bar = bar; ), @@ -570,7 +584,7 @@ TRACE_EVENT(foo_rel_loc, ), TP_fast_assign( - __assign_rel_str(foo, foo); + __assign_rel_str(foo); __entry->bar = bar; __assign_rel_bitmask(bitmask, mask, BITS_PER_BYTE * sizeof(unsigned long)); diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c index b99d9ab7db85..dd409b704b35 100644 --- a/samples/trace_events/trace_custom_sched.c +++ b/samples/trace_events/trace_custom_sched.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) fmt #include <linux/trace_events.h> -#include <linux/version.h> #include <linux/module.h> #include <linux/sched.h> diff --git a/samples/tsm-mr/Makefile b/samples/tsm-mr/Makefile new file mode 100644 index 000000000000..587c3947b3a7 --- /dev/null +++ b/samples/tsm-mr/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_SAMPLE_TSM_MR) += tsm_mr_sample.o diff --git a/samples/tsm-mr/tsm_mr_sample.c b/samples/tsm-mr/tsm_mr_sample.c new file mode 100644 index 000000000000..a2c652148639 --- /dev/null +++ b/samples/tsm-mr/tsm_mr_sample.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024-2005 Intel Corporation. All rights reserved. */ + +#define pr_fmt(x) KBUILD_MODNAME ": " x + +#include <linux/module.h> +#include <linux/tsm-mr.h> +#include <linux/miscdevice.h> +#include <crypto/hash.h> + +static struct { + u8 static_mr[SHA384_DIGEST_SIZE]; + u8 config_mr[SHA512_DIGEST_SIZE]; + u8 rtmr0[SHA256_DIGEST_SIZE]; + u8 rtmr1[SHA384_DIGEST_SIZE]; + u8 report_digest[SHA512_DIGEST_SIZE]; +} sample_report = { + .static_mr = "static_mr", + .config_mr = "config_mr", + .rtmr0 = "rtmr0", + .rtmr1 = "rtmr1", +}; + +static int sample_report_refresh(const struct tsm_measurements *tm) +{ + struct crypto_shash *tfm; + int rc; + + tfm = crypto_alloc_shash(hash_algo_name[HASH_ALGO_SHA512], 0, 0); + if (IS_ERR(tfm)) { + pr_err("crypto_alloc_shash failed: %ld\n", PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + + rc = crypto_shash_tfm_digest(tfm, (u8 *)&sample_report, + offsetof(typeof(sample_report), + report_digest), + sample_report.report_digest); + crypto_free_shash(tfm); + if (rc) + pr_err("crypto_shash_tfm_digest failed: %d\n", rc); + return rc; +} + +static int sample_report_extend_mr(const struct tsm_measurements *tm, + const struct tsm_measurement_register *mr, + const u8 *data) +{ + SHASH_DESC_ON_STACK(desc, 0); + int rc; + + desc->tfm = crypto_alloc_shash(hash_algo_name[mr->mr_hash], 0, 0); + if (IS_ERR(desc->tfm)) { + pr_err("crypto_alloc_shash failed: %ld\n", PTR_ERR(desc->tfm)); + return PTR_ERR(desc->tfm); + } + + rc = crypto_shash_init(desc); + if (!rc) + rc = crypto_shash_update(desc, mr->mr_value, mr->mr_size); + if (!rc) + rc = crypto_shash_finup(desc, data, mr->mr_size, mr->mr_value); + crypto_free_shash(desc->tfm); + if (rc) + pr_err("SHA calculation failed: %d\n", rc); + return rc; +} + +#define MR_(mr, hash) .mr_value = &sample_report.mr, TSM_MR_(mr, hash) +static const struct tsm_measurement_register sample_mrs[] = { + /* static MR, read-only */ + { MR_(static_mr, SHA384) }, + /* config MR, read-only */ + { MR_(config_mr, SHA512) | TSM_MR_F_NOHASH }, + /* RTMR, direct extension prohibited */ + { MR_(rtmr0, SHA256) | TSM_MR_F_LIVE }, + /* RTMR, direct extension allowed */ + { MR_(rtmr1, SHA384) | TSM_MR_F_RTMR }, + /* RTMR, crypto agile, alaised to rtmr0 and rtmr1, respectively */ + { .mr_value = &sample_report.rtmr0, + TSM_MR_(rtmr_crypto_agile, SHA256) | TSM_MR_F_RTMR }, + { .mr_value = &sample_report.rtmr1, + TSM_MR_(rtmr_crypto_agile, SHA384) | TSM_MR_F_RTMR }, + /* sha512 digest of the whole structure */ + { MR_(report_digest, SHA512) | TSM_MR_F_LIVE }, +}; +#undef MR_ + +static struct tsm_measurements sample_tm = { + .mrs = sample_mrs, + .nr_mrs = ARRAY_SIZE(sample_mrs), + .refresh = sample_report_refresh, + .write = sample_report_extend_mr, +}; + +static const struct attribute_group *sample_groups[] = { + NULL, + NULL, +}; + +static struct miscdevice sample_misc_dev = { + .name = KBUILD_MODNAME, + .minor = MISC_DYNAMIC_MINOR, + .groups = sample_groups, +}; + +static int __init tsm_mr_sample_init(void) +{ + int rc; + + sample_groups[0] = tsm_mr_create_attribute_group(&sample_tm); + if (IS_ERR(sample_groups[0])) + return PTR_ERR(sample_groups[0]); + + rc = misc_register(&sample_misc_dev); + if (rc) + tsm_mr_free_attribute_group(sample_groups[0]); + return rc; +} + +static void __exit tsm_mr_sample_exit(void) +{ + misc_deregister(&sample_misc_dev); + tsm_mr_free_attribute_group(sample_groups[0]); +} + +module_init(tsm_mr_sample_init); +module_exit(tsm_mr_sample_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Sample module using tsm-mr to expose emulated MRs"); diff --git a/samples/user_events/example.c b/samples/user_events/example.c index d06dc24156ec..28165a096697 100644 --- a/samples/user_events/example.c +++ b/samples/user_events/example.c @@ -9,51 +9,28 @@ #include <errno.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <sys/uio.h> #include <fcntl.h> #include <stdio.h> #include <unistd.h> -#include <asm/bitsperlong.h> -#include <endian.h> #include <linux/user_events.h> -#if __BITS_PER_LONG == 64 -#define endian_swap(x) htole64(x) -#else -#define endian_swap(x) htole32(x) -#endif +const char *data_file = "/sys/kernel/tracing/user_events_data"; +int enabled = 0; -/* Assumes debugfs is mounted */ -const char *data_file = "/sys/kernel/debug/tracing/user_events_data"; -const char *status_file = "/sys/kernel/debug/tracing/user_events_status"; - -static int event_status(long **status) -{ - int fd = open(status_file, O_RDONLY); - - *status = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, - MAP_SHARED, fd, 0); - - close(fd); - - if (*status == MAP_FAILED) - return -1; - - return 0; -} - -static int event_reg(int fd, const char *command, long *index, long *mask, - int *write) +static int event_reg(int fd, const char *command, int *write, int *enabled) { struct user_reg reg = {0}; reg.size = sizeof(reg); + reg.enable_bit = 31; + reg.enable_size = sizeof(*enabled); + reg.enable_addr = (__u64)enabled; reg.name_args = (__u64)command; if (ioctl(fd, DIAG_IOCSREG, ®) == -1) return -1; - *index = reg.status_bit / __BITS_PER_LONG; - *mask = endian_swap(1L << (reg.status_bit % __BITS_PER_LONG)); *write = reg.write_index; return 0; @@ -62,17 +39,12 @@ static int event_reg(int fd, const char *command, long *index, long *mask, int main(int argc, char **argv) { int data_fd, write; - long index, mask; - long *status_page; struct iovec io[2]; __u32 count = 0; - if (event_status(&status_page) == -1) - return errno; - data_fd = open(data_file, O_RDWR); - if (event_reg(data_fd, "test u32 count", &index, &mask, &write) == -1) + if (event_reg(data_fd, "test u32 count", &write, &enabled) == -1) return errno; /* Setup iovec */ @@ -80,13 +52,12 @@ int main(int argc, char **argv) io[0].iov_len = sizeof(write); io[1].iov_base = &count; io[1].iov_len = sizeof(count); - ask: printf("Press enter to check status...\n"); getchar(); /* Check if anyone is listening */ - if (status_page[index] & mask) { + if (enabled) { /* Yep, trace out our data */ writev(data_fd, (const struct iovec *)io, 2); diff --git a/samples/v4l/v4l2-pci-skeleton.c b/samples/v4l/v4l2-pci-skeleton.c index a61f94db18d9..69925d30329e 100644 --- a/samples/v4l/v4l2-pci-skeleton.c +++ b/samples/v4l/v4l2-pci-skeleton.c @@ -155,6 +155,7 @@ static int queue_setup(struct vb2_queue *vq, unsigned int sizes[], struct device *alloc_devs[]) { struct skeleton *skel = vb2_get_drv_priv(vq); + unsigned int q_num_bufs = vb2_get_num_buffers(vq); skel->field = skel->format.field; if (skel->field == V4L2_FIELD_ALTERNATE) { @@ -167,8 +168,8 @@ static int queue_setup(struct vb2_queue *vq, skel->field = V4L2_FIELD_TOP; } - if (vq->num_buffers + *nbuffers < 3) - *nbuffers = 3 - vq->num_buffers; + if (q_num_bufs + *nbuffers < 3) + *nbuffers = 3 - q_num_bufs; if (*nplanes) return sizes[0] < skel->format.sizeimage ? -EINVAL : 0; @@ -268,9 +269,7 @@ static void stop_streaming(struct vb2_queue *vq) } /* - * The vb2 queue ops. Note that since q->lock is set we can use the standard - * vb2_ops_wait_prepare/finish helper functions. If q->lock would be NULL, - * then this driver would have to provide these ops. + * The vb2 queue ops. */ static const struct vb2_ops skel_qops = { .queue_setup = queue_setup, @@ -278,8 +277,6 @@ static const struct vb2_ops skel_qops = { .buf_queue = buffer_queue, .start_streaming = start_streaming, .stop_streaming = stop_streaming, - .wait_prepare = vb2_ops_wait_prepare, - .wait_finish = vb2_ops_wait_finish, }; /* @@ -291,8 +288,8 @@ static int skeleton_querycap(struct file *file, void *priv, { struct skeleton *skel = video_drvdata(file); - strlcpy(cap->driver, KBUILD_MODNAME, sizeof(cap->driver)); - strlcpy(cap->card, "V4L2 PCI Skeleton", sizeof(cap->card)); + strscpy(cap->driver, KBUILD_MODNAME, sizeof(cap->driver)); + strscpy(cap->card, "V4L2 PCI Skeleton", sizeof(cap->card)); snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", pci_name(skel->pdev)); return 0; @@ -473,7 +470,7 @@ static int skeleton_querystd(struct file *file, void *priv, v4l2_std_id *std) return 0; } -static int skeleton_s_dv_timings(struct file *file, void *_fh, +static int skeleton_s_dv_timings(struct file *file, void *priv, struct v4l2_dv_timings *timings) { struct skeleton *skel = video_drvdata(file); @@ -512,7 +509,7 @@ static int skeleton_s_dv_timings(struct file *file, void *_fh, return 0; } -static int skeleton_g_dv_timings(struct file *file, void *_fh, +static int skeleton_g_dv_timings(struct file *file, void *priv, struct v4l2_dv_timings *timings) { struct skeleton *skel = video_drvdata(file); @@ -525,7 +522,7 @@ static int skeleton_g_dv_timings(struct file *file, void *_fh, return 0; } -static int skeleton_enum_dv_timings(struct file *file, void *_fh, +static int skeleton_enum_dv_timings(struct file *file, void *priv, struct v4l2_enum_dv_timings *timings) { struct skeleton *skel = video_drvdata(file); @@ -547,7 +544,7 @@ static int skeleton_enum_dv_timings(struct file *file, void *_fh, * can lock but that the DMA engine it is connected to cannot handle * pixelclocks above a certain frequency), then -ERANGE is returned. */ -static int skeleton_query_dv_timings(struct file *file, void *_fh, +static int skeleton_query_dv_timings(struct file *file, void *priv, struct v4l2_dv_timings *timings) { struct skeleton *skel = video_drvdata(file); @@ -576,7 +573,7 @@ static int skeleton_query_dv_timings(struct file *file, void *_fh, return 0; } -static int skeleton_dv_timings_cap(struct file *file, void *fh, +static int skeleton_dv_timings_cap(struct file *file, void *priv, struct v4l2_dv_timings_cap *cap) { struct skeleton *skel = video_drvdata(file); @@ -597,11 +594,11 @@ static int skeleton_enum_input(struct file *file, void *priv, i->type = V4L2_INPUT_TYPE_CAMERA; if (i->index == 0) { i->std = SKEL_TVNORMS; - strlcpy(i->name, "S-Video", sizeof(i->name)); + strscpy(i->name, "S-Video", sizeof(i->name)); i->capabilities = V4L2_IN_CAP_STD; } else { i->std = 0; - strlcpy(i->name, "HDMI", sizeof(i->name)); + strscpy(i->name, "HDMI", sizeof(i->name)); i->capabilities = V4L2_IN_CAP_DV_TIMINGS; } return 0; @@ -820,7 +817,7 @@ static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * available before it can be started. The start_streaming() op * won't be called until at least this many buffers are queued up. */ - q->min_buffers_needed = 2; + q->min_queued_buffers = 2; /* * The serialization lock for the streaming ioctls. This is the same * as the main serialization lock, but if some of the non-streaming @@ -845,7 +842,7 @@ static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Initialize the video_device structure */ vdev = &skel->vdev; - strlcpy(vdev->name, KBUILD_MODNAME, sizeof(vdev->name)); + strscpy(vdev->name, KBUILD_MODNAME, sizeof(vdev->name)); /* * There is nothing to clean up, so release is set to an empty release * function. The release callback must be non-NULL. diff --git a/samples/vfio-mdev/README.rst b/samples/vfio-mdev/README.rst new file mode 100644 index 000000000000..b52eb37739c0 --- /dev/null +++ b/samples/vfio-mdev/README.rst @@ -0,0 +1,100 @@ +Using the mtty vfio-mdev sample code +==================================== + +mtty is a sample vfio-mdev driver that demonstrates how to use the mediated +device framework. + +The sample driver creates an mdev device that simulates a serial port over a PCI +card. + +1. Build and load the mtty.ko module. + + This step creates a dummy device, /sys/devices/virtual/mtty/mtty/ + + Files in this device directory in sysfs are similar to the following:: + + # tree /sys/devices/virtual/mtty/mtty/ + /sys/devices/virtual/mtty/mtty/ + |-- mdev_supported_types + | |-- mtty-1 + | | |-- available_instances + | | |-- create + | | |-- device_api + | | |-- devices + | | `-- name + | `-- mtty-2 + | |-- available_instances + | |-- create + | |-- device_api + | |-- devices + | `-- name + |-- mtty_dev + | `-- sample_mtty_dev + |-- power + | |-- autosuspend_delay_ms + | |-- control + | |-- runtime_active_time + | |-- runtime_status + | `-- runtime_suspended_time + |-- subsystem -> ../../../../class/mtty + `-- uevent + +2. Create a mediated device by using the dummy device that you created in the + previous step:: + + # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > \ + /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create + +3. Add parameters to qemu-kvm:: + + -device vfio-pci,\ + sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 + +4. Boot the VM. + + In the Linux guest VM, with no hardware on the host, the device appears + as follows:: + + # lspci -s 00:05.0 -xxvv + 00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550]) + Subsystem: Device 4348:3253 + Physical Slot: 5 + Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- + Stepping- SERR- FastB2B- DisINTx- + Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- + <TAbort- <MAbort- >SERR- <PERR- INTx- + Interrupt: pin A routed to IRQ 10 + Region 0: I/O ports at c150 [size=8] + Region 1: I/O ports at c158 [size=8] + Kernel driver in use: serial + 00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00 + 10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00 + 20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32 + 30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00 + + In the Linux guest VM, dmesg output for the device is as follows: + + serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10 + 0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A + 0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A + + +5. In the Linux guest VM, check the serial ports:: + + # setserial -g /dev/ttyS* + /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4 + /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10 + /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10 + +6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or + /dev/ttyS2 with hardware flow control disabled. + +7. Type data on the minicom terminal or send data to the terminal emulation + program and read the data. + + Data is loop backed from hosts mtty driver. + +8. Destroy the mediated device that you created:: + + # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove + diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index e54eb752e1ba..64ea19253ee3 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -88,6 +88,7 @@ #define STORE_LE32(addr, val) (*(u32 *)addr = val) +MODULE_DESCRIPTION("Mediated virtual PCI display host device driver"); MODULE_LICENSE("GPL v2"); static int max_mbytes = 256; @@ -133,18 +134,15 @@ static struct mdev_type *mbochs_mdev_types[] = { }; static dev_t mbochs_devt; -static struct class *mbochs_class; +static const struct class mbochs_class = { + .name = MBOCHS_CLASS_NAME, +}; static struct cdev mbochs_cdev; static struct device mbochs_dev; static struct mdev_parent mbochs_parent; static atomic_t mbochs_avail_mbytes; static const struct vfio_device_ops mbochs_dev_ops; -struct vfio_region_info_ext { - struct vfio_region_info base; - struct vfio_region_info_cap_type type; -}; - struct mbochs_mode { u32 drm_format; u32 bytepp; @@ -1030,10 +1028,12 @@ static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf) return 0; } -static int mbochs_get_region_info(struct mdev_state *mdev_state, - struct vfio_region_info_ext *ext) +static int mbochs_ioctl_get_region_info(struct vfio_device *vdev, + struct vfio_region_info *region_info, + struct vfio_info_cap *caps) { - struct vfio_region_info *region_info = &ext->base; + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); if (region_info->index >= MBOCHS_NUM_REGIONS) return -EINVAL; @@ -1058,20 +1058,23 @@ static int mbochs_get_region_info(struct mdev_state *mdev_state, region_info->flags = (VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE); break; - case MBOCHS_EDID_REGION_INDEX: - ext->base.argsz = sizeof(*ext); - ext->base.offset = MBOCHS_EDID_OFFSET; - ext->base.size = MBOCHS_EDID_SIZE; - ext->base.flags = (VFIO_REGION_INFO_FLAG_READ | - VFIO_REGION_INFO_FLAG_WRITE | - VFIO_REGION_INFO_FLAG_CAPS); - ext->base.cap_offset = offsetof(typeof(*ext), type); - ext->type.header.id = VFIO_REGION_INFO_CAP_TYPE; - ext->type.header.version = 1; - ext->type.header.next = 0; - ext->type.type = VFIO_REGION_TYPE_GFX; - ext->type.subtype = VFIO_REGION_SUBTYPE_GFX_EDID; - break; + case MBOCHS_EDID_REGION_INDEX: { + struct vfio_region_info_cap_type cap_type = { + .header.id = VFIO_REGION_INFO_CAP_TYPE, + .header.version = 1, + .type = VFIO_REGION_TYPE_GFX, + .subtype = VFIO_REGION_SUBTYPE_GFX_EDID, + }; + + region_info->offset = MBOCHS_EDID_OFFSET; + region_info->size = MBOCHS_EDID_SIZE; + region_info->flags = (VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_CAPS); + + return vfio_info_add_capability(caps, &cap_type.header, + sizeof(cap_type)); + } default: region_info->size = 0; region_info->offset = 0; @@ -1188,7 +1191,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, struct mdev_state *mdev_state = container_of(vdev, struct mdev_state, vdev); int ret = 0; - unsigned long minsz, outsz; + unsigned long minsz; switch (cmd) { case VFIO_DEVICE_GET_INFO: @@ -1212,30 +1215,6 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, return 0; } - case VFIO_DEVICE_GET_REGION_INFO: - { - struct vfio_region_info_ext info; - - minsz = offsetofend(typeof(info), base.offset); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - - outsz = info.base.argsz; - if (outsz < minsz) - return -EINVAL; - if (outsz > sizeof(info)) - return -EINVAL; - - ret = mbochs_get_region_info(mdev_state, &info); - if (ret) - return ret; - - if (copy_to_user((void __user *)arg, &info, outsz)) - return -EFAULT; - - return 0; - } case VFIO_DEVICE_GET_IRQ_INFO: { @@ -1262,7 +1241,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, case VFIO_DEVICE_QUERY_GFX_PLANE: { - struct vfio_device_gfx_plane_info plane; + struct vfio_device_gfx_plane_info plane = {}; minsz = offsetofend(struct vfio_device_gfx_plane_info, region_index); @@ -1373,7 +1352,12 @@ static const struct vfio_device_ops mbochs_dev_ops = { .read = mbochs_read, .write = mbochs_write, .ioctl = mbochs_ioctl, + .get_region_info_caps = mbochs_ioctl_get_region_info, .mmap = mbochs_mmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; static struct mdev_driver mbochs_driver = { @@ -1418,13 +1402,10 @@ static int __init mbochs_dev_init(void) if (ret) goto err_cdev; - mbochs_class = class_create(THIS_MODULE, MBOCHS_CLASS_NAME); - if (IS_ERR(mbochs_class)) { - pr_err("Error: failed to register mbochs_dev class\n"); - ret = PTR_ERR(mbochs_class); + ret = class_register(&mbochs_class); + if (ret) goto err_driver; - } - mbochs_dev.class = mbochs_class; + mbochs_dev.class = &mbochs_class; mbochs_dev.release = mbochs_device_release; dev_set_name(&mbochs_dev, "%s", MBOCHS_NAME); @@ -1444,7 +1425,7 @@ err_device: device_del(&mbochs_dev); err_put: put_device(&mbochs_dev); - class_destroy(mbochs_class); + class_unregister(&mbochs_class); err_driver: mdev_unregister_driver(&mbochs_driver); err_cdev: @@ -1462,10 +1443,9 @@ static void __exit mbochs_dev_exit(void) mdev_unregister_driver(&mbochs_driver); cdev_del(&mbochs_cdev); unregister_chrdev_region(mbochs_devt, MINORMASK + 1); - class_destroy(mbochs_class); - mbochs_class = NULL; + class_unregister(&mbochs_class); } -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); module_init(mbochs_dev_init) module_exit(mbochs_dev_exit) diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c index 3c8001b9e407..149af7f598f8 100644 --- a/samples/vfio-mdev/mdpy-fb.c +++ b/samples/vfio-mdev/mdpy-fb.c @@ -88,11 +88,9 @@ static void mdpy_fb_destroy(struct fb_info *info) static const struct fb_ops mdpy_fb_ops = { .owner = THIS_MODULE, + FB_DEFAULT_IOMEM_OPS, .fb_destroy = mdpy_fb_destroy, .fb_setcolreg = mdpy_fb_setcolreg, - .fb_fillrect = cfb_fillrect, - .fb_copyarea = cfb_copyarea, - .fb_imageblit = cfb_imageblit, }; static int mdpy_fb_probe(struct pci_dev *pdev, @@ -162,7 +160,6 @@ static int mdpy_fb_probe(struct pci_dev *pdev, } info->fbops = &mdpy_fb_ops; - info->flags = FBINFO_DEFAULT; info->pseudo_palette = par->palette; ret = register_framebuffer(info); @@ -232,4 +229,5 @@ static int __init mdpy_fb_init(void) module_init(mdpy_fb_init); MODULE_DEVICE_TABLE(pci, mdpy_fb_pci_table); +MODULE_DESCRIPTION("Framebuffer driver for mdpy (mediated virtual pci display device)"); MODULE_LICENSE("GPL v2"); diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index e8400fdab71d..0759bd68edca 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -40,6 +40,7 @@ #define STORE_LE32(addr, val) (*(u32 *)addr = val) +MODULE_DESCRIPTION("Mediated virtual PCI display host device driver"); MODULE_LICENSE("GPL v2"); #define MDPY_TYPE_1 "vga" @@ -84,7 +85,9 @@ static struct mdev_type *mdpy_mdev_types[] = { }; static dev_t mdpy_devt; -static struct class *mdpy_class; +static const struct class mdpy_class = { + .name = MDPY_CLASS_NAME, +}; static struct cdev mdpy_cdev; static struct device mdpy_dev; static struct mdev_parent mdpy_parent; @@ -432,10 +435,13 @@ static int mdpy_mmap(struct vfio_device *vdev, struct vm_area_struct *vma) return remap_vmalloc_range(vma, mdev_state->memblk, 0); } -static int mdpy_get_region_info(struct mdev_state *mdev_state, - struct vfio_region_info *region_info, - u16 *cap_type_id, void **cap_type) +static int mdpy_ioctl_get_region_info(struct vfio_device *vdev, + struct vfio_region_info *region_info, + struct vfio_info_cap *caps) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + if (region_info->index >= VFIO_PCI_NUM_REGIONS && region_info->index != MDPY_DISPLAY_REGION) return -EINVAL; @@ -541,30 +547,6 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd, return 0; } - case VFIO_DEVICE_GET_REGION_INFO: - { - struct vfio_region_info info; - u16 cap_type_id = 0; - void *cap_type = NULL; - - minsz = offsetofend(struct vfio_region_info, offset); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - - if (info.argsz < minsz) - return -EINVAL; - - ret = mdpy_get_region_info(mdev_state, &info, &cap_type_id, - &cap_type); - if (ret) - return ret; - - if (copy_to_user((void __user *)arg, &info, minsz)) - return -EFAULT; - - return 0; - } case VFIO_DEVICE_GET_IRQ_INFO: { @@ -591,7 +573,7 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd, case VFIO_DEVICE_QUERY_GFX_PLANE: { - struct vfio_device_gfx_plane_info plane; + struct vfio_device_gfx_plane_info plane = {}; minsz = offsetofend(struct vfio_device_gfx_plane_info, region_index); @@ -662,7 +644,12 @@ static const struct vfio_device_ops mdpy_dev_ops = { .read = mdpy_read, .write = mdpy_write, .ioctl = mdpy_ioctl, + .get_region_info_caps = mdpy_ioctl_get_region_info, .mmap = mdpy_mmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, }; static struct mdev_driver mdpy_driver = { @@ -705,13 +692,10 @@ static int __init mdpy_dev_init(void) if (ret) goto err_cdev; - mdpy_class = class_create(THIS_MODULE, MDPY_CLASS_NAME); - if (IS_ERR(mdpy_class)) { - pr_err("Error: failed to register mdpy_dev class\n"); - ret = PTR_ERR(mdpy_class); + ret = class_register(&mdpy_class); + if (ret) goto err_driver; - } - mdpy_dev.class = mdpy_class; + mdpy_dev.class = &mdpy_class; mdpy_dev.release = mdpy_device_release; dev_set_name(&mdpy_dev, "%s", MDPY_NAME); @@ -731,7 +715,7 @@ err_device: device_del(&mdpy_dev); err_put: put_device(&mdpy_dev); - class_destroy(mdpy_class); + class_unregister(&mdpy_class); err_driver: mdev_unregister_driver(&mdpy_driver); err_cdev: @@ -749,8 +733,7 @@ static void __exit mdpy_dev_exit(void) mdev_unregister_driver(&mdpy_driver); cdev_del(&mdpy_cdev); unregister_chrdev_region(mdpy_devt, MINORMASK + 1); - class_destroy(mdpy_class); - mdpy_class = NULL; + class_unregister(&mdpy_class); } module_param_named(count, mdpy_driver.max_instances, int, 0444); diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index e887de672c52..bd92c38379b8 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -29,6 +29,8 @@ #include <linux/serial.h> #include <uapi/linux/serial_reg.h> #include <linux/eventfd.h> +#include <linux/anon_inodes.h> + /* * #defines */ @@ -124,10 +126,32 @@ struct serial_port { u8 intr_trigger_level; /* interrupt trigger level */ }; +struct mtty_data { + u64 magic; +#define MTTY_MAGIC 0x7e9d09898c3e2c4e /* Nothing clever, just random */ + u32 major_ver; +#define MTTY_MAJOR_VER 1 + u32 minor_ver; +#define MTTY_MINOR_VER 0 + u32 nr_ports; + u32 flags; + struct serial_port ports[2]; +}; + +struct mdev_state; + +struct mtty_migration_file { + struct file *filp; + struct mutex lock; + struct mdev_state *mdev_state; + struct mtty_data data; + ssize_t filled_size; + u8 disabled:1; +}; + /* State of each mdev device */ struct mdev_state { struct vfio_device vdev; - int irq_fd; struct eventfd_ctx *intx_evtfd; struct eventfd_ctx *msi_evtfd; int irq_index; @@ -141,6 +165,13 @@ struct mdev_state { struct mutex rxtx_lock; struct vfio_device_info dev_info; int nr_ports; + enum vfio_device_mig_state state; + struct mutex state_mutex; + struct mutex reset_mutex; + struct mtty_migration_file *saving_migf; + struct mtty_migration_file *resuming_migf; + u8 deferred_reset:1; + u8 intx_mask:1; }; static struct mtty_type { @@ -166,10 +197,6 @@ static const struct file_operations vd_fops = { static const struct vfio_device_ops mtty_dev_ops; -/* function prototypes */ - -static int mtty_trigger_interrupt(struct mdev_state *mdev_state); - /* Helper functions */ static void dump_buffer(u8 *buf, uint32_t count) @@ -186,6 +213,36 @@ static void dump_buffer(u8 *buf, uint32_t count) #endif } +static bool is_intx(struct mdev_state *mdev_state) +{ + return mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX; +} + +static bool is_msi(struct mdev_state *mdev_state) +{ + return mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX; +} + +static bool is_noirq(struct mdev_state *mdev_state) +{ + return !is_intx(mdev_state) && !is_msi(mdev_state); +} + +static void mtty_trigger_interrupt(struct mdev_state *mdev_state) +{ + lockdep_assert_held(&mdev_state->ops_lock); + + if (is_msi(mdev_state)) { + if (mdev_state->msi_evtfd) + eventfd_signal(mdev_state->msi_evtfd); + } else if (is_intx(mdev_state)) { + if (mdev_state->intx_evtfd && !mdev_state->intx_mask) { + eventfd_signal(mdev_state->intx_evtfd); + mdev_state->intx_mask = true; + } + } +} + static void mtty_create_config_space(struct mdev_state *mdev_state) { /* PCI dev ID */ @@ -567,7 +624,7 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, u8 lsr = 0; mutex_lock(&mdev_state->rxtx_lock); - /* atleast one char in FIFO */ + /* at least one char in FIFO */ if (mdev_state->s[index].rxtx.head != mdev_state->s[index].rxtx.tail) lsr |= UART_LSR_DR; @@ -717,6 +774,541 @@ accessfailed: return ret; } +static size_t mtty_data_size(struct mdev_state *mdev_state) +{ + return offsetof(struct mtty_data, ports) + + (mdev_state->nr_ports * sizeof(struct serial_port)); +} + +static void mtty_disable_file(struct mtty_migration_file *migf) +{ + mutex_lock(&migf->lock); + migf->disabled = true; + migf->filled_size = 0; + migf->filp->f_pos = 0; + mutex_unlock(&migf->lock); +} + +static void mtty_disable_files(struct mdev_state *mdev_state) +{ + if (mdev_state->saving_migf) { + mtty_disable_file(mdev_state->saving_migf); + fput(mdev_state->saving_migf->filp); + mdev_state->saving_migf = NULL; + } + + if (mdev_state->resuming_migf) { + mtty_disable_file(mdev_state->resuming_migf); + fput(mdev_state->resuming_migf->filp); + mdev_state->resuming_migf = NULL; + } +} + +static void mtty_state_mutex_unlock(struct mdev_state *mdev_state) +{ +again: + mutex_lock(&mdev_state->reset_mutex); + if (mdev_state->deferred_reset) { + mdev_state->deferred_reset = false; + mutex_unlock(&mdev_state->reset_mutex); + mdev_state->state = VFIO_DEVICE_STATE_RUNNING; + mtty_disable_files(mdev_state); + goto again; + } + mutex_unlock(&mdev_state->state_mutex); + mutex_unlock(&mdev_state->reset_mutex); +} + +static int mtty_release_migf(struct inode *inode, struct file *filp) +{ + struct mtty_migration_file *migf = filp->private_data; + + mtty_disable_file(migf); + mutex_destroy(&migf->lock); + kfree(migf); + + return 0; +} + +static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct mtty_migration_file *migf = filp->private_data; + struct mdev_state *mdev_state = migf->mdev_state; + loff_t *pos = &filp->f_pos; + struct vfio_precopy_info info = {}; + unsigned long minsz; + int ret; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&mdev_state->state_mutex); + if (mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY && + mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { + ret = -EINVAL; + goto unlock; + } + + mutex_lock(&migf->lock); + + if (migf->disabled) { + mutex_unlock(&migf->lock); + ret = -ENODEV; + goto unlock; + } + + if (*pos > migf->filled_size) { + mutex_unlock(&migf->lock); + ret = -EINVAL; + goto unlock; + } + + info.dirty_bytes = 0; + info.initial_bytes = migf->filled_size - *pos; + mutex_unlock(&migf->lock); + + ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; +unlock: + mtty_state_mutex_unlock(mdev_state); + return ret; +} + +static ssize_t mtty_save_read(struct file *filp, char __user *buf, + size_t len, loff_t *pos) +{ + struct mtty_migration_file *migf = filp->private_data; + ssize_t ret = 0; + + if (pos) + return -ESPIPE; + + pos = &filp->f_pos; + + mutex_lock(&migf->lock); + + dev_dbg(migf->mdev_state->vdev.dev, "%s ask %zu\n", __func__, len); + + if (migf->disabled) { + ret = -ENODEV; + goto out_unlock; + } + + if (*pos > migf->filled_size) { + ret = -EINVAL; + goto out_unlock; + } + + len = min_t(size_t, migf->filled_size - *pos, len); + if (len) { + if (copy_to_user(buf, (void *)&migf->data + *pos, len)) { + ret = -EFAULT; + goto out_unlock; + } + *pos += len; + ret = len; + } +out_unlock: + dev_dbg(migf->mdev_state->vdev.dev, "%s read %zu\n", __func__, ret); + mutex_unlock(&migf->lock); + return ret; +} + +static const struct file_operations mtty_save_fops = { + .owner = THIS_MODULE, + .read = mtty_save_read, + .unlocked_ioctl = mtty_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .release = mtty_release_migf, +}; + +static void mtty_save_state(struct mdev_state *mdev_state) +{ + struct mtty_migration_file *migf = mdev_state->saving_migf; + int i; + + mutex_lock(&migf->lock); + for (i = 0; i < mdev_state->nr_ports; i++) { + memcpy(&migf->data.ports[i], + &mdev_state->s[i], sizeof(struct serial_port)); + migf->filled_size += sizeof(struct serial_port); + } + dev_dbg(mdev_state->vdev.dev, + "%s filled to %zu\n", __func__, migf->filled_size); + mutex_unlock(&migf->lock); +} + +static int mtty_load_state(struct mdev_state *mdev_state) +{ + struct mtty_migration_file *migf = mdev_state->resuming_migf; + int i; + + mutex_lock(&migf->lock); + /* magic and version already tested by resume write fn */ + if (migf->filled_size < mtty_data_size(mdev_state)) { + dev_dbg(mdev_state->vdev.dev, "%s expected %zu, got %zu\n", + __func__, mtty_data_size(mdev_state), + migf->filled_size); + mutex_unlock(&migf->lock); + return -EINVAL; + } + + for (i = 0; i < mdev_state->nr_ports; i++) + memcpy(&mdev_state->s[i], + &migf->data.ports[i], sizeof(struct serial_port)); + + mutex_unlock(&migf->lock); + return 0; +} + +static struct mtty_migration_file * +mtty_save_device_data(struct mdev_state *mdev_state, + enum vfio_device_mig_state state) +{ + struct mtty_migration_file *migf = mdev_state->saving_migf; + struct mtty_migration_file *ret = NULL; + + if (migf) { + if (state == VFIO_DEVICE_STATE_STOP_COPY) + goto fill_data; + return ret; + } + + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("mtty_mig", &mtty_save_fops, + migf, O_RDONLY); + if (IS_ERR(migf->filp)) { + int rc = PTR_ERR(migf->filp); + + kfree(migf); + return ERR_PTR(rc); + } + + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + migf->mdev_state = mdev_state; + + migf->data.magic = MTTY_MAGIC; + migf->data.major_ver = MTTY_MAJOR_VER; + migf->data.minor_ver = MTTY_MINOR_VER; + migf->data.nr_ports = mdev_state->nr_ports; + + migf->filled_size = offsetof(struct mtty_data, ports); + + dev_dbg(mdev_state->vdev.dev, "%s filled header to %zu\n", + __func__, migf->filled_size); + + ret = mdev_state->saving_migf = migf; + +fill_data: + if (state == VFIO_DEVICE_STATE_STOP_COPY) + mtty_save_state(mdev_state); + + return ret; +} + +static ssize_t mtty_resume_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct mtty_migration_file *migf = filp->private_data; + struct mdev_state *mdev_state = migf->mdev_state; + loff_t requested_length; + ssize_t ret = 0; + + if (pos) + return -ESPIPE; + + pos = &filp->f_pos; + + if (*pos < 0 || + check_add_overflow((loff_t)len, *pos, &requested_length)) + return -EINVAL; + + if (requested_length > mtty_data_size(mdev_state)) + return -ENOMEM; + + mutex_lock(&migf->lock); + + if (migf->disabled) { + ret = -ENODEV; + goto out_unlock; + } + + if (copy_from_user((void *)&migf->data + *pos, buf, len)) { + ret = -EFAULT; + goto out_unlock; + } + + *pos += len; + ret = len; + + dev_dbg(migf->mdev_state->vdev.dev, "%s received %zu, total %zu\n", + __func__, len, migf->filled_size + len); + + if (migf->filled_size < offsetof(struct mtty_data, ports) && + migf->filled_size + len >= offsetof(struct mtty_data, ports)) { + if (migf->data.magic != MTTY_MAGIC || migf->data.flags || + migf->data.major_ver != MTTY_MAJOR_VER || + migf->data.minor_ver != MTTY_MINOR_VER || + migf->data.nr_ports != mdev_state->nr_ports) { + dev_dbg(migf->mdev_state->vdev.dev, + "%s failed validation\n", __func__); + ret = -EFAULT; + } else { + dev_dbg(migf->mdev_state->vdev.dev, + "%s header validated\n", __func__); + } + } + + migf->filled_size += len; + +out_unlock: + mutex_unlock(&migf->lock); + return ret; +} + +static const struct file_operations mtty_resume_fops = { + .owner = THIS_MODULE, + .write = mtty_resume_write, + .release = mtty_release_migf, +}; + +static struct mtty_migration_file * +mtty_resume_device_data(struct mdev_state *mdev_state) +{ + struct mtty_migration_file *migf; + int ret; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("mtty_mig", &mtty_resume_fops, + migf, O_WRONLY); + if (IS_ERR(migf->filp)) { + ret = PTR_ERR(migf->filp); + kfree(migf); + return ERR_PTR(ret); + } + + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + migf->mdev_state = mdev_state; + + mdev_state->resuming_migf = migf; + + return migf; +} + +static struct file *mtty_step_state(struct mdev_state *mdev_state, + enum vfio_device_mig_state new) +{ + enum vfio_device_mig_state cur = mdev_state->state; + + dev_dbg(mdev_state->vdev.dev, "%s: %d -> %d\n", __func__, cur, new); + + /* + * The following state transitions are no-op considering + * mtty does not do DMA nor require any explicit start/stop. + * + * RUNNING -> RUNNING_P2P + * RUNNING_P2P -> RUNNING + * RUNNING_P2P -> STOP + * PRE_COPY -> PRE_COPY_P2P + * PRE_COPY_P2P -> PRE_COPY + * STOP -> RUNNING_P2P + */ + if ((cur == VFIO_DEVICE_STATE_RUNNING && + new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && + (new == VFIO_DEVICE_STATE_RUNNING || + new == VFIO_DEVICE_STATE_STOP)) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && + new == VFIO_DEVICE_STATE_PRE_COPY_P2P) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_STOP && + new == VFIO_DEVICE_STATE_RUNNING_P2P)) + return NULL; + + /* + * The following state transitions simply close migration files, + * with the exception of RESUMING -> STOP, which needs to load + * the state first. + * + * RESUMING -> STOP + * PRE_COPY -> RUNNING + * PRE_COPY_P2P -> RUNNING_P2P + * STOP_COPY -> STOP + */ + if (cur == VFIO_DEVICE_STATE_RESUMING && + new == VFIO_DEVICE_STATE_STOP) { + int ret; + + ret = mtty_load_state(mdev_state); + if (ret) + return ERR_PTR(ret); + mtty_disable_files(mdev_state); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_PRE_COPY && + new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_STOP_COPY && + new == VFIO_DEVICE_STATE_STOP)) { + mtty_disable_files(mdev_state); + return NULL; + } + + /* + * The following state transitions return migration files. + * + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P + * STOP -> STOP_COPY + * STOP -> RESUMING + * PRE_COPY_P2P -> STOP_COPY + */ + if ((cur == VFIO_DEVICE_STATE_RUNNING && + new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && + new == VFIO_DEVICE_STATE_PRE_COPY_P2P) || + (cur == VFIO_DEVICE_STATE_STOP && + new == VFIO_DEVICE_STATE_STOP_COPY) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_STOP_COPY)) { + struct mtty_migration_file *migf; + + migf = mtty_save_device_data(mdev_state, new); + if (IS_ERR(migf)) + return ERR_CAST(migf); + + if (migf) { + get_file(migf->filp); + + return migf->filp; + } + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_STOP && + new == VFIO_DEVICE_STATE_RESUMING) { + struct mtty_migration_file *migf; + + migf = mtty_resume_device_data(mdev_state); + if (IS_ERR(migf)) + return ERR_CAST(migf); + + get_file(migf->filp); + + return migf->filp; + } + + /* vfio_mig_get_next_state() does not use arcs other than the above */ + WARN_ON(true); + return ERR_PTR(-EINVAL); +} + +static struct file *mtty_set_state(struct vfio_device *vdev, + enum vfio_device_mig_state new_state) +{ + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + struct file *ret = NULL; + + dev_dbg(vdev->dev, "%s -> %d\n", __func__, new_state); + + mutex_lock(&mdev_state->state_mutex); + while (mdev_state->state != new_state) { + enum vfio_device_mig_state next_state; + int rc = vfio_mig_get_next_state(vdev, mdev_state->state, + new_state, &next_state); + if (rc) { + ret = ERR_PTR(rc); + break; + } + + ret = mtty_step_state(mdev_state, next_state); + if (IS_ERR(ret)) + break; + + mdev_state->state = next_state; + + if (WARN_ON(ret && new_state != next_state)) { + fput(ret); + ret = ERR_PTR(-EINVAL); + break; + } + } + mtty_state_mutex_unlock(mdev_state); + return ret; +} + +static int mtty_get_state(struct vfio_device *vdev, + enum vfio_device_mig_state *current_state) +{ + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + + mutex_lock(&mdev_state->state_mutex); + *current_state = mdev_state->state; + mtty_state_mutex_unlock(mdev_state); + return 0; +} + +static int mtty_get_data_size(struct vfio_device *vdev, + unsigned long *stop_copy_length) +{ + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + + *stop_copy_length = mtty_data_size(mdev_state); + return 0; +} + +static const struct vfio_migration_ops mtty_migration_ops = { + .migration_set_state = mtty_set_state, + .migration_get_state = mtty_get_state, + .migration_get_data_size = mtty_get_data_size, +}; + +static int mtty_log_start(struct vfio_device *vdev, + struct rb_root_cached *ranges, + u32 nnodes, u64 *page_size) +{ + return 0; +} + +static int mtty_log_stop(struct vfio_device *vdev) +{ + return 0; +} + +static int mtty_log_read_and_clear(struct vfio_device *vdev, + unsigned long iova, unsigned long length, + struct iova_bitmap *dirty) +{ + return 0; +} + +static const struct vfio_log_ops mtty_log_ops = { + .log_start = mtty_log_start, + .log_stop = mtty_log_stop, + .log_read_and_clear = mtty_log_read_and_clear, +}; + static int mtty_init_dev(struct vfio_device *vdev) { struct mdev_state *mdev_state = @@ -749,6 +1341,16 @@ static int mtty_init_dev(struct vfio_device *vdev) mutex_init(&mdev_state->ops_lock); mdev_state->mdev = mdev; mtty_create_config_space(mdev_state); + + mutex_init(&mdev_state->state_mutex); + mutex_init(&mdev_state->reset_mutex); + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + vdev->mig_ops = &mtty_migration_ops; + vdev->log_ops = &mtty_log_ops; + mdev_state->state = VFIO_DEVICE_STATE_RUNNING; + return 0; err_nr_ports: @@ -782,6 +1384,8 @@ static void mtty_release_dev(struct vfio_device *vdev) struct mdev_state *mdev_state = container_of(vdev, struct mdev_state, vdev); + mutex_destroy(&mdev_state->reset_mutex); + mutex_destroy(&mdev_state->state_mutex); atomic_add(mdev_state->nr_ports, &mdev_avail_ports); kfree(mdev_state->vconfig); } @@ -798,6 +1402,15 @@ static int mtty_reset(struct mdev_state *mdev_state) { pr_info("%s: called\n", __func__); + mutex_lock(&mdev_state->reset_mutex); + mdev_state->deferred_reset = true; + if (!mutex_trylock(&mdev_state->state_mutex)) { + mutex_unlock(&mdev_state->reset_mutex); + return 0; + } + mutex_unlock(&mdev_state->reset_mutex); + mtty_state_mutex_unlock(mdev_state); + return 0; } @@ -921,6 +1534,25 @@ write_err: return -EFAULT; } +static void mtty_disable_intx(struct mdev_state *mdev_state) +{ + if (mdev_state->intx_evtfd) { + eventfd_ctx_put(mdev_state->intx_evtfd); + mdev_state->intx_evtfd = NULL; + mdev_state->intx_mask = false; + mdev_state->irq_index = -1; + } +} + +static void mtty_disable_msi(struct mdev_state *mdev_state) +{ + if (mdev_state->msi_evtfd) { + eventfd_ctx_put(mdev_state->msi_evtfd); + mdev_state->msi_evtfd = NULL; + mdev_state->irq_index = -1; + } +} + static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags, unsigned int index, unsigned int start, unsigned int count, void *data) @@ -932,59 +1564,113 @@ static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags, case VFIO_PCI_INTX_IRQ_INDEX: switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_MASK: + if (!is_intx(mdev_state) || start != 0 || count != 1) { + ret = -EINVAL; + break; + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + mdev_state->intx_mask = true; + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t mask = *(uint8_t *)data; + + if (mask) + mdev_state->intx_mask = true; + } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + ret = -ENOTTY; /* No support for mask fd */ + } + break; case VFIO_IRQ_SET_ACTION_UNMASK: + if (!is_intx(mdev_state) || start != 0 || count != 1) { + ret = -EINVAL; + break; + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + mdev_state->intx_mask = false; + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t mask = *(uint8_t *)data; + + if (mask) + mdev_state->intx_mask = false; + } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + ret = -ENOTTY; /* No support for unmask fd */ + } break; case VFIO_IRQ_SET_ACTION_TRIGGER: - { - if (flags & VFIO_IRQ_SET_DATA_NONE) { - pr_info("%s: disable INTx\n", __func__); - if (mdev_state->intx_evtfd) - eventfd_ctx_put(mdev_state->intx_evtfd); + if (is_intx(mdev_state) && !count && + (flags & VFIO_IRQ_SET_DATA_NONE)) { + mtty_disable_intx(mdev_state); + break; + } + + if (!(is_intx(mdev_state) || is_noirq(mdev_state)) || + start != 0 || count != 1) { + ret = -EINVAL; break; } if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int fd = *(int *)data; + struct eventfd_ctx *evt; + + mtty_disable_intx(mdev_state); - if (fd > 0) { - struct eventfd_ctx *evt; - - evt = eventfd_ctx_fdget(fd); - if (IS_ERR(evt)) { - ret = PTR_ERR(evt); - break; - } - mdev_state->intx_evtfd = evt; - mdev_state->irq_fd = fd; - mdev_state->irq_index = index; + if (fd < 0) + break; + + evt = eventfd_ctx_fdget(fd); + if (IS_ERR(evt)) { + ret = PTR_ERR(evt); break; } + mdev_state->intx_evtfd = evt; + mdev_state->irq_index = index; + break; + } + + if (!is_intx(mdev_state)) { + ret = -EINVAL; + break; + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + mtty_trigger_interrupt(mdev_state); + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t trigger = *(uint8_t *)data; + + if (trigger) + mtty_trigger_interrupt(mdev_state); } break; } - } break; case VFIO_PCI_MSI_IRQ_INDEX: switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_MASK: case VFIO_IRQ_SET_ACTION_UNMASK: + ret = -ENOTTY; break; case VFIO_IRQ_SET_ACTION_TRIGGER: - if (flags & VFIO_IRQ_SET_DATA_NONE) { - if (mdev_state->msi_evtfd) - eventfd_ctx_put(mdev_state->msi_evtfd); - pr_info("%s: disable MSI\n", __func__); - mdev_state->irq_index = VFIO_PCI_INTX_IRQ_INDEX; + if (is_msi(mdev_state) && !count && + (flags & VFIO_IRQ_SET_DATA_NONE)) { + mtty_disable_msi(mdev_state); + break; + } + + if (!(is_msi(mdev_state) || is_noirq(mdev_state)) || + start != 0 || count != 1) { + ret = -EINVAL; break; } + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int fd = *(int *)data; struct eventfd_ctx *evt; - if (fd <= 0) - break; + mtty_disable_msi(mdev_state); - if (mdev_state->msi_evtfd) + if (fd < 0) break; evt = eventfd_ctx_fdget(fd); @@ -993,20 +1679,37 @@ static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags, break; } mdev_state->msi_evtfd = evt; - mdev_state->irq_fd = fd; mdev_state->irq_index = index; + break; + } + + if (!is_msi(mdev_state)) { + ret = -EINVAL; + break; + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + mtty_trigger_interrupt(mdev_state); + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t trigger = *(uint8_t *)data; + + if (trigger) + mtty_trigger_interrupt(mdev_state); } break; - } - break; + } + break; case VFIO_PCI_MSIX_IRQ_INDEX: - pr_info("%s: MSIX_IRQ\n", __func__); + dev_dbg(mdev_state->vdev.dev, "%s: MSIX_IRQ\n", __func__); + ret = -ENOTTY; break; case VFIO_PCI_ERR_IRQ_INDEX: - pr_info("%s: ERR_IRQ\n", __func__); + dev_dbg(mdev_state->vdev.dev, "%s: ERR_IRQ\n", __func__); + ret = -ENOTTY; break; case VFIO_PCI_REQ_IRQ_INDEX: - pr_info("%s: REQ_IRQ\n", __func__); + dev_dbg(mdev_state->vdev.dev, "%s: REQ_IRQ\n", __func__); + ret = -ENOTTY; break; } @@ -1014,37 +1717,12 @@ static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags, return ret; } -static int mtty_trigger_interrupt(struct mdev_state *mdev_state) -{ - int ret = -1; - - if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) && - (!mdev_state->msi_evtfd)) - return -EINVAL; - else if ((mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX) && - (!mdev_state->intx_evtfd)) { - pr_info("%s: Intr eventfd not found\n", __func__); - return -EINVAL; - } - - if (mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) - ret = eventfd_signal(mdev_state->msi_evtfd, 1); - else - ret = eventfd_signal(mdev_state->intx_evtfd, 1); - -#if defined(DEBUG_INTR) - pr_info("Intx triggered\n"); -#endif - if (ret != 1) - pr_err("%s: eventfd signal failed (%d)\n", __func__, ret); - - return ret; -} - -static int mtty_get_region_info(struct mdev_state *mdev_state, - struct vfio_region_info *region_info, - u16 *cap_type_id, void **cap_type) +static int mtty_ioctl_get_region_info(struct vfio_device *vdev, + struct vfio_region_info *region_info, + struct vfio_info_cap *caps) { + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); unsigned int size = 0; u32 bar_index; @@ -1084,22 +1762,16 @@ static int mtty_get_region_info(struct mdev_state *mdev_state, static int mtty_get_irq_info(struct vfio_irq_info *irq_info) { - switch (irq_info->index) { - case VFIO_PCI_INTX_IRQ_INDEX: - case VFIO_PCI_MSI_IRQ_INDEX: - case VFIO_PCI_REQ_IRQ_INDEX: - break; - - default: + if (irq_info->index != VFIO_PCI_INTX_IRQ_INDEX && + irq_info->index != VFIO_PCI_MSI_IRQ_INDEX) return -EINVAL; - } irq_info->flags = VFIO_IRQ_INFO_EVENTFD; irq_info->count = 1; if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX) - irq_info->flags |= (VFIO_IRQ_INFO_MASKABLE | - VFIO_IRQ_INFO_AUTOMASKED); + irq_info->flags |= VFIO_IRQ_INFO_MASKABLE | + VFIO_IRQ_INFO_AUTOMASKED; else irq_info->flags |= VFIO_IRQ_INFO_NORESIZE; @@ -1147,30 +1819,6 @@ static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd, return 0; } - case VFIO_DEVICE_GET_REGION_INFO: - { - struct vfio_region_info info; - u16 cap_type_id = 0; - void *cap_type = NULL; - - minsz = offsetofend(struct vfio_region_info, offset); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - - if (info.argsz < minsz) - return -EINVAL; - - ret = mtty_get_region_info(mdev_state, &info, &cap_type_id, - &cap_type); - if (ret) - return ret; - - if (copy_to_user((void __user *)arg, &info, minsz)) - return -EFAULT; - - return 0; - } case VFIO_DEVICE_GET_IRQ_INFO: { @@ -1262,6 +1910,16 @@ static unsigned int mtty_get_available(struct mdev_type *mtype) return atomic_read(&mdev_avail_ports) / type->nr_ports; } +static void mtty_close(struct vfio_device *vdev) +{ + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + + mtty_disable_files(mdev_state); + mtty_disable_intx(mdev_state); + mtty_disable_msi(mdev_state); +} + static const struct vfio_device_ops mtty_dev_ops = { .name = "vfio-mtty", .init = mtty_init_dev, @@ -1269,6 +1927,12 @@ static const struct vfio_device_ops mtty_dev_ops = { .read = mtty_read, .write = mtty_write, .ioctl = mtty_ioctl, + .get_region_info_caps = mtty_ioctl_get_region_info, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, + .close_device = mtty_close, }; static struct mdev_driver mtty_driver = { @@ -1316,7 +1980,7 @@ static int __init mtty_dev_init(void) if (ret) goto err_cdev; - mtty_dev.vd_class = class_create(THIS_MODULE, MTTY_CLASS_NAME); + mtty_dev.vd_class = class_create(MTTY_CLASS_NAME); if (IS_ERR(mtty_dev.vd_class)) { pr_err("Error: failed to register mtty_dev class\n"); @@ -1371,6 +2035,6 @@ module_init(mtty_dev_init) module_exit(mtty_dev_exit) MODULE_LICENSE("GPL v2"); -MODULE_INFO(supported, "Test driver that simulate serial port over PCI"); +MODULE_DESCRIPTION("Test driver that simulate serial port over PCI"); MODULE_VERSION(VERSION_STRING); MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore index 79212d91285b..8708341bc082 100644 --- a/samples/vfs/.gitignore +++ b/samples/vfs/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only /test-fsmount +/test-list-all-mounts /test-statx +/mountinfo diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile index 6377a678134a..9256ca5d762b 100644 --- a/samples/vfs/Makefile +++ b/samples/vfs/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -userprogs-always-y += test-fsmount test-statx +userprogs-always-y += test-fsmount test-statx mountinfo test-list-all-mounts +userccflags += -I $(srctree)/tools/testing/selftests/ userccflags += -I usr/include diff --git a/samples/vfs/mountinfo.c b/samples/vfs/mountinfo.c new file mode 100644 index 000000000000..bc78275cac69 --- /dev/null +++ b/samples/vfs/mountinfo.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * Use pidfds, nsfds, listmount() and statmount() mimic the + * contents of /proc/self/mountinfo. + */ +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <alloca.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> + +#include "samples-vfs.h" + +/* max mounts per listmount call */ +#define MAXMOUNTS 1024 + +/* size of struct statmount (including trailing string buffer) */ +#define STATMOUNT_BUFSIZE 4096 + +static bool ext_format; + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +/* + * There are no bindings in glibc for listmount() and statmount() (yet), + * make our own here. + */ +static int statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask, + struct statmount *buf, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static ssize_t listmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 last_mnt_id, + __u64 list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_listmount, &req, list, num, flags); +} + +static void show_mnt_attrs(__u64 flags) +{ + printf("%s", flags & MOUNT_ATTR_RDONLY ? "ro" : "rw"); + + if (flags & MOUNT_ATTR_NOSUID) + printf(",nosuid"); + if (flags & MOUNT_ATTR_NODEV) + printf(",nodev"); + if (flags & MOUNT_ATTR_NOEXEC) + printf(",noexec"); + + switch (flags & MOUNT_ATTR__ATIME) { + case MOUNT_ATTR_RELATIME: + printf(",relatime"); + break; + case MOUNT_ATTR_NOATIME: + printf(",noatime"); + break; + case MOUNT_ATTR_STRICTATIME: + /* print nothing */ + break; + } + + if (flags & MOUNT_ATTR_NODIRATIME) + printf(",nodiratime"); + if (flags & MOUNT_ATTR_NOSYMFOLLOW) + printf(",nosymfollow"); + if (flags & MOUNT_ATTR_IDMAP) + printf(",idmapped"); +} + +static void show_propagation(struct statmount *sm) +{ + if (sm->mnt_propagation & MS_SHARED) + printf(" shared:%llu", sm->mnt_peer_group); + if (sm->mnt_propagation & MS_SLAVE) { + printf(" master:%llu", sm->mnt_master); + if (sm->propagate_from && sm->propagate_from != sm->mnt_master) + printf(" propagate_from:%llu", sm->propagate_from); + } + if (sm->mnt_propagation & MS_UNBINDABLE) + printf(" unbindable"); +} + +static void show_sb_flags(__u64 flags) +{ + printf("%s", flags & MS_RDONLY ? "ro" : "rw"); + if (flags & MS_SYNCHRONOUS) + printf(",sync"); + if (flags & MS_DIRSYNC) + printf(",dirsync"); + if (flags & MS_MANDLOCK) + printf(",mand"); + if (flags & MS_LAZYTIME) + printf(",lazytime"); +} + +static int dump_mountinfo(__u64 mnt_id, __u64 mnt_ns_id) +{ + int ret; + struct statmount *buf = alloca(STATMOUNT_BUFSIZE); + const __u64 mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | + STATMOUNT_PROPAGATE_FROM | STATMOUNT_FS_TYPE | + STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | + STATMOUNT_MNT_OPTS | STATMOUNT_FS_SUBTYPE | + STATMOUNT_SB_SOURCE; + + ret = statmount(mnt_id, mnt_ns_id, mask, buf, STATMOUNT_BUFSIZE, 0); + if (ret < 0) { + perror("statmount"); + return 1; + } + + if (ext_format) + printf("0x%llx 0x%llx 0x%llx ", mnt_ns_id, mnt_id, buf->mnt_parent_id); + + printf("%u %u %u:%u %s %s ", buf->mnt_id_old, buf->mnt_parent_id_old, + buf->sb_dev_major, buf->sb_dev_minor, + &buf->str[buf->mnt_root], + &buf->str[buf->mnt_point]); + show_mnt_attrs(buf->mnt_attr); + show_propagation(buf); + + printf(" - %s", &buf->str[buf->fs_type]); + if (buf->mask & STATMOUNT_FS_SUBTYPE) + printf(".%s", &buf->str[buf->fs_subtype]); + if (buf->mask & STATMOUNT_SB_SOURCE) + printf(" %s ", &buf->str[buf->sb_source]); + else + printf(" :none "); + + show_sb_flags(buf->sb_flags); + if (buf->mask & STATMOUNT_MNT_OPTS) + printf(",%s", &buf->str[buf->mnt_opts]); + printf("\n"); + return 0; +} + +static int dump_mounts(__u64 mnt_ns_id) +{ + __u64 mntid[MAXMOUNTS]; + __u64 last_mnt_id = 0; + ssize_t count; + int i; + + /* + * Get a list of all mntids in mnt_ns_id. If it returns MAXMOUNTS + * mounts, then go again until we get everything. + */ + do { + count = listmount(LSMT_ROOT, mnt_ns_id, last_mnt_id, mntid, MAXMOUNTS, 0); + if (count < 0 || count > MAXMOUNTS) { + errno = count < 0 ? errno : count; + perror("listmount"); + return 1; + } + + /* Walk the returned mntids and print info about each */ + for (i = 0; i < count; ++i) { + int ret = dump_mountinfo(mntid[i], mnt_ns_id); + + if (ret != 0) + return ret; + } + /* Set up last_mnt_id to pick up where we left off */ + last_mnt_id = mntid[count - 1]; + } while (count == MAXMOUNTS); + return 0; +} + +static void usage(const char * const prog) +{ + printf("Usage:\n"); + printf("%s [-e] [-p pid] [-r] [-h]\n", prog); + printf(" -e: extended format\n"); + printf(" -h: print usage message\n"); + printf(" -p: get mount namespace from given pid\n"); + printf(" -r: recursively print all mounts in all child namespaces\n"); +} + +int main(int argc, char * const *argv) +{ + struct mnt_ns_info mni = { .size = MNT_NS_INFO_SIZE_VER0 }; + int pidfd, mntns, ret, opt; + pid_t pid = getpid(); + bool recursive = false; + + while ((opt = getopt(argc, argv, "ehp:r")) != -1) { + switch (opt) { + case 'e': + ext_format = true; + break; + case 'h': + usage(argv[0]); + return 0; + case 'p': + pid = atoi(optarg); + break; + case 'r': + recursive = true; + break; + } + } + + /* Get a pidfd for pid */ + pidfd = syscall(__NR_pidfd_open, pid, 0); + if (pidfd < 0) { + perror("pidfd_open"); + return 1; + } + + /* Get the mnt namespace for pidfd */ + mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, NULL); + if (mntns < 0) { + perror("PIDFD_GET_MNT_NAMESPACE"); + return 1; + } + close(pidfd); + + /* get info about mntns. In particular, the mnt_ns_id */ + ret = ioctl(mntns, NS_MNT_GET_INFO, &mni); + if (ret < 0) { + perror("NS_MNT_GET_INFO"); + return 1; + } + + do { + int ret; + + ret = dump_mounts(mni.mnt_ns_id); + if (ret) + return ret; + + if (!recursive) + break; + + /* get the next mntns (and overwrite the old mount ns info) */ + ret = ioctl(mntns, NS_MNT_GET_NEXT, &mni); + close(mntns); + mntns = ret; + } while (mntns >= 0); + + return 0; +} diff --git a/samples/vfs/samples-vfs.h b/samples/vfs/samples-vfs.h new file mode 100644 index 000000000000..498baf581b56 --- /dev/null +++ b/samples/vfs/samples-vfs.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SAMPLES_VFS_H +#define __SAMPLES_VFS_H + +#include <errno.h> +#include <linux/types.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#define die_errno(format, ...) \ + do { \ + fprintf(stderr, "%m | %s: %d: %s: " format "\n", __FILE__, \ + __LINE__, __func__, ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } while (0) + +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings */ + __u64 __spare2[44]; + char str[]; /* Variable size part containing strings */ +}; + +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +#ifndef MNT_ID_REQ_SIZE_VER0 +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#endif + +#ifndef MNT_ID_REQ_SIZE_VER1 +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ +#endif + +/* Get the id for a mount namespace */ +#ifndef NS_GET_MNTNS_ID +#define NS_GET_MNTNS_ID _IO(0xb7, 0x5) +#endif + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#ifndef MNT_NS_INFO_SIZE_VER0 +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ +#endif + +#ifndef NS_MNT_GET_INFO +#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info) +#endif + +#ifndef NS_MNT_GET_NEXT +#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info) +#endif + +#ifndef NS_MNT_GET_PREV +#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info) +#endif + +#ifndef PIDFD_GET_MNT_NAMESPACE +#define PIDFD_GET_MNT_NAMESPACE _IO(0xFF, 3) +#endif + +#ifndef __NR_listmount +#define __NR_listmount 458 +#endif + +#ifndef __NR_statmount +#define __NR_statmount 457 +#endif + +#ifndef LSMT_ROOT +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#endif + +/* @mask bits for statmount(2) */ +#ifndef STATMOUNT_SB_BASIC +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#endif + +#ifndef STATMOUNT_MNT_BASIC +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#endif + +#ifndef STATMOUNT_PROPAGATE_FROM +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#endif + +#ifndef STATMOUNT_MNT_ROOT +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#endif + +#ifndef STATMOUNT_MNT_POINT +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#endif + +#ifndef STATMOUNT_FS_TYPE +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#endif + +#ifndef STATMOUNT_MNT_NS_ID +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#endif + +#ifndef STATMOUNT_MNT_OPTS +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#endif + +#ifndef STATMOUNT_FS_SUBTYPE +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#endif + +#ifndef STATMOUNT_SB_SOURCE +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#endif + +#ifndef STATMOUNT_OPT_ARRAY +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#endif + +#ifndef STATMOUNT_OPT_SEC_ARRAY +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#endif + +#ifndef STATX_MNT_ID_UNIQUE +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#endif + +#ifndef STATMOUNT_MNT_UIDMAP +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#endif + +#ifndef STATMOUNT_MNT_GIDMAP +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ +#endif + +#ifndef MOUNT_ATTR_RDONLY +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#endif + +#ifndef MOUNT_ATTR_NOSUID +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#endif + +#ifndef MOUNT_ATTR_NODEV +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#endif + +#ifndef MOUNT_ATTR_NOEXEC +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#endif + +#ifndef MOUNT_ATTR__ATIME +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#endif + +#ifndef MOUNT_ATTR_RELATIME +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#endif + +#ifndef MOUNT_ATTR_NOATIME +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#endif + +#ifndef MOUNT_ATTR_STRICTATIME +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#endif + +#ifndef MOUNT_ATTR_NODIRATIME +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#endif + +#ifndef MOUNT_ATTR_IDMAP +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#endif + +#ifndef MOUNT_ATTR_NOSYMFOLLOW +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ +#endif + +#ifndef MS_RDONLY +#define MS_RDONLY 1 /* Mount read-only */ +#endif + +#ifndef MS_SYNCHRONOUS +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#endif + +#ifndef MS_MANDLOCK +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#endif + +#ifndef MS_DIRSYNC +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#endif + +#ifndef MS_UNBINDABLE +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#endif + +#ifndef MS_PRIVATE +#define MS_PRIVATE (1<<18) /* change to private */ +#endif + +#ifndef MS_SLAVE +#define MS_SLAVE (1<<19) /* change to slave */ +#endif + +#ifndef MS_SHARED +#define MS_SHARED (1<<20) /* change to shared */ +#endif + +#ifndef MS_LAZYTIME +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ +#endif + +#endif /* __SAMPLES_VFS_H */ diff --git a/samples/vfs/test-list-all-mounts.c b/samples/vfs/test-list-all-mounts.c new file mode 100644 index 000000000000..713c174626aa --- /dev/null +++ b/samples/vfs/test-list-all-mounts.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <errno.h> +#include <limits.h> +#include <linux/types.h> +#include <inttypes.h> +#include <stdio.h> + +#include "../../tools/testing/selftests/pidfd/pidfd.h" +#include "samples-vfs.h" + +static int __statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask, + struct statmount *stmnt, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER1, + .mnt_id = mnt_id, + .param = mask, + .mnt_ns_id = mnt_ns_id, + }; + + return syscall(__NR_statmount, &req, stmnt, bufsize, flags); +} + +static struct statmount *sys_statmount(__u64 mnt_id, __u64 mnt_ns_id, + __u64 mask, unsigned int flags) +{ + size_t bufsize = 1 << 15; + struct statmount *stmnt = NULL, *tmp = NULL; + int ret; + + for (;;) { + tmp = realloc(stmnt, bufsize); + if (!tmp) + goto out; + + stmnt = tmp; + ret = __statmount(mnt_id, mnt_ns_id, mask, stmnt, bufsize, flags); + if (!ret) + return stmnt; + + if (errno != EOVERFLOW) + goto out; + + bufsize <<= 1; + if (bufsize >= UINT_MAX / 2) + goto out; + } + +out: + free(stmnt); + return NULL; +} + +static ssize_t sys_listmount(__u64 mnt_id, __u64 last_mnt_id, __u64 mnt_ns_id, + __u64 list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER1, + .mnt_id = mnt_id, + .param = last_mnt_id, + .mnt_ns_id = mnt_ns_id, + }; + + return syscall(__NR_listmount, &req, list, num, flags); +} + +int main(int argc, char *argv[]) +{ +#define LISTMNT_BUFFER 10 + __u64 list[LISTMNT_BUFFER], last_mnt_id = 0; + int ret, pidfd, fd_mntns; + struct mnt_ns_info info = {}; + + pidfd = sys_pidfd_open(getpid(), 0); + if (pidfd < 0) + die_errno("pidfd_open failed"); + + fd_mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, 0); + if (fd_mntns < 0) + die_errno("ioctl(PIDFD_GET_MNT_NAMESPACE) failed"); + + ret = ioctl(fd_mntns, NS_MNT_GET_INFO, &info); + if (ret < 0) + die_errno("ioctl(NS_GET_MNTNS_ID) failed"); + + printf("Listing %u mounts for mount namespace %" PRIu64 "\n", + info.nr_mounts, (uint64_t)info.mnt_ns_id); + for (;;) { + ssize_t nr_mounts; +next: + nr_mounts = sys_listmount(LSMT_ROOT, last_mnt_id, + info.mnt_ns_id, list, LISTMNT_BUFFER, + 0); + if (nr_mounts <= 0) { + int fd_mntns_next; + + printf("Finished listing %u mounts for mount namespace %" PRIu64 "\n\n", + info.nr_mounts, (uint64_t)info.mnt_ns_id); + fd_mntns_next = ioctl(fd_mntns, NS_MNT_GET_NEXT, &info); + if (fd_mntns_next < 0) { + if (errno == ENOENT) { + printf("Finished listing all mount namespaces\n"); + exit(0); + } + die_errno("ioctl(NS_MNT_GET_NEXT) failed"); + } + close(fd_mntns); + fd_mntns = fd_mntns_next; + last_mnt_id = 0; + printf("Listing %u mounts for mount namespace %" PRIu64 "\n", + info.nr_mounts, (uint64_t)info.mnt_ns_id); + goto next; + } + + for (size_t cur = 0; cur < nr_mounts; cur++) { + struct statmount *stmnt; + + last_mnt_id = list[cur]; + + stmnt = sys_statmount(last_mnt_id, info.mnt_ns_id, + STATMOUNT_SB_BASIC | + STATMOUNT_MNT_BASIC | + STATMOUNT_MNT_ROOT | + STATMOUNT_MNT_POINT | + STATMOUNT_MNT_NS_ID | + STATMOUNT_MNT_OPTS | + STATMOUNT_FS_TYPE | + STATMOUNT_MNT_UIDMAP | + STATMOUNT_MNT_GIDMAP, 0); + if (!stmnt) { + printf("Failed to statmount(%" PRIu64 ") in mount namespace(%" PRIu64 ")\n", + (uint64_t)last_mnt_id, (uint64_t)info.mnt_ns_id); + continue; + } + + printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n", + (uint64_t)stmnt->mnt_id, + (uint64_t)stmnt->mnt_parent_id, + (stmnt->mask & STATMOUNT_FS_TYPE) ? stmnt->str + stmnt->fs_type : "", + (stmnt->mask & STATMOUNT_MNT_ROOT) ? stmnt->str + stmnt->mnt_root : "", + (stmnt->mask & STATMOUNT_MNT_POINT) ? stmnt->str + stmnt->mnt_point : "", + (stmnt->mask & STATMOUNT_MNT_OPTS) ? stmnt->str + stmnt->mnt_opts : ""); + + if (stmnt->mask & STATMOUNT_MNT_UIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_uidmap; + + for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) { + printf("mnt_uidmap[%zu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + if (stmnt->mask & STATMOUNT_MNT_GIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_gidmap; + + for (size_t idx = 0; idx < stmnt->mnt_gidmap_num; idx++) { + printf("mnt_gidmap[%zu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + printf("\n"); + + free(stmnt); + } + } + + exit(0); +} diff --git a/samples/vfs/test-statx.c b/samples/vfs/test-statx.c index 49c7a46cee07..424a6fa15723 100644 --- a/samples/vfs/test-statx.c +++ b/samples/vfs/test-statx.c @@ -19,6 +19,12 @@ #include <time.h> #include <sys/syscall.h> #include <sys/types.h> + +// Work around glibc header silliness +#undef AT_RENAME_NOREPLACE +#undef AT_RENAME_EXCHANGE +#undef AT_RENAME_WHITEOUT + #include <linux/stat.h> #include <linux/fcntl.h> #define statx foo diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c index 8c6cb57d5cfc..24cf7d7a1972 100644 --- a/samples/watch_queue/watch_test.c +++ b/samples/watch_queue/watch_test.c @@ -16,6 +16,12 @@ #include <errno.h> #include <sys/ioctl.h> #include <limits.h> + +// Work around glibc header silliness +#undef AT_RENAME_NOREPLACE +#undef AT_RENAME_EXCHANGE +#undef AT_RENAME_WHITEOUT + #include <linux/watch_queue.h> #include <linux/unistd.h> #include <linux/keyctl.h> |
