summaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
Diffstat (limited to 'samples')
-rw-r--r--samples/Kconfig95
-rw-r--r--samples/Makefile11
-rw-r--r--samples/acrn/vm-sample.c9
-rw-r--r--samples/bpf/.gitignore12
-rw-r--r--samples/bpf/Makefile148
-rw-r--r--samples/bpf/README.rst20
-rw-r--r--samples/bpf/asm_goto_workaround.h8
-rw-r--r--samples/bpf/cpustat_kern.c7
-rw-r--r--samples/bpf/cpustat_user.c4
-rwxr-xr-xsamples/bpf/do_hbm_test.sh2
-rw-r--r--samples/bpf/gnu/stubs.h1
-rw-r--r--samples/bpf/hbm.c10
-rw-r--r--samples/bpf/ibumad_kern.c4
-rw-r--r--samples/bpf/lwt_len_hist.bpf.c (renamed from samples/bpf/lwt_len_hist_kern.c)29
-rwxr-xr-xsamples/bpf/lwt_len_hist.sh6
-rw-r--r--samples/bpf/map_perf_test.bpf.c (renamed from samples/bpf/map_perf_test_kern.c)48
-rw-r--r--samples/bpf/map_perf_test_user.c4
-rw-r--r--samples/bpf/net_shared.h34
-rw-r--r--samples/bpf/offwaketime.bpf.c (renamed from samples/bpf/offwaketime_kern.c)41
-rw-r--r--samples/bpf/offwaketime_user.c2
-rw-r--r--samples/bpf/sampleip_user.c11
-rw-r--r--samples/bpf/sock_flags_kern.c49
-rw-r--r--samples/bpf/sockex2_kern.c1
-rw-r--r--samples/bpf/spintest.bpf.c (renamed from samples/bpf/spintest_kern.c)27
-rw-r--r--samples/bpf/spintest_user.c24
-rw-r--r--samples/bpf/syscall_nrs.c5
-rw-r--r--samples/bpf/syscall_tp_kern.c31
-rw-r--r--samples/bpf/syscall_tp_user.c45
-rw-r--r--samples/bpf/task_fd_query_user.c4
-rwxr-xr-xsamples/bpf/tc_l2_redirect.sh3
-rw-r--r--samples/bpf/tc_l2_redirect_kern.c6
-rw-r--r--samples/bpf/tcp_basertt_kern.c2
-rw-r--r--samples/bpf/tcp_cong_kern.c2
-rw-r--r--samples/bpf/test_cgrp2_array_pin.c106
-rw-r--r--samples/bpf/test_cgrp2_attach.c177
-rw-r--r--samples/bpf/test_cgrp2_sock.c294
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh135
-rw-r--r--samples/bpf/test_cgrp2_sock2.c95
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh98
-rwxr-xr-xsamples/bpf/test_cgrp2_tc.sh185
-rw-r--r--samples/bpf/test_cgrp2_tc_kern.c70
-rw-r--r--samples/bpf/test_current_task_under_cgroup_kern.c44
-rw-r--r--samples/bpf/test_current_task_under_cgroup_user.c113
-rw-r--r--samples/bpf/test_lru_dist.c5
-rw-r--r--samples/bpf/test_lwt_bpf.c50
-rwxr-xr-xsamples/bpf/test_lwt_bpf.sh23
-rw-r--r--samples/bpf/test_map_in_map.bpf.c (renamed from samples/bpf/test_map_in_map_kern.c)18
-rw-r--r--samples/bpf/test_map_in_map_user.c4
-rw-r--r--samples/bpf/test_overhead_kprobe_kern.c49
-rw-r--r--samples/bpf/test_overhead_raw_tp_kern.c17
-rw-r--r--samples/bpf/test_overhead_tp_kern.c37
-rw-r--r--samples/bpf/test_overhead_user.c215
-rwxr-xr-xsamples/bpf/test_override_return.sh16
-rw-r--r--samples/bpf/test_probe_write_user_kern.c56
-rw-r--r--samples/bpf/test_probe_write_user_user.c108
-rw-r--r--samples/bpf/trace_common.h13
-rw-r--r--samples/bpf/trace_output.bpf.c (renamed from samples/bpf/trace_output_kern.c)6
-rw-r--r--samples/bpf/trace_output_user.c2
-rw-r--r--samples/bpf/tracex1.bpf.c (renamed from samples/bpf/tracex1_kern.c)27
-rw-r--r--samples/bpf/tracex1_user.c2
-rw-r--r--samples/bpf/tracex2_kern.c102
-rw-r--r--samples/bpf/tracex2_user.c187
-rw-r--r--samples/bpf/tracex3.bpf.c (renamed from samples/bpf/tracex3_kern.c)40
-rw-r--r--samples/bpf/tracex3_user.c2
-rw-r--r--samples/bpf/tracex4.bpf.c (renamed from samples/bpf/tracex4_kern.c)7
-rw-r--r--samples/bpf/tracex4_user.c6
-rw-r--r--samples/bpf/tracex5.bpf.c (renamed from samples/bpf/tracex5_kern.c)12
-rw-r--r--samples/bpf/tracex5_user.c2
-rw-r--r--samples/bpf/tracex6.bpf.c (renamed from samples/bpf/tracex6_kern.c)20
-rw-r--r--samples/bpf/tracex6_user.c2
-rw-r--r--samples/bpf/tracex7_kern.c16
-rw-r--r--samples/bpf/tracex7_user.c56
-rw-r--r--samples/bpf/xdp1_kern.c100
-rw-r--r--samples/bpf/xdp1_user.c166
-rw-r--r--samples/bpf/xdp2_kern.c125
-rw-r--r--samples/bpf/xdp2skb_meta_kern.c3
-rw-r--r--samples/bpf/xdp_adjust_tail_kern.c1
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c2
-rw-r--r--samples/bpf/xdp_fwd_user.c4
-rw-r--r--samples/bpf/xdp_monitor.bpf.c8
-rw-r--r--samples/bpf/xdp_monitor_user.c118
-rw-r--r--samples/bpf/xdp_redirect.bpf.c49
-rw-r--r--samples/bpf/xdp_redirect_cpu.bpf.c539
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c559
-rw-r--r--samples/bpf/xdp_redirect_map.bpf.c97
-rw-r--r--samples/bpf/xdp_redirect_map_multi.bpf.c77
-rw-r--r--samples/bpf/xdp_redirect_map_multi_user.c232
-rw-r--r--samples/bpf/xdp_redirect_map_user.c228
-rw-r--r--samples/bpf/xdp_redirect_user.c172
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c2
-rw-r--r--samples/bpf/xdp_rxq_info_kern.c140
-rw-r--r--samples/bpf/xdp_rxq_info_user.c614
-rw-r--r--samples/bpf/xdp_sample.bpf.h22
-rw-r--r--samples/bpf/xdp_sample_pkts_kern.c57
-rw-r--r--samples/bpf/xdp_sample_pkts_user.c196
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c2
-rw-r--r--samples/cgroup/.gitignore3
-rw-r--r--samples/cgroup/Makefile5
-rw-r--r--samples/cgroup/cgroup_event_listener.c83
-rw-r--r--samples/cgroup/memcg_event_listener.c328
-rw-r--r--samples/check-exec/.gitignore2
-rw-r--r--samples/check-exec/Makefile15
-rw-r--r--samples/check-exec/inc.c212
-rwxr-xr-xsamples/check-exec/run-script-ask.sh9
-rwxr-xr-xsamples/check-exec/script-ask.inc5
-rwxr-xr-xsamples/check-exec/script-exec.inc4
-rw-r--r--samples/check-exec/script-noexec.inc4
-rw-r--r--samples/check-exec/set-exec.c85
-rw-r--r--samples/configfs/configfs_sample.c1
-rw-r--r--samples/connector/cn_test.c2
-rw-r--r--samples/damon/Kconfig43
-rw-r--r--samples/damon/Makefile5
-rw-r--r--samples/damon/mtier.c240
-rw-r--r--samples/damon/prcl.c169
-rw-r--r--samples/damon/wsse.c149
-rw-r--r--samples/fanotify/fs-monitor.c7
-rw-r--r--samples/fprobe/fprobe_example.c12
-rw-r--r--samples/ftrace/Makefile1
-rw-r--r--samples/ftrace/ftrace-direct-modify.c200
-rw-r--r--samples/ftrace/ftrace-direct-multi-modify.c232
-rw-r--r--samples/ftrace/ftrace-direct-multi.c159
-rw-r--r--samples/ftrace/ftrace-direct-too.c188
-rw-r--r--samples/ftrace/ftrace-direct.c150
-rw-r--r--samples/ftrace/ftrace-ops.c252
-rw-r--r--samples/ftrace/sample-trace-array.c6
-rw-r--r--samples/hid/.gitignore8
-rw-r--r--samples/hid/Makefile250
-rw-r--r--samples/hid/Makefile.target75
-rw-r--r--samples/hid/hid_bpf_helpers.h21
-rw-r--r--samples/hid/hid_mouse.bpf.c128
-rw-r--r--samples/hid/hid_mouse.c138
-rw-r--r--samples/hid/hid_surface_dial.bpf.c140
-rw-r--r--samples/hid/hid_surface_dial.c203
-rw-r--r--samples/hung_task/Makefile2
-rw-r--r--samples/hung_task/hung_task_tests.c164
-rw-r--r--samples/hw_breakpoint/data_breakpoint.c10
-rw-r--r--samples/kfifo/bytestream-example.c1
-rw-r--r--samples/kfifo/dma-example.c5
-rw-r--r--samples/kfifo/inttype-example.c1
-rw-r--r--samples/kfifo/record-example.c1
-rw-r--r--samples/kmemleak/Makefile2
-rw-r--r--samples/kmemleak/kmemleak-test.c39
-rw-r--r--samples/kobject/kobject-example.c5
-rw-r--r--samples/kobject/kset-example.c51
-rw-r--r--samples/kprobes/kprobe_example.c9
-rw-r--r--samples/kprobes/kretprobe_example.c3
-rw-r--r--samples/landlock/sandboxer.c304
-rw-r--r--samples/livepatch/livepatch-callbacks-busymod.c4
-rw-r--r--samples/livepatch/livepatch-callbacks-demo.c1
-rw-r--r--samples/livepatch/livepatch-callbacks-mod.c1
-rw-r--r--samples/livepatch/livepatch-sample.c1
-rw-r--r--samples/livepatch/livepatch-shadow-fix1.c4
-rw-r--r--samples/livepatch/livepatch-shadow-fix2.c1
-rw-r--r--samples/livepatch/livepatch-shadow-mod.c15
-rw-r--r--samples/mei/mei-amt-version.c2
-rw-r--r--samples/pfsm/.gitignore2
-rw-r--r--samples/pfsm/Makefile4
-rw-r--r--samples/pfsm/pfsm-wakeup.c125
-rw-r--r--samples/pktgen/functions.sh13
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh4
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh4
-rwxr-xr-xsamples/pktgen/pktgen_sample01_simple.sh6
-rwxr-xr-xsamples/pktgen/pktgen_sample02_multiqueue.sh3
-rwxr-xr-xsamples/pktgen/pktgen_sample03_burst_single_flow.sh4
-rwxr-xr-xsamples/pktgen/pktgen_sample04_many_flows.sh4
-rwxr-xr-xsamples/pktgen/pktgen_sample05_flow_per_thread.sh4
-rwxr-xr-xsamples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh3
-rw-r--r--samples/qmi/qmi_sample_client.c6
-rw-r--r--samples/rust/Kconfig131
-rw-r--r--samples/rust/Makefile15
-rw-r--r--samples/rust/rust_configfs.rs192
-rw-r--r--samples/rust/rust_debugfs.rs163
-rw-r--r--samples/rust/rust_debugfs_scoped.rs140
-rw-r--r--samples/rust/rust_dma.rs121
-rw-r--r--samples/rust/rust_driver_auxiliary.rs128
-rw-r--r--samples/rust/rust_driver_faux.rs29
-rw-r--r--samples/rust/rust_driver_i2c.rs74
-rw-r--r--samples/rust/rust_driver_pci.rs119
-rw-r--r--samples/rust/rust_driver_platform.rs191
-rw-r--r--samples/rust/rust_driver_usb.rs46
-rw-r--r--samples/rust/rust_i2c_client.rs147
-rw-r--r--samples/rust/rust_minimal.rs24
-rw-r--r--samples/rust/rust_misc_device.rs272
-rw-r--r--samples/rust/rust_print.rs54
-rw-r--r--samples/rust/rust_print_events.c8
-rw-r--r--samples/rust/rust_print_main.rs117
-rw-r--r--samples/seccomp/user-trap.c8
-rw-r--r--samples/trace_events/trace-events-sample.h46
-rw-r--r--samples/trace_events/trace_custom_sched.c1
-rw-r--r--samples/tsm-mr/Makefile2
-rw-r--r--samples/tsm-mr/tsm_mr_sample.c131
-rw-r--r--samples/user_events/example.c47
-rw-r--r--samples/v4l/v4l2-pci-skeleton.c33
-rw-r--r--samples/vfio-mdev/README.rst100
-rw-r--r--samples/vfio-mdev/mbochs.c98
-rw-r--r--samples/vfio-mdev/mdpy-fb.c6
-rw-r--r--samples/vfio-mdev/mdpy.c59
-rw-r--r--samples/vfio-mdev/mtty.c870
-rw-r--r--samples/vfs/.gitignore2
-rw-r--r--samples/vfs/Makefile3
-rw-r--r--samples/vfs/mountinfo.c274
-rw-r--r--samples/vfs/samples-vfs.h253
-rw-r--r--samples/vfs/test-list-all-mounts.c173
-rw-r--r--samples/vfs/test-statx.c6
-rw-r--r--samples/watch_queue/watch_test.c6
205 files changed, 8415 insertions, 6651 deletions
diff --git a/samples/Kconfig b/samples/Kconfig
index 0d81c00289ee..5bc7c9e5a59e 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -23,11 +23,11 @@ config SAMPLE_TRACE_CUSTOM_EVENTS
This builds the custom trace event example module.
config SAMPLE_TRACE_PRINTK
- tristate "Build trace_printk module - tests various trace_printk formats"
+ tristate "Build trace_printk module - tests various trace_printk formats"
depends on EVENT_TRACING && m
help
- This builds a module that calls trace_printk() and can be used to
- test various trace_printk() calls from a module.
+ This builds a module that calls trace_printk() and can be used to
+ test various trace_printk() calls from a module.
config SAMPLE_FTRACE_DIRECT
tristate "Build register_ftrace_direct() example"
@@ -38,7 +38,7 @@ config SAMPLE_FTRACE_DIRECT
that hooks to wake_up_process and prints the parameters.
config SAMPLE_FTRACE_DIRECT_MULTI
- tristate "Build register_ftrace_direct_multi() example"
+ tristate "Build register_ftrace_direct() on multiple ips example"
depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m
depends on HAVE_SAMPLE_FTRACE_DIRECT_MULTI
help
@@ -46,12 +46,19 @@ config SAMPLE_FTRACE_DIRECT_MULTI
that hooks to wake_up_process and schedule, and prints
the function addresses.
+config SAMPLE_FTRACE_OPS
+ tristate "Build custom ftrace ops example"
+ depends on FUNCTION_TRACER
+ help
+ This builds an ftrace ops example that hooks two functions and
+ measures the time taken to invoke one function a number of times.
+
config SAMPLE_TRACE_ARRAY
- tristate "Build sample module for kernel access to Ftrace instancess"
+ tristate "Build sample module for kernel access to Ftrace instances"
depends on EVENT_TRACING && m
help
- This builds a module that demonstrates the use of various APIs to
- access Ftrace instances from within the kernel.
+ This builds a module that demonstrates the use of various APIs to
+ access Ftrace instances from within the kernel.
config SAMPLE_KOBJECT
tristate "Build kobject examples"
@@ -177,6 +184,17 @@ config SAMPLE_TIMER
bool "Timer sample"
depends on CC_CAN_LINK && HEADERS_INSTALL
+config SAMPLE_TSM_MR
+ tristate "TSM measurement sample"
+ select TSM_MEASUREMENTS
+ select VIRT_DRIVERS
+ help
+ Build a sample module that emulates MRs (Measurement Registers) and
+ exposes them to user mode applications through the TSM sysfs
+ interface (/sys/class/misc/tsm_mr_sample/emulated_mr/).
+
+ The module name will be tsm-mr-sample when built as a module.
+
config SAMPLE_UHID
bool "UHID sample"
depends on CC_CAN_LINK && HEADERS_INSTALL
@@ -184,32 +202,33 @@ config SAMPLE_UHID
Build UHID sample program.
config SAMPLE_VFIO_MDEV_MTTY
- tristate "Build VFIO mtty example mediated device sample code -- loadable modules only"
- depends on VFIO_MDEV && m
+ tristate "Build VFIO mtty example mediated device sample code"
+ depends on VFIO
+ select VFIO_MDEV
help
Build a virtual tty sample driver for use as a VFIO
mediated device
config SAMPLE_VFIO_MDEV_MDPY
- tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
- depends on VFIO_MDEV && m
+ tristate "Build VFIO mdpy example mediated device sample code"
+ depends on VFIO
+ select VFIO_MDEV
help
Build a virtual display sample driver for use as a VFIO
mediated device. It is a simple framebuffer and supports
the region display interface (VFIO_GFX_PLANE_TYPE_REGION).
config SAMPLE_VFIO_MDEV_MDPY_FB
- tristate "Build VFIO mdpy example guest fbdev driver -- loadable module only"
- depends on FB && m
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
+ tristate "Build VFIO mdpy example guest fbdev driver"
+ depends on FB
+ select FB_IOMEM_HELPERS
help
Guest fbdev driver for the virtual display sample driver.
config SAMPLE_VFIO_MDEV_MBOCHS
- tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
- depends on VFIO_MDEV && m
+ tristate "Build VFIO mbochs example mediated device sample code"
+ depends on VFIO
+ select VFIO_MDEV
select DMA_SHARED_BUFFER
help
Build a virtual display sample driver for use as a VFIO
@@ -243,6 +262,13 @@ config SAMPLE_INTEL_MEI
help
Build a sample program to work with mei device.
+config SAMPLE_TPS6594_PFSM
+ bool "Build example program working with TPS6594 PFSM driver"
+ depends on HEADERS_INSTALL
+ depends on CC_CAN_LINK
+ help
+ Build a sample program to work with PFSM devices.
+
config SAMPLE_WATCHDOG
bool "watchdog sample"
depends on CC_CAN_LINK
@@ -263,8 +289,41 @@ config SAMPLE_CORESIGHT_SYSCFG
This demonstrates how a user may create their own CoreSight
configurations and easily load them into the system at runtime.
+config SAMPLE_KMEMLEAK
+ tristate "Simple test for the kernel memory leak detector"
+ depends on DEBUG_KMEMLEAK && m
+ help
+ Build a sample program which explicitly leaks memory to test
+ kmemleak.
+
+config SAMPLE_CGROUP
+ bool "Build cgroup sample code"
+ depends on CGROUPS && CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build samples that demonstrate the usage of the cgroup API.
+
+config SAMPLE_CHECK_EXEC
+ bool "Exec secure bits examples"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build a tool to easily configure SECBIT_EXEC_RESTRICT_FILE and
+ SECBIT_EXEC_DENY_INTERACTIVE, and a simple script interpreter to
+ demonstrate how they should be used with execveat(2) +
+ AT_EXECVE_CHECK.
+
+config SAMPLE_HUNG_TASK
+ tristate "Hung task detector test code"
+ depends on DETECT_HUNG_TASK && DEBUG_FS
+ help
+ Build a module that provides debugfs files (e.g., mutex, semaphore,
+ rw_semaphore_read, rw_semaphore_write) under <debugfs>/hung_task.
+ Reading these files with multiple processes triggers hung task
+ detection by holding locks for a long time (256 seconds).
+
source "samples/rust/Kconfig"
+source "samples/damon/Kconfig"
+
endif # SAMPLES
config HAVE_SAMPLE_FTRACE_DIRECT
diff --git a/samples/Makefile b/samples/Makefile
index 9832ef3f8fcb..07641e177bd8 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -3,6 +3,8 @@
subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay
subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs
+subdir-$(CONFIG_SAMPLE_CHECK_EXEC) += check-exec
+subdir-$(CONFIG_SAMPLE_CGROUP) += cgroup
obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/
obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/
obj-$(CONFIG_SAMPLE_FANOTIFY_ERROR) += fanotify/
@@ -24,15 +26,22 @@ obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_events/
obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/
+obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace/
obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/
subdir-$(CONFIG_SAMPLE_UHID) += uhid
obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/
obj-y += vfio-mdev/
subdir-$(CONFIG_SAMPLE_VFS) += vfs
obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/
+obj-$(CONFIG_SAMPLE_TPS6594_PFSM) += pfsm/
subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog
subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue
-obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/
+obj-$(CONFIG_SAMPLE_KMEMLEAK) += kmemleak/
obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight/
obj-$(CONFIG_SAMPLE_FPROBE) += fprobe/
obj-$(CONFIG_SAMPLES_RUST) += rust/
+obj-$(CONFIG_SAMPLE_DAMON_WSSE) += damon/
+obj-$(CONFIG_SAMPLE_DAMON_PRCL) += damon/
+obj-$(CONFIG_SAMPLE_DAMON_MTIER) += damon/
+obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task/
+obj-$(CONFIG_SAMPLE_TSM_MR) += tsm-mr/
diff --git a/samples/acrn/vm-sample.c b/samples/acrn/vm-sample.c
index 7abd68b20153..c61e0f91456e 100644
--- a/samples/acrn/vm-sample.c
+++ b/samples/acrn/vm-sample.c
@@ -3,7 +3,7 @@
* A sample program to run a User VM on the ACRN hypervisor
*
* This sample runs in a Service VM, which is a privileged VM of ACRN.
- * CONFIG_ACRN_HSM need to be enabled in the Service VM.
+ * CONFIG_ACRN_HSM needs to be enabled in the Service VM.
*
* Guest VM code in guest16.s will be executed after the VM launched.
*
@@ -13,7 +13,6 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
-#include <malloc.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
@@ -54,9 +53,9 @@ int main(int argc, char **argv)
argc = argc;
argv = argv;
- guest_memory = memalign(4096, GUEST_MEMORY_SIZE);
- if (!guest_memory) {
- printf("No enough memory!\n");
+ ret = posix_memalign(&guest_memory, 4096, GUEST_MEMORY_SIZE);
+ if (ret < 0) {
+ printf("Not enough memory!\n");
return -1;
}
hsm_fd = open("/dev/acrn_hsm", O_RDWR|O_CLOEXEC);
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 0e7bfdbff80a..0002cd359fb1 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -37,22 +37,10 @@ tracex4
tracex5
tracex6
tracex7
-xdp1
-xdp2
xdp_adjust_tail
xdp_fwd
-xdp_monitor
-xdp_redirect
-xdp_redirect_cpu
-xdp_redirect_map
-xdp_redirect_map_multi
xdp_router_ipv4
-xdp_rxq_info
-xdp_sample_pkts
xdp_tx_iptunnel
-xdpsock
-xdpsock_ctrl_proc
-xsk_fwd
testfile.img
hbm_out.log
iperf.*
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 727da3c5879b..95a4fa1f1e44 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src))
+BPF_SAMPLES_PATH ?= $(abspath $(src))
TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools
pound := \#
@@ -13,27 +13,16 @@ tprogs-y += sockex1
tprogs-y += sockex2
tprogs-y += sockex3
tprogs-y += tracex1
-tprogs-y += tracex2
tprogs-y += tracex3
tprogs-y += tracex4
tprogs-y += tracex5
tprogs-y += tracex6
-tprogs-y += tracex7
-tprogs-y += test_probe_write_user
tprogs-y += trace_output
tprogs-y += lathist
tprogs-y += offwaketime
tprogs-y += spintest
tprogs-y += map_perf_test
-tprogs-y += test_overhead
-tprogs-y += test_cgrp2_array_pin
-tprogs-y += test_cgrp2_attach
-tprogs-y += test_cgrp2_sock
-tprogs-y += test_cgrp2_sock2
-tprogs-y += xdp1
-tprogs-y += xdp2
tprogs-y += xdp_router_ipv4
-tprogs-y += test_current_task_under_cgroup
tprogs-y += trace_event
tprogs-y += sampleip
tprogs-y += tc_l2_redirect
@@ -41,22 +30,14 @@ tprogs-y += lwt_len_hist
tprogs-y += xdp_tx_iptunnel
tprogs-y += test_map_in_map
tprogs-y += per_socket_stats_example
-tprogs-y += xdp_rxq_info
tprogs-y += syscall_tp
tprogs-y += cpustat
tprogs-y += xdp_adjust_tail
tprogs-y += xdp_fwd
tprogs-y += task_fd_query
-tprogs-y += xdp_sample_pkts
tprogs-y += ibumad
tprogs-y += hbm
-tprogs-y += xdp_redirect_cpu
-tprogs-y += xdp_redirect_map_multi
-tprogs-y += xdp_redirect_map
-tprogs-y += xdp_redirect
-tprogs-y += xdp_monitor
-
# Libbpf dependencies
LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf
LIBBPF_OUTPUT = $(abspath $(BPF_SAMPLES_PATH))/libbpf
@@ -73,28 +54,16 @@ sockex1-objs := sockex1_user.o
sockex2-objs := sockex2_user.o
sockex3-objs := sockex3_user.o
tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
-tracex2-objs := tracex2_user.o
tracex3-objs := tracex3_user.o
tracex4-objs := tracex4_user.o
tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
tracex6-objs := tracex6_user.o
-tracex7-objs := tracex7_user.o
-test_probe_write_user-objs := test_probe_write_user_user.o
trace_output-objs := trace_output_user.o
lathist-objs := lathist_user.o
offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
spintest-objs := spintest_user.o $(TRACE_HELPERS)
map_perf_test-objs := map_perf_test_user.o
test_overhead-objs := test_overhead_user.o
-test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
-test_cgrp2_attach-objs := test_cgrp2_attach.o
-test_cgrp2_sock-objs := test_cgrp2_sock.o
-test_cgrp2_sock2-objs := test_cgrp2_sock2.o
-xdp1-objs := xdp1_user.o
-# reuse xdp1 source intentionally
-xdp2-objs := xdp1_user.o
-test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
- test_current_task_under_cgroup_user.o
trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
tc_l2_redirect-objs := tc_l2_redirect_user.o
@@ -102,21 +71,14 @@ lwt_len_hist-objs := lwt_len_hist_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
test_map_in_map-objs := test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o
-xdp_rxq_info-objs := xdp_rxq_info_user.o
syscall_tp-objs := syscall_tp_user.o
cpustat-objs := cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS)
-xdp_sample_pkts-objs := xdp_sample_pkts_user.o
ibumad-objs := ibumad_user.o
hbm-objs := hbm.o $(CGROUP_HELPERS)
-xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE)
-xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE)
-xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE)
-xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE)
-xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE)
xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE)
# Tell kbuild to always build the programs
@@ -124,35 +86,24 @@ always-y := $(tprogs-y)
always-y += sockex1_kern.o
always-y += sockex2_kern.o
always-y += sockex3_kern.o
-always-y += tracex1_kern.o
-always-y += tracex2_kern.o
-always-y += tracex3_kern.o
-always-y += tracex4_kern.o
-always-y += tracex5_kern.o
-always-y += tracex6_kern.o
-always-y += tracex7_kern.o
-always-y += sock_flags_kern.o
-always-y += test_probe_write_user_kern.o
-always-y += trace_output_kern.o
+always-y += tracex1.bpf.o
+always-y += tracex3.bpf.o
+always-y += tracex4.bpf.o
+always-y += tracex5.bpf.o
+always-y += tracex6.bpf.o
+always-y += trace_output.bpf.o
always-y += tcbpf1_kern.o
always-y += tc_l2_redirect_kern.o
always-y += lathist_kern.o
-always-y += offwaketime_kern.o
-always-y += spintest_kern.o
-always-y += map_perf_test_kern.o
-always-y += test_overhead_tp_kern.o
-always-y += test_overhead_raw_tp_kern.o
-always-y += test_overhead_kprobe_kern.o
+always-y += offwaketime.bpf.o
+always-y += spintest.bpf.o
+always-y += map_perf_test.bpf.o
always-y += parse_varlen.o parse_simple.o parse_ldabs.o
-always-y += test_cgrp2_tc_kern.o
-always-y += xdp1_kern.o
-always-y += xdp2_kern.o
-always-y += test_current_task_under_cgroup_kern.o
always-y += trace_event_kern.o
always-y += sampleip_kern.o
-always-y += lwt_len_hist_kern.o
+always-y += lwt_len_hist.bpf.o
always-y += xdp_tx_iptunnel_kern.o
-always-y += test_map_in_map_kern.o
+always-y += test_map_in_map.bpf.o
always-y += tcp_synrto_kern.o
always-y += tcp_rwnd_kern.o
always-y += tcp_bufs_kern.o
@@ -162,18 +113,19 @@ always-y += tcp_clamp_kern.o
always-y += tcp_basertt_kern.o
always-y += tcp_tos_reflect_kern.o
always-y += tcp_dumpstats_kern.o
-always-y += xdp_rxq_info_kern.o
always-y += xdp2skb_meta_kern.o
always-y += syscall_tp_kern.o
always-y += cpustat_kern.o
always-y += xdp_adjust_tail_kern.o
always-y += xdp_fwd_kern.o
always-y += task_fd_query_kern.o
-always-y += xdp_sample_pkts_kern.o
always-y += ibumad_kern.o
always-y += hbm_out_kern.o
always-y += hbm_edt_kern.o
+COMMON_CFLAGS = $(TPROGS_USER_CFLAGS)
+TPROGS_LDFLAGS = $(TPROGS_USER_LDFLAGS)
+
ifeq ($(ARCH), arm)
# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
# headers when arm instruction set identification is requested.
@@ -190,33 +142,36 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic
endif
endif
-TPROGS_CFLAGS += -Wall -O2
-TPROGS_CFLAGS += -Wmissing-prototypes
-TPROGS_CFLAGS += -Wstrict-prototypes
+ifeq ($(ARCH), x86)
+BPF_EXTRA_CFLAGS += -fcf-protection
+endif
+COMMON_CFLAGS += -Wall -O2
+COMMON_CFLAGS += -Wmissing-prototypes
+COMMON_CFLAGS += -Wstrict-prototypes
+COMMON_CFLAGS += $(call try-run,\
+ printf "int main() { return 0; }" |\
+ $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,)
+
+TPROGS_CFLAGS += $(COMMON_CFLAGS)
TPROGS_CFLAGS += -I$(objtree)/usr/include
TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
TPROGS_CFLAGS += -I$(srctree)/tools/include
TPROGS_CFLAGS += -I$(srctree)/tools/perf
+TPROGS_CFLAGS += -I$(srctree)/tools/lib
TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
ifdef SYSROOT
-TPROGS_CFLAGS += --sysroot=$(SYSROOT)
+COMMON_CFLAGS += --sysroot=$(SYSROOT)
TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
endif
TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
-TPROGLDLIBS_xdp_monitor += -lm
-TPROGLDLIBS_xdp_redirect += -lm
-TPROGLDLIBS_xdp_redirect_cpu += -lm
-TPROGLDLIBS_xdp_redirect_map += -lm
-TPROGLDLIBS_xdp_redirect_map_multi += -lm
TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread
TPROGLDLIBS_tracex4 += -lrt
TPROGLDLIBS_trace_output += -lrt
TPROGLDLIBS_map_perf_test += -lrt
-TPROGLDLIBS_test_overhead += -lrt
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
@@ -248,7 +203,7 @@ BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
- $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+ $(CLANG) --target=bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
$(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
@@ -275,15 +230,16 @@ clean:
$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
# Fix up variables inherited from Kbuild that tools/ build system won't like
- $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
- LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \
+ $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \
+ LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \
O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
$@ install_headers
BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool
-BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool
-$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT)
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT)
$(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \
OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap
@@ -326,14 +282,9 @@ $(obj)/$(TRACE_HELPERS) $(obj)/$(CGROUP_HELPERS) $(obj)/$(XDP_SAMPLE): | libbpf_
.PHONY: libbpf_hdrs
-$(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h
-$(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h
-$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h
-$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
-$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
$(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h
-$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
+$(obj)/tracex5.bpf.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
@@ -346,21 +297,24 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \
-I$(LIBBPF_INCLUDE) \
-I$(src)/../../tools/testing/selftests/bpf
-$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS)
+$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) $(TPROGS_USER_CFLAGS)
$(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+# Define __must_check as empty for trace_helpers.o because it won't be defined
+# by any header in our include path.
+$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check=
-include $(BPF_SAMPLES_PATH)/Makefile.target
VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \
$(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \
- $(abspath ./vmlinux)
+ $(abspath $(objtree)/vmlinux)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
ifeq ($(VMLINUX_H),)
ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\
- build the kernel or set VMLINUX_BTF or VMLINUX_H variable)
+ build the kernel or set VMLINUX_BTF like "VMLINUX_BTF=/sys/kernel/btf/vmlinux" or VMLINUX_H variable)
endif
$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
else
@@ -370,7 +324,7 @@ endif
clean-files += vmlinux.h
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
@@ -383,31 +337,19 @@ endef
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o
-$(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o
-$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o
-$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o
-$(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o
$(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o
$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
@echo " CLANG-BPF " $@
- $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+ $(Q)$(CLANG) -g -O2 --target=bpf -D__TARGET_ARCH_$(SRCARCH) \
-Wno-compare-distinct-pointer-types -I$(srctree)/include \
-I$(srctree)/samples/bpf -I$(srctree)/tools/include \
-I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
-c $(filter %.bpf.c,$^) -o $@
-LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \
- xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h \
- xdp_router_ipv4.skel.h
+LINKED_SKELS := xdp_router_ipv4.skel.h
clean-files += $(LINKED_SKELS)
-xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o
-xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o
-xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o
-xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o
-xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o
xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o
LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
@@ -434,7 +376,7 @@ $(obj)/%.o: $(src)/%.c
@echo " CLANG-bpf " $@
$(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
-I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
- -I$(LIBBPF_INCLUDE) \
+ -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
-D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
-D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index 57f93edd1957..cabe2d216997 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -4,15 +4,24 @@ eBPF sample programs
This directory contains a test stubs, verifier test-suite and examples
for using eBPF. The examples use libbpf from tools/lib/bpf.
+Note that the XDP-specific samples have been removed from this directory and
+moved to the xdp-tools repository: https://github.com/xdp-project/xdp-tools
+See the commit messages removing each tool from this directory for how to
+convert specific command invocations between the old samples and the utilities
+in xdp-tools.
+
Build dependencies
==================
Compiling requires having installed:
- * clang >= version 3.4.0
- * llvm >= version 3.7.1
+ * clang
+ * llvm
+ * pahole
-Note that LLVM's tool 'llc' must support target 'bpf', list version
-and supported targets with command: ``llc --version``
+Consult :ref:`Documentation/process/changes.rst <changes>` for the minimum
+version numbers required and how to update them. Note that LLVM's tool
+'llc' must support target 'bpf', list version and supported targets with
+command: ``llc --version``
Clean and configuration
-----------------------
@@ -24,7 +33,8 @@ after some changes (on demand)::
make -C samples/bpf clean
make clean
-Configure kernel, defconfig for instance::
+Configure kernel, defconfig for instance
+(see "tools/testing/selftests/bpf/config" for a reference config)::
make defconfig
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
index 7048bb3594d6..634e81d83efd 100644
--- a/samples/bpf/asm_goto_workaround.h
+++ b/samples/bpf/asm_goto_workaround.h
@@ -4,14 +4,14 @@
#define __ASM_GOTO_WORKAROUND_H
/*
- * This will bring in asm_volatile_goto and asm_inline macro definitions
+ * This will bring in asm_goto_output and asm_inline macro definitions
* if enabled by compiler and config options.
*/
#include <linux/types.h>
-#ifdef asm_volatile_goto
-#undef asm_volatile_goto
-#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
+#ifdef asm_goto_output
+#undef asm_goto_output
+#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output")
#endif
/*
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
index 5aefd19cdfa1..7ec7143e2757 100644
--- a/samples/bpf/cpustat_kern.c
+++ b/samples/bpf/cpustat_kern.c
@@ -76,8 +76,8 @@ struct {
/*
* The trace events for cpu_idle and cpu_frequency are taken from:
- * /sys/kernel/debug/tracing/events/power/cpu_idle/format
- * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ * /sys/kernel/tracing/events/power/cpu_idle/format
+ * /sys/kernel/tracing/events/power/cpu_frequency/format
*
* These two events have same format, so define one common structure.
*/
@@ -211,7 +211,7 @@ int bpf_prog1(struct cpu_args *ctx)
SEC("tracepoint/power/cpu_frequency")
int bpf_prog2(struct cpu_args *ctx)
{
- u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u64 *pts, *cstate, *pstate, cur_ts, delta;
u32 key, cpu, pstate_idx;
u64 *val;
@@ -232,7 +232,6 @@ int bpf_prog2(struct cpu_args *ctx)
if (!cstate)
return 0;
- prev_state = *pstate;
*pstate = ctx->state;
if (!*pts) {
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
index ab90bb08a2b4..356f756cba0d 100644
--- a/samples/bpf/cpustat_user.c
+++ b/samples/bpf/cpustat_user.c
@@ -66,10 +66,10 @@ static void cpu_stat_print(void)
printf("CPU-%-6d ", j);
for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
- printf("%-11ld ", data->cstate[i] / 1000000);
+ printf("%-11lu ", data->cstate[i] / 1000000);
for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
- printf("%-11ld ", data->pstate[i] / 1000000);
+ printf("%-11lu ", data->pstate[i] / 1000000);
printf("\n");
}
diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
index 38e4599350db..7f4f722787d5 100755
--- a/samples/bpf/do_hbm_test.sh
+++ b/samples/bpf/do_hbm_test.sh
@@ -112,7 +112,7 @@ function start_hbm () {
processArgs () {
for i in $args ; do
case $i in
- # Support for upcomming ingress rate limiting
+ # Support for upcoming ingress rate limiting
#in) # support for upcoming ingress rate limiting
# dir="-i"
# dir_name="in"
diff --git a/samples/bpf/gnu/stubs.h b/samples/bpf/gnu/stubs.h
new file mode 100644
index 000000000000..1c638d9dce1a
--- /dev/null
+++ b/samples/bpf/gnu/stubs.h
@@ -0,0 +1 @@
+/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index 516fbac28b71..fc88d4dbdf48 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -5,7 +5,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
- * Example program for Host Bandwidth Managment
+ * Example program for Host Bandwidth Management
*
* This program loads a cgroup skb BPF program to enforce cgroup output
* (egress) or input (ingress) bandwidth limits.
@@ -24,7 +24,7 @@
* beyond the rate limit specified while there is available
* bandwidth. Current implementation assumes there is only
* NIC (eth0), but can be extended to support multiple NICs.
- * Currrently only supported for egress.
+ * Currently only supported for egress.
* -h Print this info
* prog BPF program file name. Name defaults to hbm_out_kern.o
*/
@@ -65,7 +65,7 @@ static void Usage(void);
static void read_trace_pipe2(void);
static void do_error(char *msg, bool errno_flag);
-#define DEBUGFS "/sys/kernel/debug/tracing/"
+#define TRACEFS "/sys/kernel/tracing/"
static struct bpf_program *bpf_prog;
static struct bpf_object *obj;
@@ -77,7 +77,7 @@ static void read_trace_pipe2(void)
FILE *outf;
char *outFname = "hbm_out.log";
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ trace_fd = open(TRACEFS "trace_pipe", O_RDONLY, 0);
if (trace_fd < 0) {
printf("Error opening trace_pipe\n");
return;
@@ -315,6 +315,7 @@ static int run_bpf_prog(char *prog, int cg_id)
fout = fopen(fname, "w");
fprintf(fout, "id:%d\n", cg_id);
fprintf(fout, "ERROR: Could not lookup queue_stats\n");
+ fclose(fout);
} else if (stats_flag && qstats.lastPacketTime >
qstats.firstPacketTime) {
long long delta_us = (qstats.lastPacketTime -
@@ -497,7 +498,6 @@ int main(int argc, char **argv)
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
- __fallthrough;
default:
Usage();
return 0;
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index 9b193231024a..f07474c72525 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -39,8 +39,8 @@ struct {
/* Taken from the current format defined in
* include/trace/events/ib_umad.h
* and
- * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format
- * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format
+ * /sys/kernel/tracing/events/ib_umad/ib_umad_read/format
+ * /sys/kernel/tracing/events/ib_umad/ib_umad_write/format
*/
struct ib_umad_rw_args {
u64 pad;
diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist.bpf.c
index 1fa14c54963a..dbab80e813fe 100644
--- a/samples/bpf/lwt_len_hist_kern.c
+++ b/samples/bpf/lwt_len_hist.bpf.c
@@ -10,29 +10,16 @@
* General Public License for more details.
*/
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/in.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
-struct bpf_elf_map {
- __u32 type;
- __u32 size_key;
- __u32 size_value;
- __u32 max_elem;
- __u32 flags;
- __u32 id;
- __u32 pinning;
-};
-
-struct bpf_elf_map SEC("maps") lwt_len_hist_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .size_key = sizeof(__u64),
- .size_value = sizeof(__u64),
- .pinning = 2,
- .max_elem = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, u64);
+ __type(value, u64);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+ __uint(max_entries, 1024);
+} lwt_len_hist_map SEC(".maps");
static unsigned int log2(unsigned int v)
{
diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh
index 0eda9754f50b..381b2c634784 100755
--- a/samples/bpf/lwt_len_hist.sh
+++ b/samples/bpf/lwt_len_hist.sh
@@ -4,8 +4,8 @@
NS1=lwt_ns1
VETH0=tst_lwt1a
VETH1=tst_lwt1b
-
-TRACE_ROOT=/sys/kernel/debug/tracing
+BPF_PROG=lwt_len_hist.bpf.o
+TRACE_ROOT=/sys/kernel/tracing
function cleanup {
# To reset saved histogram, remove pinned map
@@ -30,7 +30,7 @@ ip netns exec $NS1 netserver
echo 1 > ${TRACE_ROOT}/tracing_on
cp /dev/null ${TRACE_ROOT}/trace
-ip route add 192.168.253.2/32 encap bpf out obj lwt_len_hist_kern.o section len_hist dev $VETH0
+ip route add 192.168.253.2/32 encap bpf out obj $BPF_PROG section len_hist dev $VETH0
netperf -H 192.168.253.2 -t TCP_STREAM
cat ${TRACE_ROOT}/trace | grep -v '^#'
./lwt_len_hist
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test.bpf.c
index 7342c5b2f278..3cdeba2afe12 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test.bpf.c
@@ -4,14 +4,12 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
+#include "vmlinux.h"
+#include <errno.h>
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-#include "trace_common.h"
#define MAX_ENTRIES 1000
#define MAX_NR_CPUS 1024
@@ -102,8 +100,8 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} lru_hash_lookup_map SEC(".maps");
-SEC("kprobe/" SYSCALL(sys_getuid))
-int stress_hmap(struct pt_regs *ctx)
+SEC("ksyscall/getuid")
+int BPF_KSYSCALL(stress_hmap)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
@@ -120,8 +118,8 @@ int stress_hmap(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/" SYSCALL(sys_geteuid))
-int stress_percpu_hmap(struct pt_regs *ctx)
+SEC("ksyscall/geteuid")
+int BPF_KSYSCALL(stress_percpu_hmap)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
@@ -137,8 +135,8 @@ int stress_percpu_hmap(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/" SYSCALL(sys_getgid))
-int stress_hmap_alloc(struct pt_regs *ctx)
+SEC("ksyscall/getgid")
+int BPF_KSYSCALL(stress_hmap_alloc)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
@@ -154,8 +152,8 @@ int stress_hmap_alloc(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/" SYSCALL(sys_getegid))
-int stress_percpu_hmap_alloc(struct pt_regs *ctx)
+SEC("ksyscall/getegid")
+int BPF_KSYSCALL(stress_percpu_hmap_alloc)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
@@ -170,11 +168,10 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
}
return 0;
}
-
-SEC("kprobe/" SYSCALL(sys_connect))
-int stress_lru_hmap_alloc(struct pt_regs *ctx)
+SEC("ksyscall/connect")
+int BPF_KSYSCALL(stress_lru_hmap_alloc, int fd, struct sockaddr_in *uservaddr,
+ int addrlen)
{
- struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn";
union {
u16 dst6[8];
@@ -187,14 +184,11 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
u32 key;
};
} test_params;
- struct sockaddr_in6 *in6;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)uservaddr;
u16 test_case;
- int addrlen, ret;
long val = 1;
u32 key = 0;
-
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs);
- addrlen = (int)PT_REGS_PARM3_CORE(real_regs);
+ int ret;
if (addrlen != sizeof(*in6))
return 0;
@@ -251,8 +245,8 @@ done:
return 0;
}
-SEC("kprobe/" SYSCALL(sys_gettid))
-int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
+SEC("ksyscall/gettid")
+int BPF_KSYSCALL(stress_lpm_trie_map_alloc)
{
union {
u32 b32[2];
@@ -273,8 +267,8 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/" SYSCALL(sys_getpgid))
-int stress_hash_map_lookup(struct pt_regs *ctx)
+SEC("ksyscall/getpgid")
+int BPF_KSYSCALL(stress_hash_map_lookup)
{
u32 key = 1, i;
long *value;
@@ -286,8 +280,8 @@ int stress_hash_map_lookup(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/" SYSCALL(sys_getppid))
-int stress_array_map_lookup(struct pt_regs *ctx)
+SEC("ksyscall/getppid")
+int BPF_KSYSCALL(stress_array_map_lookup)
{
u32 key = 1, i;
long *value;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 1bb53f4b29e1..07ff471ed6ae 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -370,7 +370,7 @@ static void run_perf_test(int tasks)
static void fill_lpm_trie(void)
{
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
unsigned long value = 0;
unsigned int i;
int r;
@@ -443,7 +443,7 @@ int main(int argc, char **argv)
if (argc > 4)
max_cnt = atoi(argv[4]);
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/net_shared.h b/samples/bpf/net_shared.h
new file mode 100644
index 000000000000..88cc52461c98
--- /dev/null
+++ b/samples/bpf/net_shared.h
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _NET_SHARED_H
+#define _NET_SHARED_H
+
+#define AF_INET 2
+#define AF_INET6 10
+
+#define ETH_ALEN 6
+#define ETH_P_802_3_MIN 0x0600
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define ETH_P_ARP 0x0806
+#define IPPROTO_ICMPV6 58
+
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define IFNAMSIZ 16
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_ntohs(x) __builtin_bswap16(x)
+#define bpf_htons(x) __builtin_bswap16(x)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define bpf_ntohs(x) (x)
+#define bpf_htons(x) (x)
+#else
+# error "Endianness detection needs to be set up for your compiler?!"
+#endif
+
+#endif
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime.bpf.c
index eb4d94742e6b..4a65ba76c1b1 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime.bpf.c
@@ -4,20 +4,15 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/ptrace.h>
-#include <uapi/linux/perf_event.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <linux/sched.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
-#define _(P) \
- ({ \
- typeof(P) val; \
- bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
- val; \
- })
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
#define MINBLOCK_US 1
#define MAX_ENTRIES 10000
@@ -67,11 +62,9 @@ struct {
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
- struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
+ struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
+ u32 pid = BPF_CORE_READ(p, pid);
struct wokeby_t woke;
- u32 pid;
-
- pid = _(p->pid);
bpf_get_current_comm(&woke.name, sizeof(woke.name));
woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
@@ -110,29 +103,19 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
}
#if 1
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
-struct sched_switch_args {
- unsigned long long pad;
- char prev_comm[TASK_COMM_LEN];
- int prev_pid;
- int prev_prio;
- long long prev_state;
- char next_comm[TASK_COMM_LEN];
- int next_pid;
- int next_prio;
-};
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
SEC("tracepoint/sched/sched_switch")
-int oncpu(struct sched_switch_args *ctx)
+int oncpu(struct trace_event_raw_sched_switch *ctx)
{
/* record previous thread sleep time */
u32 pid = ctx->prev_pid;
#else
-SEC("kprobe/finish_task_switch")
+SEC("kprobe.multi/finish_task_switch*")
int oncpu(struct pt_regs *ctx)
{
- struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
+ struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
/* record previous thread sleep time */
- u32 pid = _(p->pid);
+ u32 pid = BPF_CORE_READ(p, pid);
#endif
u64 delta, ts, *tsp;
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index b6eedcb98fb9..5557b5393642 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -105,7 +105,7 @@ int main(int argc, char **argv)
return 2;
}
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 921c505bb567..9283f47844fb 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -21,10 +21,10 @@
#define DEFAULT_FREQ 99
#define DEFAULT_SECS 5
#define MAX_IPS 8192
-#define PAGE_OFFSET 0xffff880000000000
static int map_fd;
static int nr_cpus;
+static long _text_addr;
static void usage(void)
{
@@ -108,7 +108,7 @@ static void print_ip_map(int fd)
/* sort and print */
qsort(counts, max, sizeof(struct ipcount), count_cmp);
for (i = 0; i < max; i++) {
- if (counts[i].ip > PAGE_OFFSET) {
+ if (counts[i].ip > _text_addr) {
sym = ksym_search(counts[i].ip);
if (!sym) {
printf("ksym not found. Is kallsyms loaded?\n");
@@ -169,6 +169,13 @@ int main(int argc, char **argv)
return 2;
}
+ /* used to determine whether the address is kernel space */
+ _text_addr = ksym_get_addr("_text");
+ if (!_text_addr) {
+ fprintf(stderr, "ERROR: no '_text' in /proc/kallsyms\n");
+ return 3;
+ }
+
/* create perf FDs for each CPU */
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
links = calloc(nr_cpus, sizeof(struct bpf_link *));
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
deleted file mode 100644
index 6d0ac7569d6f..000000000000
--- a/samples/bpf/sock_flags_kern.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include <uapi/linux/bpf.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/in6.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("cgroup/sock1")
-int bpf_prog1(struct bpf_sock *sk)
-{
- char fmt[] = "socket: family %d type %d protocol %d\n";
- char fmt2[] = "socket: uid %u gid %u\n";
- __u64 gid_uid = bpf_get_current_uid_gid();
- __u32 uid = gid_uid & 0xffffffff;
- __u32 gid = gid_uid >> 32;
-
- bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
- bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid);
-
- /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
- * ie., make ping6 fail
- */
- if (sk->family == PF_INET6 &&
- sk->type == SOCK_RAW &&
- sk->protocol == IPPROTO_ICMPV6)
- return 0;
-
- return 1;
-}
-
-SEC("cgroup/sock2")
-int bpf_prog2(struct bpf_sock *sk)
-{
- char fmt[] = "socket: family %d type %d protocol %d\n";
-
- bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
-
- /* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets
- * ie., make ping fail
- */
- if (sk->family == PF_INET &&
- sk->type == SOCK_RAW &&
- sk->protocol == IPPROTO_ICMP)
- return 0;
-
- return 1;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index b7997541f7ee..f93d9145ab8a 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -31,7 +31,6 @@ static inline int proto_ports_offset(__u64 proto)
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
- case IPPROTO_DCCP:
case IPPROTO_ESP:
case IPPROTO_SCTP:
case IPPROTO_UDPLITE:
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest.bpf.c
index 455da77319d9..cba5a9d50783 100644
--- a/samples/bpf/spintest_kern.c
+++ b/samples/bpf/spintest.bpf.c
@@ -4,14 +4,15 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/perf_event.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, long);
@@ -46,20 +47,10 @@ int foo(struct pt_regs *ctx) \
}
/* add kprobes to all possible *spin* functions */
-SEC("kprobe/spin_unlock")PROG(p1)
-SEC("kprobe/spin_lock")PROG(p2)
-SEC("kprobe/mutex_spin_on_owner")PROG(p3)
-SEC("kprobe/rwsem_spin_on_owner")PROG(p4)
-SEC("kprobe/spin_unlock_irqrestore")PROG(p5)
-SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6)
-SEC("kprobe/_raw_spin_unlock_bh")PROG(p7)
-SEC("kprobe/_raw_spin_unlock")PROG(p8)
-SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9)
-SEC("kprobe/_raw_spin_trylock_bh")PROG(p10)
-SEC("kprobe/_raw_spin_lock_irq")PROG(p11)
-SEC("kprobe/_raw_spin_trylock")PROG(p12)
-SEC("kprobe/_raw_spin_lock")PROG(p13)
-SEC("kprobe/_raw_spin_lock_bh")PROG(p14)
+SEC("kprobe.multi/spin_*lock*")PROG(spin_lock)
+SEC("kprobe.multi/*_spin_on_owner")PROG(spin_on_owner)
+SEC("kprobe.multi/_raw_spin_*lock*")PROG(raw_spin_lock)
+
/* and to inner bpf helpers */
SEC("kprobe/htab_map_update_elem")PROG(p15)
SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index aadac14f748a..55971edb1088 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -9,13 +9,12 @@
int main(int ac, char **argv)
{
- char filename[256], symbol[256];
struct bpf_object *obj = NULL;
struct bpf_link *links[20];
long key, next_key, value;
struct bpf_program *prog;
int map_fd, i, j = 0;
- const char *section;
+ char filename[256];
struct ksym *sym;
if (load_kallsyms()) {
@@ -23,7 +22,7 @@ int main(int ac, char **argv)
return 2;
}
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
@@ -44,20 +43,13 @@ int main(int ac, char **argv)
}
bpf_object__for_each_program(prog, obj) {
- section = bpf_program__section_name(prog);
- if (sscanf(section, "kprobe/%s", symbol) != 1)
- continue;
-
- /* Attach prog only when symbol exists */
- if (ksym_get_addr(symbol)) {
- links[j] = bpf_program__attach(prog);
- if (libbpf_get_error(links[j])) {
- fprintf(stderr, "bpf_program__attach failed\n");
- links[j] = NULL;
- goto cleanup;
- }
- j++;
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
}
+ j++;
}
for (i = 0; i < 5; i++) {
diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c
index 88f940052450..a6e600f3d477 100644
--- a/samples/bpf/syscall_nrs.c
+++ b/samples/bpf/syscall_nrs.c
@@ -2,6 +2,9 @@
#include <uapi/linux/unistd.h>
#include <linux/kbuild.h>
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+
#define SYSNR(_NR) DEFINE(SYS ## _NR, _NR)
void syscall_defines(void)
@@ -17,3 +20,5 @@ void syscall_defines(void)
#endif
}
+
+#pragma GCC diagnostic pop
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 50231c2eff9c..58fef969a60e 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -4,6 +4,7 @@
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#if !defined(__aarch64__)
struct syscalls_enter_open_args {
unsigned long long unused;
long syscall_nr;
@@ -11,6 +12,7 @@ struct syscalls_enter_open_args {
long flags;
long mode;
};
+#endif
struct syscalls_exit_open_args {
unsigned long long unused;
@@ -18,6 +20,15 @@ struct syscalls_exit_open_args {
long ret;
};
+struct syscalls_enter_open_at_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long long dfd;
+ long filename_ptr;
+ long flags;
+ long mode;
+};
+
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
@@ -44,26 +55,37 @@ static __always_inline void count(void *map)
bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST);
}
+#if !defined(__aarch64__)
SEC("tracepoint/syscalls/sys_enter_open")
int trace_enter_open(struct syscalls_enter_open_args *ctx)
{
count(&enter_open_map);
return 0;
}
+#endif
SEC("tracepoint/syscalls/sys_enter_openat")
-int trace_enter_open_at(struct syscalls_enter_open_args *ctx)
+int trace_enter_open_at(struct syscalls_enter_open_at_args *ctx)
{
count(&enter_open_map);
return 0;
}
+SEC("tracepoint/syscalls/sys_enter_openat2")
+int trace_enter_open_at2(struct syscalls_enter_open_at_args *ctx)
+{
+ count(&enter_open_map);
+ return 0;
+}
+
+#if !defined(__aarch64__)
SEC("tracepoint/syscalls/sys_exit_open")
int trace_enter_exit(struct syscalls_exit_open_args *ctx)
{
count(&exit_open_map);
return 0;
}
+#endif
SEC("tracepoint/syscalls/sys_exit_openat")
int trace_enter_exit_at(struct syscalls_exit_open_args *ctx)
@@ -71,3 +93,10 @@ int trace_enter_exit_at(struct syscalls_exit_open_args *ctx)
count(&exit_open_map);
return 0;
}
+
+SEC("tracepoint/syscalls/sys_exit_openat2")
+int trace_enter_exit_at2(struct syscalls_exit_open_args *ctx)
+{
+ count(&exit_open_map);
+ return 0;
+}
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 7a788bb837fc..7a09ac74fac0 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -17,9 +17,9 @@
static void usage(const char *cmd)
{
- printf("USAGE: %s [-i num_progs] [-h]\n", cmd);
- printf(" -i num_progs # number of progs of the test\n");
- printf(" -h # help\n");
+ printf("USAGE: %s [-i nr_tests] [-h]\n", cmd);
+ printf(" -i nr_tests # rounds of test to run\n");
+ printf(" -h # help\n");
}
static void verify_map(int map_id)
@@ -45,14 +45,14 @@ static void verify_map(int map_id)
}
}
-static int test(char *filename, int num_progs)
+static int test(char *filename, int nr_tests)
{
- int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
- struct bpf_link *links[num_progs * 4];
- struct bpf_object *objs[num_progs];
+ int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0;
+ struct bpf_link **links = NULL;
+ struct bpf_object *objs[nr_tests];
struct bpf_program *prog;
- for (i = 0; i < num_progs; i++) {
+ for (i = 0; i < nr_tests; i++) {
objs[i] = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(objs[i])) {
fprintf(stderr, "opening BPF object file failed\n");
@@ -60,6 +60,19 @@ static int test(char *filename, int num_progs)
goto cleanup;
}
+ /* One-time initialization */
+ if (!links) {
+ int nr_progs = 0;
+
+ bpf_object__for_each_program(prog, objs[i])
+ nr_progs += 1;
+
+ links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *));
+
+ if (!links)
+ goto cleanup;
+ }
+
/* load BPF program */
if (bpf_object__load(objs[i])) {
fprintf(stderr, "loading BPF object file failed\n");
@@ -101,14 +114,18 @@ static int test(char *filename, int num_progs)
close(fd);
/* verify the map */
- for (i = 0; i < num_progs; i++) {
+ for (i = 0; i < nr_tests; i++) {
verify_map(map0_fds[i]);
verify_map(map1_fds[i]);
}
cleanup:
- for (j--; j >= 0; j--)
- bpf_link__destroy(links[j]);
+ if (links) {
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ free(links);
+ }
for (i--; i >= 0; i--)
bpf_object__close(objs[i]);
@@ -117,13 +134,13 @@ cleanup:
int main(int argc, char **argv)
{
- int opt, num_progs = 1;
+ int opt, nr_tests = 1;
char filename[256];
while ((opt = getopt(argc, argv, "i:h")) != -1) {
switch (opt) {
case 'i':
- num_progs = atoi(optarg);
+ nr_tests = atoi(optarg);
break;
case 'h':
default:
@@ -134,5 +151,5 @@ int main(int argc, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- return test(filename, num_progs);
+ return test(filename, nr_tests);
}
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index a33d74bd3a4b..1e61f2180470 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -235,7 +235,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
struct bpf_link *link;
ssize_t bytes;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
+ snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events",
event_type);
kfd = open(buf, O_WRONLY | O_TRUNC, 0);
CHECK_PERROR_RET(kfd < 0);
@@ -252,7 +252,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
close(kfd);
kfd = -1;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
+ snprintf(buf, sizeof(buf), "/sys/kernel/tracing/events/%ss/%s/id",
event_type, event_alias);
efd = open(buf, O_RDONLY, 0);
CHECK_PERROR_RET(efd < 0);
diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh
index 37d95ef3c20f..a28a8fc99dbe 100755
--- a/samples/bpf/tc_l2_redirect.sh
+++ b/samples/bpf/tc_l2_redirect.sh
@@ -8,6 +8,7 @@ REDIRECT_USER='./tc_l2_redirect'
REDIRECT_BPF='./tc_l2_redirect_kern.o'
RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
+IPV6_DISABLED=$(< /proc/sys/net/ipv6/conf/all/disable_ipv6)
IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)
function config_common {
@@ -64,6 +65,7 @@ function config_common {
sysctl -q -w net.ipv4.conf.all.rp_filter=0
sysctl -q -w net.ipv6.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.disable_ipv6=0
}
function cleanup {
@@ -77,6 +79,7 @@ function cleanup {
$IP link del ip6t >& /dev/null
sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
+ sysctl -q -w net.ipv6.conf.all.disable_ipv6=$IPV6_DISABLED
rm -f /sys/fs/bpf/tc/globals/tun_iface
[[ -z $DEBUG ]] || set -x
set -e
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
index fd2fa0004330..b19fa9b88fe0 100644
--- a/samples/bpf/tc_l2_redirect_kern.c
+++ b/samples/bpf/tc_l2_redirect_kern.c
@@ -58,14 +58,11 @@ static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr)
SEC("l2_to_iptun_ingress_forward")
int _l2_to_iptun_ingress_forward(struct __sk_buff *skb)
{
- struct bpf_tunnel_key tkey = {};
void *data = (void *)(long)skb->data;
struct eth_hdr *eth = data;
void *data_end = (void *)(long)skb->data_end;
int key = 0, *ifindex;
- int ret;
-
if (data + sizeof(*eth) > data_end)
return TC_ACT_OK;
@@ -115,8 +112,6 @@ int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb)
void *data_end = (void *)(long)skb->data_end;
int key = 0, *ifindex;
- int ret;
-
if (data + sizeof(*eth) > data_end)
return TC_ACT_OK;
@@ -205,7 +200,6 @@ int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb)
SEC("drop_non_tun_vip")
int _drop_non_tun_vip(struct __sk_buff *skb)
{
- struct bpf_tunnel_key tkey = {};
void *data = (void *)(long)skb->data;
struct eth_hdr *eth = data;
void *data_end = (void *)(long)skb->data_end;
diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
index 8dfe09a92fec..822b0742b815 100644
--- a/samples/bpf/tcp_basertt_kern.c
+++ b/samples/bpf/tcp_basertt_kern.c
@@ -47,7 +47,7 @@ int bpf_basertt(struct bpf_sock_ops *skops)
case BPF_SOCK_OPS_BASE_RTT:
n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION,
cong, sizeof(cong));
- if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) {
+ if (!n && !__builtin_memcmp(cong, nv, sizeof(nv))) {
/* Set base_rtt to 80us */
rv = 80;
} else if (n) {
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
index 2311fc9dde85..339415eac477 100644
--- a/samples/bpf/tcp_cong_kern.c
+++ b/samples/bpf/tcp_cong_kern.c
@@ -5,7 +5,7 @@
* License as published by the Free Software Foundation.
*
* BPF program to set congestion control to dctcp when both hosts are
- * in the same datacenter (as deteremined by IPv6 prefix).
+ * in the same datacenter (as determined by IPv6 prefix).
*
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c
deleted file mode 100644
index 05e88aa63009..000000000000
--- a/samples/bpf/test_cgrp2_array_pin.c
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2016 Facebook
- */
-#include <linux/unistd.h>
-#include <linux/bpf.h>
-
-#include <stdio.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-#include <fcntl.h>
-
-#include <bpf/bpf.h>
-
-static void usage(void)
-{
- printf("Usage: test_cgrp2_array_pin [...]\n");
- printf(" -F <file> File to pin an BPF cgroup array\n");
- printf(" -U <file> Update an already pinned BPF cgroup array\n");
- printf(" -v <value> Full path of the cgroup2\n");
- printf(" -h Display this help\n");
-}
-
-int main(int argc, char **argv)
-{
- const char *pinned_file = NULL, *cg2 = NULL;
- int create_array = 1;
- int array_key = 0;
- int array_fd = -1;
- int cg2_fd = -1;
- int ret = -1;
- int opt;
-
- while ((opt = getopt(argc, argv, "F:U:v:")) != -1) {
- switch (opt) {
- /* General args */
- case 'F':
- pinned_file = optarg;
- break;
- case 'U':
- pinned_file = optarg;
- create_array = 0;
- break;
- case 'v':
- cg2 = optarg;
- break;
- default:
- usage();
- goto out;
- }
- }
-
- if (!cg2 || !pinned_file) {
- usage();
- goto out;
- }
-
- cg2_fd = open(cg2, O_RDONLY);
- if (cg2_fd < 0) {
- fprintf(stderr, "open(%s,...): %s(%d)\n",
- cg2, strerror(errno), errno);
- goto out;
- }
-
- if (create_array) {
- array_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_ARRAY, NULL,
- sizeof(uint32_t), sizeof(uint32_t),
- 1, NULL);
- if (array_fd < 0) {
- fprintf(stderr,
- "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
- strerror(errno), errno);
- goto out;
- }
- } else {
- array_fd = bpf_obj_get(pinned_file);
- if (array_fd < 0) {
- fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
- pinned_file, strerror(errno), errno);
- goto out;
- }
- }
-
- ret = bpf_map_update_elem(array_fd, &array_key, &cg2_fd, 0);
- if (ret) {
- perror("bpf_map_update_elem");
- goto out;
- }
-
- if (create_array) {
- ret = bpf_obj_pin(array_fd, pinned_file);
- if (ret) {
- fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n",
- pinned_file, strerror(errno), errno);
- goto out;
- }
- }
-
-out:
- if (array_fd != -1)
- close(array_fd);
- if (cg2_fd != -1)
- close(cg2_fd);
- return ret;
-}
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
deleted file mode 100644
index 68ce69457afe..000000000000
--- a/samples/bpf/test_cgrp2_attach.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- * The eBPF program accesses the map passed in to store two pieces of
- * information. The number of invocations of the program, which maps
- * to the number of packets received, is stored to key 0. Key 1 is
- * incremented on each iteration by the number of bytes stored in
- * the skb.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- * packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-#include "bpf_insn.h"
-#include "bpf_util.h"
-
-enum {
- MAP_KEY_PACKETS,
- MAP_KEY_BYTES,
-};
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-static int prog_load(int map_fd, int verdict)
-{
- struct bpf_insn prog[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */
-
- /* Count packets */
- BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
- BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
-
- /* Count bytes */
- BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
- BPF_LD_MAP_FD(BPF_REG_1, map_fd),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
-
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = ARRAY_SIZE(prog);
- LIBBPF_OPTS(bpf_prog_load_opts, opts,
- .log_buf = bpf_log_buf,
- .log_size = BPF_LOG_BUF_SIZE,
- );
-
- return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, NULL, "GPL",
- prog, insns_cnt, &opts);
-}
-
-static int usage(const char *argv0)
-{
- printf("Usage: %s [-d] [-D] <cg-path> <egress|ingress>\n", argv0);
- printf(" -d Drop Traffic\n");
- printf(" -D Detach filter, and exit\n");
- return EXIT_FAILURE;
-}
-
-static int attach_filter(int cg_fd, int type, int verdict)
-{
- int prog_fd, map_fd, ret, key;
- long long pkt_cnt, byte_cnt;
-
- map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL,
- sizeof(key), sizeof(byte_cnt),
- 256, NULL);
- if (map_fd < 0) {
- printf("Failed to create map: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- prog_fd = prog_load(map_fd, verdict);
- printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
-
- if (prog_fd < 0) {
- printf("Failed to load prog: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- ret = bpf_prog_attach(prog_fd, cg_fd, type, 0);
- if (ret < 0) {
- printf("Failed to attach prog to cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
- }
- while (1) {
- key = MAP_KEY_PACKETS;
- assert(bpf_map_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
-
- key = MAP_KEY_BYTES;
- assert(bpf_map_lookup_elem(map_fd, &key, &byte_cnt) == 0);
-
- printf("cgroup received %lld packets, %lld bytes\n",
- pkt_cnt, byte_cnt);
- sleep(1);
- }
-
- return EXIT_SUCCESS;
-}
-
-int main(int argc, char **argv)
-{
- int detach_only = 0, verdict = 1;
- enum bpf_attach_type type;
- int opt, cg_fd, ret;
-
- while ((opt = getopt(argc, argv, "Dd")) != -1) {
- switch (opt) {
- case 'd':
- verdict = 0;
- break;
- case 'D':
- detach_only = 1;
- break;
- default:
- return usage(argv[0]);
- }
- }
-
- if (argc - optind < 2)
- return usage(argv[0]);
-
- if (strcmp(argv[optind + 1], "ingress") == 0)
- type = BPF_CGROUP_INET_INGRESS;
- else if (strcmp(argv[optind + 1], "egress") == 0)
- type = BPF_CGROUP_INET_EGRESS;
- else
- return usage(argv[0]);
-
- cg_fd = open(argv[optind], O_DIRECTORY | O_RDONLY);
- if (cg_fd < 0) {
- printf("Failed to open cgroup path: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- if (detach_only) {
- ret = bpf_prog_detach(cg_fd, type);
- printf("bpf_prog_detach() returned '%s' (%d)\n",
- strerror(errno), errno);
- } else
- ret = attach_filter(cg_fd, type, verdict);
-
- return ret;
-}
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
deleted file mode 100644
index a0811df888f4..000000000000
--- a/samples/bpf/test_cgrp2_sock.c
+++ /dev/null
@@ -1,294 +0,0 @@
-/* eBPF example program:
- *
- * - Loads eBPF program
- *
- * The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
- * sockets opened by processes in the cgroup.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <net/if.h>
-#include <inttypes.h>
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-#include "bpf_insn.h"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-static int prog_load(__u32 idx, __u32 mark, __u32 prio)
-{
- /* save pointer to context */
- struct bpf_insn prog_start[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- };
- struct bpf_insn prog_end[] = {
- BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
-
- /* set sk_bound_dev_if on socket */
- struct bpf_insn prog_dev[] = {
- BPF_MOV64_IMM(BPF_REG_3, idx),
- BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
- };
-
- /* set mark on socket */
- struct bpf_insn prog_mark[] = {
- /* get uid of process */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_current_uid_gid),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff),
-
- /* if uid is 0, use given mark, else use the uid as the mark */
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_MOV64_IMM(BPF_REG_3, mark),
-
- /* set the mark on the new socket */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)),
- };
-
- /* set priority on socket */
- struct bpf_insn prog_prio[] = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_3, prio),
- BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)),
- };
- LIBBPF_OPTS(bpf_prog_load_opts, opts,
- .log_buf = bpf_log_buf,
- .log_size = BPF_LOG_BUF_SIZE,
- );
-
- struct bpf_insn *prog;
- size_t insns_cnt;
- void *p;
- int ret;
-
- insns_cnt = sizeof(prog_start) + sizeof(prog_end);
- if (idx)
- insns_cnt += sizeof(prog_dev);
-
- if (mark)
- insns_cnt += sizeof(prog_mark);
-
- if (prio)
- insns_cnt += sizeof(prog_prio);
-
- p = prog = malloc(insns_cnt);
- if (!prog) {
- fprintf(stderr, "Failed to allocate memory for instructions\n");
- return EXIT_FAILURE;
- }
-
- memcpy(p, prog_start, sizeof(prog_start));
- p += sizeof(prog_start);
-
- if (idx) {
- memcpy(p, prog_dev, sizeof(prog_dev));
- p += sizeof(prog_dev);
- }
-
- if (mark) {
- memcpy(p, prog_mark, sizeof(prog_mark));
- p += sizeof(prog_mark);
- }
-
- if (prio) {
- memcpy(p, prog_prio, sizeof(prog_prio));
- p += sizeof(prog_prio);
- }
-
- memcpy(p, prog_end, sizeof(prog_end));
- p += sizeof(prog_end);
-
- insns_cnt /= sizeof(struct bpf_insn);
-
- ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL",
- prog, insns_cnt, &opts);
-
- free(prog);
-
- return ret;
-}
-
-static int get_bind_to_device(int sd, char *name, size_t len)
-{
- socklen_t optlen = len;
- int rc;
-
- name[0] = '\0';
- rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
- if (rc < 0)
- perror("setsockopt(SO_BINDTODEVICE)");
-
- return rc;
-}
-
-static unsigned int get_somark(int sd)
-{
- unsigned int mark = 0;
- socklen_t optlen = sizeof(mark);
- int rc;
-
- rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen);
- if (rc < 0)
- perror("getsockopt(SO_MARK)");
-
- return mark;
-}
-
-static unsigned int get_priority(int sd)
-{
- unsigned int prio = 0;
- socklen_t optlen = sizeof(prio);
- int rc;
-
- rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen);
- if (rc < 0)
- perror("getsockopt(SO_PRIORITY)");
-
- return prio;
-}
-
-static int show_sockopts(int family)
-{
- unsigned int mark, prio;
- char name[16];
- int sd;
-
- sd = socket(family, SOCK_DGRAM, 17);
- if (sd < 0) {
- perror("socket");
- return 1;
- }
-
- if (get_bind_to_device(sd, name, sizeof(name)) < 0)
- return 1;
-
- mark = get_somark(sd);
- prio = get_priority(sd);
-
- close(sd);
-
- printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio);
-
- return 0;
-}
-
-static int usage(const char *argv0)
-{
- printf("Usage:\n");
- printf(" Attach a program\n");
- printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0);
- printf("\n");
- printf(" Detach a program\n");
- printf(" %s -d cg-path\n", argv0);
- printf("\n");
- printf(" Show inherited socket settings (mark, priority, and device)\n");
- printf(" %s [-6]\n", argv0);
- return EXIT_FAILURE;
-}
-
-int main(int argc, char **argv)
-{
- __u32 idx = 0, mark = 0, prio = 0;
- const char *cgrp_path = NULL;
- int cg_fd, prog_fd, ret;
- int family = PF_INET;
- int do_attach = 1;
- int rc;
-
- while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) {
- switch (rc) {
- case 'd':
- do_attach = 0;
- break;
- case 'b':
- idx = if_nametoindex(optarg);
- if (!idx) {
- idx = strtoumax(optarg, NULL, 0);
- if (!idx) {
- printf("Invalid device name\n");
- return EXIT_FAILURE;
- }
- }
- break;
- case 'm':
- mark = strtoumax(optarg, NULL, 0);
- break;
- case 'p':
- prio = strtoumax(optarg, NULL, 0);
- break;
- case '6':
- family = PF_INET6;
- break;
- default:
- return usage(argv[0]);
- }
- }
-
- if (optind == argc)
- return show_sockopts(family);
-
- cgrp_path = argv[optind];
- if (!cgrp_path) {
- fprintf(stderr, "cgroup path not given\n");
- return EXIT_FAILURE;
- }
-
- if (do_attach && !idx && !mark && !prio) {
- fprintf(stderr,
- "One of device, mark or priority must be given\n");
- return EXIT_FAILURE;
- }
-
- cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY);
- if (cg_fd < 0) {
- printf("Failed to open cgroup path: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- if (do_attach) {
- prog_fd = prog_load(idx, mark, prio);
- if (prog_fd < 0) {
- printf("Failed to load prog: '%s'\n", strerror(errno));
- printf("Output from kernel verifier:\n%s\n-------\n",
- bpf_log_buf);
- return EXIT_FAILURE;
- }
-
- ret = bpf_prog_attach(prog_fd, cg_fd,
- BPF_CGROUP_INET_SOCK_CREATE, 0);
- if (ret < 0) {
- printf("Failed to attach prog to cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
- }
- } else {
- ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
- if (ret < 0) {
- printf("Failed to detach prog from cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
- }
- }
-
- close(cg_fd);
- return EXIT_SUCCESS;
-}
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
deleted file mode 100755
index 9f6174236856..000000000000
--- a/samples/bpf/test_cgrp2_sock.sh
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# Test various socket options that can be set by attaching programs to cgroups.
-
-CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock"
-
-################################################################################
-#
-print_result()
-{
- local rc=$1
- local status=" OK "
-
- [ $rc -ne 0 ] && status="FAIL"
-
- printf "%-50s [%4s]\n" "$2" "$status"
-}
-
-check_sock()
-{
- out=$(test_cgrp2_sock)
- echo $out | grep -q "$1"
- if [ $? -ne 0 ]; then
- print_result 1 "IPv4: $2"
- echo " expected: $1"
- echo " have: $out"
- rc=1
- else
- print_result 0 "IPv4: $2"
- fi
-}
-
-check_sock6()
-{
- out=$(test_cgrp2_sock -6)
- echo $out | grep -q "$1"
- if [ $? -ne 0 ]; then
- print_result 1 "IPv6: $2"
- echo " expected: $1"
- echo " have: $out"
- rc=1
- else
- print_result 0 "IPv6: $2"
- fi
-}
-
-################################################################################
-#
-
-cleanup()
-{
- echo $$ >> ${CGRP_MNT}/cgroup.procs
- rmdir ${CGRP_MNT}/sockopts
-}
-
-cleanup_and_exit()
-{
- local rc=$1
- local msg="$2"
-
- [ -n "$msg" ] && echo "ERROR: $msg"
-
- test_cgrp2_sock -d ${CGRP_MNT}/sockopts
- ip li del cgrp2_sock
- umount ${CGRP_MNT}
-
- exit $rc
-}
-
-
-################################################################################
-# main
-
-rc=0
-
-ip li add cgrp2_sock type dummy 2>/dev/null
-
-set -e
-mkdir -p ${CGRP_MNT}
-mount -t cgroup2 none ${CGRP_MNT}
-set +e
-
-
-# make sure we have a known start point
-cleanup 2>/dev/null
-
-mkdir -p ${CGRP_MNT}/sockopts
-[ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy"
-
-
-# set pid into cgroup
-echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs
-
-# no bpf program attached, so socket should show no settings
-check_sock "dev , mark 0, priority 0" "No programs attached"
-check_sock6 "dev , mark 0, priority 0" "No programs attached"
-
-# verify device is set
-#
-test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts
-if [ $? -ne 0 ]; then
- cleanup_and_exit 1 "Failed to install program to set device"
-fi
-check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set"
-check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set"
-
-# verify mark is set
-#
-test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts
-if [ $? -ne 0 ]; then
- cleanup_and_exit 1 "Failed to install program to set mark"
-fi
-check_sock "dev , mark 666, priority 0" "Mark set"
-check_sock6 "dev , mark 666, priority 0" "Mark set"
-
-# verify priority is set
-#
-test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts
-if [ $? -ne 0 ]; then
- cleanup_and_exit 1 "Failed to install program to set priority"
-fi
-check_sock "dev , mark 0, priority 123" "Priority set"
-check_sock6 "dev , mark 0, priority 123" "Priority set"
-
-# all 3 at once
-#
-test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts
-if [ $? -ne 0 ]; then
- cleanup_and_exit 1 "Failed to install program to set device, mark and priority"
-fi
-check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority set"
-check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority set"
-
-cleanup_and_exit $rc
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
deleted file mode 100644
index e7060aaa2f5a..000000000000
--- a/samples/bpf/test_cgrp2_sock2.c
+++ /dev/null
@@ -1,95 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* eBPF example program:
- *
- * - Loads eBPF program
- *
- * The eBPF program loads a filter from file and attaches the
- * program to a cgroup using BPF_PROG_ATTACH
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <net/if.h>
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_insn.h"
-
-static int usage(const char *argv0)
-{
- printf("Usage: %s cg-path filter-path [filter-id]\n", argv0);
- return EXIT_FAILURE;
-}
-
-int main(int argc, char **argv)
-{
- int cg_fd, err, ret = EXIT_FAILURE, filter_id = 0, prog_cnt = 0;
- const char *link_pin_path = "/sys/fs/bpf/test_cgrp2_sock2";
- struct bpf_link *link = NULL;
- struct bpf_program *progs[2];
- struct bpf_program *prog;
- struct bpf_object *obj;
-
- if (argc < 3)
- return usage(argv[0]);
-
- if (argc > 3)
- filter_id = atoi(argv[3]);
-
- cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
- if (cg_fd < 0) {
- printf("Failed to open cgroup path: '%s'\n", strerror(errno));
- return ret;
- }
-
- obj = bpf_object__open_file(argv[2], NULL);
- if (libbpf_get_error(obj)) {
- printf("ERROR: opening BPF object file failed\n");
- return ret;
- }
-
- bpf_object__for_each_program(prog, obj) {
- progs[prog_cnt] = prog;
- prog_cnt++;
- }
-
- if (filter_id >= prog_cnt) {
- printf("Invalid program id; program not found in file\n");
- goto cleanup;
- }
-
- /* load BPF program */
- if (bpf_object__load(obj)) {
- printf("ERROR: loading BPF object file failed\n");
- goto cleanup;
- }
-
- link = bpf_program__attach_cgroup(progs[filter_id], cg_fd);
- if (libbpf_get_error(link)) {
- printf("ERROR: bpf_program__attach failed\n");
- link = NULL;
- goto cleanup;
- }
-
- err = bpf_link__pin(link, link_pin_path);
- if (err < 0) {
- printf("ERROR: bpf_link__pin failed: %d\n", err);
- goto cleanup;
- }
-
- ret = EXIT_SUCCESS;
-
-cleanup:
- bpf_link__destroy(link);
- bpf_object__close(obj);
- return ret;
-}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
deleted file mode 100755
index 6a3dbe642b2b..000000000000
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-BPFFS=/sys/fs/bpf
-LINK_PIN=$BPFFS/test_cgrp2_sock2
-
-function config_device {
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth0b
- ip link set veth0b up
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add 172.16.1.101/24 dev veth0b
- ip addr add 2401:db00::2/64 dev veth0b nodad
-}
-
-function config_cgroup {
- rm -rf /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2
- mount -t cgroup2 none /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2/foo
- echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
-}
-
-function config_bpffs {
- if mount | grep $BPFFS > /dev/null; then
- echo "bpffs already mounted"
- else
- echo "bpffs not mounted. Mounting..."
- mount -t bpf none $BPFFS
- fi
-}
-
-function attach_bpf {
- ./test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
- [ $? -ne 0 ] && exit 1
-}
-
-function cleanup {
- rm -rf $LINK_PIN
- ip link del veth0b
- ip netns delete at_ns0
- umount /tmp/cgroupv2
- rm -rf /tmp/cgroupv2
-}
-
-cleanup 2>/dev/null
-
-set -e
-config_device
-config_cgroup
-config_bpffs
-set +e
-
-#
-# Test 1 - fail ping6
-#
-attach_bpf 0
-ping -c1 -w1 172.16.1.100
-if [ $? -ne 0 ]; then
- echo "ping failed when it should succeed"
- cleanup
- exit 1
-fi
-
-ping6 -c1 -w1 2401:db00::1
-if [ $? -eq 0 ]; then
- echo "ping6 succeeded when it should not"
- cleanup
- exit 1
-fi
-
-rm -rf $LINK_PIN
-sleep 1 # Wait for link detach
-
-#
-# Test 2 - fail ping
-#
-attach_bpf 1
-ping6 -c1 -w1 2401:db00::1
-if [ $? -ne 0 ]; then
- echo "ping6 failed when it should succeed"
- cleanup
- exit 1
-fi
-
-ping -c1 -w1 172.16.1.100
-if [ $? -eq 0 ]; then
- echo "ping succeeded when it should not"
- cleanup
- exit 1
-fi
-
-cleanup
-echo
-echo "*** PASS ***"
diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh
deleted file mode 100755
index 395573be6ae8..000000000000
--- a/samples/bpf/test_cgrp2_tc.sh
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-MY_DIR=$(dirname $0)
-# Details on the bpf prog
-BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin'
-BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o"
-BPF_SECTION='filter'
-
-[ -z "$TC" ] && TC='tc'
-[ -z "$IP" ] && IP='ip'
-
-# Names of the veth interface, net namespace...etc.
-HOST_IFC='ve'
-NS_IFC='vens'
-NS='ns'
-
-find_mnt() {
- cat /proc/mounts | \
- awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }'
-}
-
-# Init cgroup2 vars
-init_cgrp2_vars() {
- CGRP2_ROOT=$(find_mnt cgroup2)
- if [ -z "$CGRP2_ROOT" ]
- then
- CGRP2_ROOT='/mnt/cgroup2'
- MOUNT_CGRP2="yes"
- fi
- CGRP2_TC="$CGRP2_ROOT/tc"
- CGRP2_TC_LEAF="$CGRP2_TC/leaf"
-}
-
-# Init bpf fs vars
-init_bpf_fs_vars() {
- local bpf_fs_root=$(find_mnt bpf)
- [ -n "$bpf_fs_root" ] || return -1
- BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals"
-}
-
-setup_cgrp2() {
- case $1 in
- start)
- if [ "$MOUNT_CGRP2" == 'yes' ]
- then
- [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT
- mount -t cgroup2 none $CGRP2_ROOT || return $?
- fi
- mkdir -p $CGRP2_TC_LEAF
- ;;
- *)
- rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC
- [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT
- ;;
- esac
-}
-
-setup_bpf_cgrp2_array() {
- local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME"
- case $1 in
- start)
- $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC
- ;;
- *)
- [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array
- ;;
- esac
-}
-
-setup_net() {
- case $1 in
- start)
- $IP link add $HOST_IFC type veth peer name $NS_IFC || return $?
- $IP link set dev $HOST_IFC up || return $?
- sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0
-
- $IP netns add ns || return $?
- $IP link set dev $NS_IFC netns ns || return $?
- $IP -n $NS link set dev $NS_IFC up || return $?
- $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0
- $TC qdisc add dev $HOST_IFC clsact || return $?
- $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $?
- ;;
- *)
- $IP netns del $NS
- $IP link del $HOST_IFC
- ;;
- esac
-}
-
-run_in_cgrp() {
- # Fork another bash and move it under the specified cgroup.
- # It makes the cgroup cleanup easier at the end of the test.
- cmd='echo $$ > '
- cmd="$cmd $1/cgroup.procs; exec $2"
- bash -c "$cmd"
-}
-
-do_test() {
- run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null"
- local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \
- awk '/drop/{print substr($7, 0, index($7, ",")-1)}')
- if [[ $dropped -eq 0 ]]
- then
- echo "FAIL"
- return 1
- else
- echo "Successfully filtered $dropped packets"
- return 0
- fi
-}
-
-do_exit() {
- if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ]
- then
- echo "------ DEBUG ------"
- echo "mount: "; mount | grep -E '(cgroup2|bpf)'; echo
- echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo
- if [ -d "$BPF_FS_TC_SHARE" ]
- then
- echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo
- fi
- echo "Host net:"
- $IP netns
- $IP link show dev $HOST_IFC
- $IP -6 a show dev $HOST_IFC
- $TC -s qdisc show dev $HOST_IFC
- echo
- echo "$NS net:"
- $IP -n $NS link show dev $NS_IFC
- $IP -n $NS -6 link show dev $NS_IFC
- echo "------ DEBUG ------"
- echo
- fi
-
- if [ "$MODE" != 'nocleanup' ]
- then
- setup_net stop
- setup_bpf_cgrp2_array stop
- setup_cgrp2 stop
- fi
-}
-
-init_cgrp2_vars
-init_bpf_fs_vars
-
-while [[ $# -ge 1 ]]
-do
- a="$1"
- case $a in
- debug)
- DEBUG='yes'
- shift 1
- ;;
- cleanup-only)
- MODE='cleanuponly'
- shift 1
- ;;
- no-cleanup)
- MODE='nocleanup'
- shift 1
- ;;
- *)
- echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]"
- echo " debug: Print cgrp and network setup details at the end of the test"
- echo " cleanup-only: Try to cleanup things from last test. No test will be run"
- echo " no-cleanup: Run the test but don't do cleanup at the end"
- echo "[Note: If no arg is given, it will run the test and do cleanup at the end]"
- echo
- exit -1
- ;;
- esac
-done
-
-trap do_exit 0
-
-[ "$MODE" == 'cleanuponly' ] && exit
-
-setup_cgrp2 start || exit $?
-setup_net start || exit $?
-init_bpf_fs_vars || exit $?
-setup_bpf_cgrp2_array start || exit $?
-do_test
-echo
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
deleted file mode 100644
index 4dd532a312b9..000000000000
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/in6.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/pkt_cls.h>
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-/* copy of 'struct ethhdr' without __packed */
-struct eth_hdr {
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
- unsigned short h_proto;
-};
-
-#define PIN_GLOBAL_NS 2
-struct bpf_elf_map {
- __u32 type;
- __u32 size_key;
- __u32 size_value;
- __u32 max_elem;
- __u32 flags;
- __u32 id;
- __u32 pinning;
-};
-
-struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = {
- .type = BPF_MAP_TYPE_CGROUP_ARRAY,
- .size_key = sizeof(uint32_t),
- .size_value = sizeof(uint32_t),
- .pinning = PIN_GLOBAL_NS,
- .max_elem = 1,
-};
-
-SEC("filter")
-int handle_egress(struct __sk_buff *skb)
-{
- void *data = (void *)(long)skb->data;
- struct eth_hdr *eth = data;
- struct ipv6hdr *ip6h = data + sizeof(*eth);
- void *data_end = (void *)(long)skb->data_end;
- char dont_care_msg[] = "dont care %04x %d\n";
- char pass_msg[] = "pass\n";
- char reject_msg[] = "reject\n";
-
- /* single length check */
- if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
- return TC_ACT_OK;
-
- if (eth->h_proto != htons(ETH_P_IPV6) ||
- ip6h->nexthdr != IPPROTO_ICMPV6) {
- bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
- eth->h_proto, ip6h->nexthdr);
- return TC_ACT_OK;
- } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
- bpf_trace_printk(pass_msg, sizeof(pass_msg));
- return TC_ACT_OK;
- } else {
- bpf_trace_printk(reject_msg, sizeof(reject_msg));
- return TC_ACT_SHOT;
- }
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
deleted file mode 100644
index fbd43e2bb4d3..000000000000
--- a/samples/bpf/test_current_task_under_cgroup_kern.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-#include <linux/ptrace.h>
-#include <uapi/linux/bpf.h>
-#include <linux/version.h>
-#include <bpf/bpf_helpers.h>
-#include <uapi/linux/utsname.h>
-#include "trace_common.h"
-
-struct {
- __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
- __uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
- __uint(max_entries, 1);
-} cgroup_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, u32);
- __type(value, u64);
- __uint(max_entries, 1);
-} perf_map SEC(".maps");
-
-/* Writes the last PID that called sync to a map at index 0 */
-SEC("kprobe/" SYSCALL(sys_sync))
-int bpf_prog1(struct pt_regs *ctx)
-{
- u64 pid = bpf_get_current_pid_tgid();
- int idx = 0;
-
- if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
- return 0;
-
- bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
deleted file mode 100644
index ac251a417f45..000000000000
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <unistd.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "cgroup_helpers.h"
-
-#define CGROUP_PATH "/my-cgroup"
-
-int main(int argc, char **argv)
-{
- pid_t remote_pid, local_pid = getpid();
- struct bpf_link *link = NULL;
- struct bpf_program *prog;
- int cg2, idx = 0, rc = 1;
- struct bpf_object *obj;
- char filename[256];
- int map_fd[2];
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj)) {
- fprintf(stderr, "ERROR: opening BPF object file failed\n");
- return 0;
- }
-
- prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
- if (!prog) {
- printf("finding a prog in obj file failed\n");
- goto cleanup;
- }
-
- /* load BPF program */
- if (bpf_object__load(obj)) {
- fprintf(stderr, "ERROR: loading BPF object file failed\n");
- goto cleanup;
- }
-
- map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map");
- map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map");
- if (map_fd[0] < 0 || map_fd[1] < 0) {
- fprintf(stderr, "ERROR: finding a map in obj file failed\n");
- goto cleanup;
- }
-
- link = bpf_program__attach(prog);
- if (libbpf_get_error(link)) {
- fprintf(stderr, "ERROR: bpf_program__attach failed\n");
- link = NULL;
- goto cleanup;
- }
-
- if (setup_cgroup_environment())
- goto err;
-
- cg2 = create_and_get_cgroup(CGROUP_PATH);
-
- if (cg2 < 0)
- goto err;
-
- if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
- log_err("Adding target cgroup to map");
- goto err;
- }
-
- if (join_cgroup(CGROUP_PATH))
- goto err;
-
- /*
- * The installed helper program catched the sync call, and should
- * write it to the map.
- */
-
- sync();
- bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid);
-
- if (local_pid != remote_pid) {
- fprintf(stderr,
- "BPF Helper didn't write correct PID to map, but: %d\n",
- remote_pid);
- goto err;
- }
-
- /* Verify the negative scenario; leave the cgroup */
- if (join_cgroup("/"))
- goto err;
-
- remote_pid = 0;
- bpf_map_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
-
- sync();
- bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid);
-
- if (local_pid == remote_pid) {
- fprintf(stderr, "BPF cgroup negative test did not work\n");
- goto err;
- }
-
- rc = 0;
-
-err:
- close(cg2);
- cleanup_cgroup_environment();
-
-cleanup:
- bpf_link__destroy(link);
- bpf_object__close(obj);
- return rc;
-}
diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c
index 5efb91763d65..1c161276d57b 100644
--- a/samples/bpf/test_lru_dist.c
+++ b/samples/bpf/test_lru_dist.c
@@ -42,11 +42,6 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
list->prev = list;
}
-static inline int list_empty(const struct list_head *head)
-{
- return head->next == head;
-}
-
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c
index 1b568575ad11..9a13dbb81847 100644
--- a/samples/bpf/test_lwt_bpf.c
+++ b/samples/bpf/test_lwt_bpf.c
@@ -10,16 +10,8 @@
* General Public License for more details.
*/
-#include <stdint.h>
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmpv6.h>
-#include <linux/if_ether.h>
+#include "vmlinux.h"
+#include "net_shared.h"
#include <bpf/bpf_helpers.h>
#include <string.h>
@@ -44,9 +36,9 @@ SEC("test_ctx")
int do_test_ctx(struct __sk_buff *skb)
{
skb->cb[0] = CB_MAGIC;
- printk("len %d hash %d protocol %d\n", skb->len, skb->hash,
+ printk("len %d hash %d protocol %d", skb->len, skb->hash,
skb->protocol);
- printk("cb %d ingress_ifindex %d ifindex %d\n", skb->cb[0],
+ printk("cb %d ingress_ifindex %d ifindex %d", skb->cb[0],
skb->ingress_ifindex, skb->ifindex);
return BPF_OK;
@@ -56,9 +48,9 @@ int do_test_ctx(struct __sk_buff *skb)
SEC("test_cb")
int do_test_cb(struct __sk_buff *skb)
{
- printk("cb0: %x cb1: %x cb2: %x\n", skb->cb[0], skb->cb[1],
+ printk("cb0: %x cb1: %x cb2: %x", skb->cb[0], skb->cb[1],
skb->cb[2]);
- printk("cb3: %x cb4: %x\n", skb->cb[3], skb->cb[4]);
+ printk("cb3: %x cb4: %x", skb->cb[3], skb->cb[4]);
return BPF_OK;
}
@@ -72,11 +64,11 @@ int do_test_data(struct __sk_buff *skb)
struct iphdr *iph = data;
if (data + sizeof(*iph) > data_end) {
- printk("packet truncated\n");
+ printk("packet truncated");
return BPF_DROP;
}
- printk("src: %x dst: %x\n", iph->saddr, iph->daddr);
+ printk("src: %x dst: %x", iph->saddr, iph->daddr);
return BPF_OK;
}
@@ -97,7 +89,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
ret = bpf_skb_load_bytes(skb, IP_PROTO_OFF, &proto, 1);
if (ret < 0) {
- printk("bpf_l4_csum_replace failed: %d\n", ret);
+ printk("bpf_l4_csum_replace failed: %d", ret);
return BPF_DROP;
}
@@ -120,14 +112,14 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
ret = bpf_l4_csum_replace(skb, off, old_ip, new_ip,
flags | sizeof(new_ip));
if (ret < 0) {
- printk("bpf_l4_csum_replace failed: %d\n");
+ printk("bpf_l4_csum_replace failed: %d");
return BPF_DROP;
}
}
ret = bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
if (ret < 0) {
- printk("bpf_l3_csum_replace failed: %d\n", ret);
+ printk("bpf_l3_csum_replace failed: %d", ret);
return BPF_DROP;
}
@@ -137,7 +129,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
ret = bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
if (ret < 0) {
- printk("bpf_skb_store_bytes() failed: %d\n", ret);
+ printk("bpf_skb_store_bytes() failed: %d", ret);
return BPF_DROP;
}
@@ -153,12 +145,12 @@ int do_test_rewrite(struct __sk_buff *skb)
ret = bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4);
if (ret < 0) {
- printk("bpf_skb_load_bytes failed: %d\n", ret);
+ printk("bpf_skb_load_bytes failed: %d", ret);
return BPF_DROP;
}
if (old_ip == 0x2fea8c0) {
- printk("out: rewriting from %x to %x\n", old_ip, new_ip);
+ printk("out: rewriting from %x to %x", old_ip, new_ip);
return rewrite(skb, old_ip, new_ip, 1);
}
@@ -173,16 +165,16 @@ static inline int __do_push_ll_and_redirect(struct __sk_buff *skb)
ret = bpf_skb_change_head(skb, 14, 0);
if (ret < 0) {
- printk("skb_change_head() failed: %d\n", ret);
+ printk("skb_change_head() failed: %d", ret);
}
- ehdr.h_proto = __constant_htons(ETH_P_IP);
+ ehdr.h_proto = bpf_htons(ETH_P_IP);
memcpy(&ehdr.h_source, &smac, 6);
memcpy(&ehdr.h_dest, &dmac, 6);
ret = bpf_skb_store_bytes(skb, 0, &ehdr, sizeof(ehdr), 0);
if (ret < 0) {
- printk("skb_store_bytes() failed: %d\n", ret);
+ printk("skb_store_bytes() failed: %d", ret);
return BPF_DROP;
}
@@ -202,7 +194,7 @@ int do_push_ll_and_redirect(struct __sk_buff *skb)
ret = __do_push_ll_and_redirect(skb);
if (ret >= 0)
- printk("redirected to %d\n", ifindex);
+ printk("redirected to %d", ifindex);
return ret;
}
@@ -229,7 +221,7 @@ SEC("fill_garbage")
int do_fill_garbage(struct __sk_buff *skb)
{
__fill_garbage(skb);
- printk("Set initial 96 bytes of header to FF\n");
+ printk("Set initial 96 bytes of header to FF");
return BPF_OK;
}
@@ -238,7 +230,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb)
{
int ifindex = DST_IFINDEX;
__fill_garbage(skb);
- printk("redirected to %d\n", ifindex);
+ printk("redirected to %d", ifindex);
return bpf_redirect(ifindex, 0);
}
@@ -246,7 +238,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb)
SEC("drop_all")
int do_drop_all(struct __sk_buff *skb)
{
- printk("dropping with: %d\n", BPF_DROP);
+ printk("dropping with: %d", BPF_DROP);
return BPF_DROP;
}
diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh
index 65a976058dd3..148e2df6cdce 100755
--- a/samples/bpf/test_lwt_bpf.sh
+++ b/samples/bpf/test_lwt_bpf.sh
@@ -19,7 +19,10 @@ IPVETH3="192.168.111.2"
IP_LOCAL="192.168.99.1"
-TRACE_ROOT=/sys/kernel/debug/tracing
+PROG_SRC="test_lwt_bpf.c"
+BPF_PROG="test_lwt_bpf.o"
+TRACE_ROOT=/sys/kernel/tracing
+CONTEXT_INFO=$(cat ${TRACE_ROOT}/trace_options | grep context)
function lookup_mac()
{
@@ -36,7 +39,7 @@ function lookup_mac()
function cleanup {
set +ex
- rm test_lwt_bpf.o 2> /dev/null
+ rm $BPF_PROG 2> /dev/null
ip link del $VETH0 2> /dev/null
ip link del $VETH1 2> /dev/null
ip link del $VETH2 2> /dev/null
@@ -76,7 +79,7 @@ function install_test {
cleanup_routes
cp /dev/null ${TRACE_ROOT}/trace
- OPTS="encap bpf headroom 14 $1 obj test_lwt_bpf.o section $2 $VERBOSE"
+ OPTS="encap bpf headroom 14 $1 obj $BPF_PROG section $2 $VERBOSE"
if [ "$1" == "in" ]; then
ip route add table local local ${IP_LOCAL}/32 $OPTS dev lo
@@ -96,7 +99,7 @@ function remove_prog {
function filter_trace {
# Add newline to allow starting EXPECT= variables on newline
NL=$'\n'
- echo "${NL}$*" | sed -e 's/^.*: : //g'
+ echo "${NL}$*" | sed -e 's/bpf_trace_printk: //g'
}
function expect_fail {
@@ -160,11 +163,11 @@ function test_ctx_out {
failure "test_ctx out: packets are dropped"
}
match_trace "$(get_trace)" "
-len 84 hash 0 protocol 0
+len 84 hash 0 protocol 8
cb 1234 ingress_ifindex 0 ifindex 0
-len 84 hash 0 protocol 0
+len 84 hash 0 protocol 8
cb 1234 ingress_ifindex 0 ifindex 0
-len 84 hash 0 protocol 0
+len 84 hash 0 protocol 8
cb 1234 ingress_ifindex 0 ifindex 0" || exit 1
remove_prog out
}
@@ -367,14 +370,15 @@ setup_one_veth $NS1 $VETH0 $VETH1 $IPVETH0 $IPVETH1 $IPVETH1b
setup_one_veth $NS2 $VETH2 $VETH3 $IPVETH2 $IPVETH3
ip netns exec $NS1 netserver
echo 1 > ${TRACE_ROOT}/tracing_on
+echo nocontext-info > ${TRACE_ROOT}/trace_options
DST_MAC=$(lookup_mac $VETH1 $NS1)
SRC_MAC=$(lookup_mac $VETH0)
DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex)
-CLANG_OPTS="-O2 -target bpf -I ../include/"
+CLANG_OPTS="-O2 --target=bpf -I ../include/"
CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX"
-clang $CLANG_OPTS -c test_lwt_bpf.c -o test_lwt_bpf.o
+clang $CLANG_OPTS -c $PROG_SRC -o $BPF_PROG
test_ctx_xmit
test_ctx_out
@@ -397,4 +401,5 @@ test_netperf_redirect
cleanup
echo 0 > ${TRACE_ROOT}/tracing_on
+echo $CONTEXT_INFO > ${TRACE_ROOT}/trace_options
exit 0
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map.bpf.c
index b0200c8eac09..9f030f9c4e1b 100644
--- a/samples/bpf/test_map_in_map_kern.c
+++ b/samples/bpf/test_map_in_map.bpf.c
@@ -6,17 +6,17 @@
* License as published by the Free Software Foundation.
*/
#define KBUILD_MODNAME "foo"
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/in6.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-#include "trace_common.h"
#define MAX_NR_PORTS 65536
+#define EINVAL 22
+#define ENOENT 2
+
/* map #0 */
struct inner_a {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -103,19 +103,15 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port)
return result ? *result : -ENOENT;
}
-SEC("kprobe/__sys_connect")
-int trace_sys_connect(struct pt_regs *ctx)
+SEC("ksyscall/connect")
+int BPF_KSYSCALL(trace_sys_connect, unsigned int fd, struct sockaddr_in6 *in6, int addrlen)
{
- struct sockaddr_in6 *in6;
u16 test_case, port, dst6[8];
- int addrlen, ret, inline_ret, ret_key = 0;
+ int ret, inline_ret, ret_key = 0;
u32 port_key;
void *outer_map, *inner_map;
bool inline_hash = false;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx);
- addrlen = (int)PT_REGS_PARM3_CORE(ctx);
-
if (addrlen != sizeof(*in6))
return 0;
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index 652ec720533d..55dca43f3723 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -38,7 +38,7 @@ static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
uint32_t info_len = sizeof(info);
int ret, id;
- ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len);
+ ret = bpf_map_get_info_by_fd(inner_map_fd, &info, &info_len);
assert(!ret);
ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id);
@@ -120,7 +120,7 @@ int main(int argc, char **argv)
struct bpf_object *obj;
char filename[256];
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
deleted file mode 100644
index 8fdd2c9c56b2..000000000000
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/version.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-#define _(P) \
- ({ \
- typeof(P) val = 0; \
- bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
- val; \
- })
-
-SEC("kprobe/__set_task_comm")
-int prog(struct pt_regs *ctx)
-{
- struct signal_struct *signal;
- struct task_struct *tsk;
- char oldcomm[TASK_COMM_LEN] = {};
- char newcomm[TASK_COMM_LEN] = {};
- u16 oom_score_adj;
- u32 pid;
-
- tsk = (void *)PT_REGS_PARM1(ctx);
-
- pid = _(tsk->pid);
- bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read_kernel_str(newcomm, sizeof(newcomm),
- (void *)PT_REGS_PARM2(ctx));
- signal = _(tsk->signal);
- oom_score_adj = _(signal->oom_score_adj);
- return 0;
-}
-
-SEC("kprobe/urandom_read")
-int prog2(struct pt_regs *ctx)
-{
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c
deleted file mode 100644
index 8763181a32f3..000000000000
--- a/samples/bpf/test_overhead_raw_tp_kern.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 Facebook */
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("raw_tracepoint/task_rename")
-int prog(struct bpf_raw_tracepoint_args *ctx)
-{
- return 0;
-}
-
-SEC("raw_tracepoint/urandom_read")
-int prog2(struct bpf_raw_tracepoint_args *ctx)
-{
- return 0;
-}
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c
deleted file mode 100644
index 80edadacb692..000000000000
--- a/samples/bpf/test_overhead_tp_kern.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/sched.h>
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-/* from /sys/kernel/debug/tracing/events/task/task_rename/format */
-struct task_rename {
- __u64 pad;
- __u32 pid;
- char oldcomm[TASK_COMM_LEN];
- char newcomm[TASK_COMM_LEN];
- __u16 oom_score_adj;
-};
-SEC("tracepoint/task/task_rename")
-int prog(struct task_rename *ctx)
-{
- return 0;
-}
-
-/* from /sys/kernel/debug/tracing/events/random/urandom_read/format */
-struct urandom_read {
- __u64 pad;
- int got_bits;
- int pool_left;
- int input_left;
-};
-SEC("tracepoint/random/urandom_read")
-int prog2(struct urandom_read *ctx)
-{
- return 0;
-}
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
deleted file mode 100644
index 88717f8ec6ac..000000000000
--- a/samples/bpf/test_overhead_user.c
+++ /dev/null
@@ -1,215 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2016 Facebook
- */
-#define _GNU_SOURCE
-#include <sched.h>
-#include <errno.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <asm/unistd.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <assert.h>
-#include <sys/wait.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
-#include <time.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#define MAX_CNT 1000000
-
-static struct bpf_link *links[2];
-static struct bpf_object *obj;
-static int cnt;
-
-static __u64 time_get_ns(void)
-{
- struct timespec ts;
-
- clock_gettime(CLOCK_MONOTONIC, &ts);
- return ts.tv_sec * 1000000000ull + ts.tv_nsec;
-}
-
-static void test_task_rename(int cpu)
-{
- __u64 start_time;
- char buf[] = "test\n";
- int i, fd;
-
- fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
- if (fd < 0) {
- printf("couldn't open /proc\n");
- exit(1);
- }
- start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++) {
- if (write(fd, buf, sizeof(buf)) < 0) {
- printf("task rename failed: %s\n", strerror(errno));
- close(fd);
- return;
- }
- }
- printf("task_rename:%d: %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
- close(fd);
-}
-
-static void test_urandom_read(int cpu)
-{
- __u64 start_time;
- char buf[4];
- int i, fd;
-
- fd = open("/dev/urandom", O_RDONLY);
- if (fd < 0) {
- printf("couldn't open /dev/urandom\n");
- exit(1);
- }
- start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++) {
- if (read(fd, buf, sizeof(buf)) < 0) {
- printf("failed to read from /dev/urandom: %s\n", strerror(errno));
- close(fd);
- return;
- }
- }
- printf("urandom_read:%d: %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
- close(fd);
-}
-
-static void loop(int cpu, int flags)
-{
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
-
- if (flags & 1)
- test_task_rename(cpu);
- if (flags & 2)
- test_urandom_read(cpu);
-}
-
-static void run_perf_test(int tasks, int flags)
-{
- pid_t pid[tasks];
- int i;
-
- for (i = 0; i < tasks; i++) {
- pid[i] = fork();
- if (pid[i] == 0) {
- loop(i, flags);
- exit(0);
- } else if (pid[i] == -1) {
- printf("couldn't spawn #%d process\n", i);
- exit(1);
- }
- }
- for (i = 0; i < tasks; i++) {
- int status;
-
- assert(waitpid(pid[i], &status, 0) == pid[i]);
- assert(status == 0);
- }
-}
-
-static int load_progs(char *filename)
-{
- struct bpf_program *prog;
- int err = 0;
-
- obj = bpf_object__open_file(filename, NULL);
- err = libbpf_get_error(obj);
- if (err < 0) {
- fprintf(stderr, "ERROR: opening BPF object file failed\n");
- return err;
- }
-
- /* load BPF program */
- err = bpf_object__load(obj);
- if (err < 0) {
- fprintf(stderr, "ERROR: loading BPF object file failed\n");
- return err;
- }
-
- bpf_object__for_each_program(prog, obj) {
- links[cnt] = bpf_program__attach(prog);
- err = libbpf_get_error(links[cnt]);
- if (err < 0) {
- fprintf(stderr, "ERROR: bpf_program__attach failed\n");
- links[cnt] = NULL;
- return err;
- }
- cnt++;
- }
-
- return err;
-}
-
-static void unload_progs(void)
-{
- while (cnt)
- bpf_link__destroy(links[--cnt]);
-
- bpf_object__close(obj);
-}
-
-int main(int argc, char **argv)
-{
- int num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
- int test_flags = ~0;
- char filename[256];
- int err = 0;
-
-
- if (argc > 1)
- test_flags = atoi(argv[1]) ? : test_flags;
- if (argc > 2)
- num_cpu = atoi(argv[2]) ? : num_cpu;
-
- if (test_flags & 0x3) {
- printf("BASE\n");
- run_perf_test(num_cpu, test_flags);
- }
-
- if (test_flags & 0xC) {
- snprintf(filename, sizeof(filename),
- "%s_kprobe_kern.o", argv[0]);
-
- printf("w/KPROBE\n");
- err = load_progs(filename);
- if (!err)
- run_perf_test(num_cpu, test_flags >> 2);
-
- unload_progs();
- }
-
- if (test_flags & 0x30) {
- snprintf(filename, sizeof(filename),
- "%s_tp_kern.o", argv[0]);
- printf("w/TRACEPOINT\n");
- err = load_progs(filename);
- if (!err)
- run_perf_test(num_cpu, test_flags >> 4);
-
- unload_progs();
- }
-
- if (test_flags & 0xC0) {
- snprintf(filename, sizeof(filename),
- "%s_raw_tp_kern.o", argv[0]);
- printf("w/RAW_TRACEPOINT\n");
- err = load_progs(filename);
- if (!err)
- run_perf_test(num_cpu, test_flags >> 6);
-
- unload_progs();
- }
-
- return err;
-}
diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh
deleted file mode 100755
index 35db26f736b9..000000000000
--- a/samples/bpf/test_override_return.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-rm -r tmpmnt
-rm -f testfile.img
-dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
-DEVICE=$(losetup --show -f testfile.img)
-mkfs.btrfs -f $DEVICE
-mkdir tmpmnt
-./tracex7 $DEVICE
-if [ $? -eq 0 ]
-then
- echo "SUCCESS!"
-else
- echo "FAILED!"
-fi
-losetup -d $DEVICE
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
deleted file mode 100644
index 220a96438d75..000000000000
--- a/samples/bpf/test_probe_write_user_kern.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <uapi/linux/bpf.h>
-#include <linux/version.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-#include <bpf/bpf_core_read.h>
-#include "trace_common.h"
-
-struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, struct sockaddr_in);
- __type(value, struct sockaddr_in);
- __uint(max_entries, 256);
-} dnat_map SEC(".maps");
-
-/* kprobe is NOT a stable ABI
- * kernel functions can be removed, renamed or completely change semantics.
- * Number of arguments and their positions can change, etc.
- * In such case this bpf+kprobe example will no longer be meaningful
- *
- * This example sits on a syscall, and the syscall ABI is relatively stable
- * of course, across platforms, and over time, the ABI may change.
- */
-SEC("kprobe/" SYSCALL(sys_connect))
-int bpf_prog1(struct pt_regs *ctx)
-{
- struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
- void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs);
- int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs);
- struct sockaddr_in new_addr, orig_addr = {};
- struct sockaddr_in *mapped_addr;
-
- if (sockaddr_len > sizeof(orig_addr))
- return 0;
-
- if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
- return 0;
-
- mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
- if (mapped_addr != NULL) {
- memcpy(&new_addr, mapped_addr, sizeof(new_addr));
- bpf_probe_write_user(sockaddr_arg, &new_addr,
- sizeof(new_addr));
- }
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
deleted file mode 100644
index 00ccfb834e45..000000000000
--- a/samples/bpf/test_probe_write_user_user.c
+++ /dev/null
@@ -1,108 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <assert.h>
-#include <unistd.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-
-int main(int ac, char **argv)
-{
- struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
- struct sockaddr serv_addr, mapped_addr, tmp_addr;
- int serverfd, serverconnfd, clientfd, map_fd;
- struct bpf_link *link = NULL;
- struct bpf_program *prog;
- struct bpf_object *obj;
- socklen_t sockaddr_len;
- char filename[256];
- char *ip;
-
- serv_addr_in = (struct sockaddr_in *)&serv_addr;
- mapped_addr_in = (struct sockaddr_in *)&mapped_addr;
- tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj)) {
- fprintf(stderr, "ERROR: opening BPF object file failed\n");
- return 0;
- }
-
- prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
- if (libbpf_get_error(prog)) {
- fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
- goto cleanup;
- }
-
- /* load BPF program */
- if (bpf_object__load(obj)) {
- fprintf(stderr, "ERROR: loading BPF object file failed\n");
- goto cleanup;
- }
-
- map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map");
- if (map_fd < 0) {
- fprintf(stderr, "ERROR: finding a map in obj file failed\n");
- goto cleanup;
- }
-
- link = bpf_program__attach(prog);
- if (libbpf_get_error(link)) {
- fprintf(stderr, "ERROR: bpf_program__attach failed\n");
- link = NULL;
- goto cleanup;
- }
-
- assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
- assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
-
- /* Bind server to ephemeral port on lo */
- memset(&serv_addr, 0, sizeof(serv_addr));
- serv_addr_in->sin_family = AF_INET;
- serv_addr_in->sin_port = 0;
- serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-
- assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0);
-
- sockaddr_len = sizeof(serv_addr);
- assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0);
- ip = inet_ntoa(serv_addr_in->sin_addr);
- printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port));
-
- memset(&mapped_addr, 0, sizeof(mapped_addr));
- mapped_addr_in->sin_family = AF_INET;
- mapped_addr_in->sin_port = htons(5555);
- mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
-
- assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY));
-
- assert(listen(serverfd, 5) == 0);
-
- ip = inet_ntoa(mapped_addr_in->sin_addr);
- printf("Client connecting to: %s:%d\n",
- ip, ntohs(mapped_addr_in->sin_port));
- assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0);
-
- sockaddr_len = sizeof(tmp_addr);
- ip = inet_ntoa(tmp_addr_in->sin_addr);
- assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0);
- printf("Server received connection from: %s:%d\n",
- ip, ntohs(tmp_addr_in->sin_port));
-
- sockaddr_len = sizeof(tmp_addr);
- assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0);
- ip = inet_ntoa(tmp_addr_in->sin_addr);
- printf("Client's peer address: %s:%d\n",
- ip, ntohs(tmp_addr_in->sin_port));
-
- /* Is the server's getsockname = the socket getpeername */
- assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
-
-cleanup:
- bpf_link__destroy(link);
- bpf_object__close(obj);
- return 0;
-}
diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h
deleted file mode 100644
index 8cb5400aed1f..000000000000
--- a/samples/bpf/trace_common.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#ifndef __TRACE_COMMON_H
-#define __TRACE_COMMON_H
-
-#ifdef __x86_64__
-#define SYSCALL(SYS) "__x64_" __stringify(SYS)
-#elif defined(__s390x__)
-#define SYSCALL(SYS) "__s390x_" __stringify(SYS)
-#else
-#define SYSCALL(SYS) __stringify(SYS)
-#endif
-
-#endif
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output.bpf.c
index b64815af0943..565a73b51b04 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output.bpf.c
@@ -1,8 +1,6 @@
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "trace_common.h"
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -11,7 +9,7 @@ struct {
__uint(max_entries, 2);
} my_map SEC(".maps");
-SEC("kprobe/" SYSCALL(sys_write))
+SEC("ksyscall/write")
int bpf_prog1(struct pt_regs *ctx)
{
struct S {
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index 371732f9cf8e..d316fd2c8e24 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -51,7 +51,7 @@ int main(int argc, char **argv)
char filename[256];
FILE *f;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1.bpf.c
index ef30d2b353b0..ceedf0b1d479 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1.bpf.c
@@ -4,42 +4,35 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <uapi/linux/bpf.h>
+#include "vmlinux.h"
+#include "net_shared.h"
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
-#define _(P) \
- ({ \
- typeof(P) val = 0; \
- bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
- val; \
- })
-
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
* Number of arguments and their positions can change, etc.
* In such case this bpf+kprobe example will no longer be meaningful
*/
-SEC("kprobe/__netif_receive_skb_core")
+SEC("kprobe.multi/__netif_receive_skb_core*")
int bpf_prog1(struct pt_regs *ctx)
{
/* attaches to kprobe __netif_receive_skb_core,
- * looks for packets on loobpack device and prints them
+ * looks for packets on loopback device and prints them
+ * (wildcard is used for avoiding symbol mismatch due to optimization)
*/
char devname[IFNAMSIZ];
struct net_device *dev;
struct sk_buff *skb;
int len;
- /* non-portable! works for the given kernel only */
- bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
- dev = _(skb->dev);
- len = _(skb->len);
+ bpf_core_read(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
+ dev = BPF_CORE_READ(skb, dev);
+ len = BPF_CORE_READ(skb, len);
- bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
+ BPF_CORE_READ_STR_INTO(&devname, dev, name);
if (devname[0] == 'l' && devname[1] == 'o') {
char fmt[] = "skb %p len %d\n";
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
index 9d4adb7fd834..8c3d9043a2b6 100644
--- a/samples/bpf/tracex1_user.c
+++ b/samples/bpf/tracex1_user.c
@@ -12,7 +12,7 @@ int main(int ac, char **argv)
char filename[256];
FILE *f;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
deleted file mode 100644
index 93e0b7680b4f..000000000000
--- a/samples/bpf/tracex2_kern.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-#include "trace_common.h"
-
-struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, long);
- __type(value, long);
- __uint(max_entries, 1024);
-} my_map SEC(".maps");
-
-/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
- * example will no longer be meaningful
- */
-SEC("kprobe/kfree_skb_reason")
-int bpf_prog2(struct pt_regs *ctx)
-{
- long loc = 0;
- long init_val = 1;
- long *value;
-
- /* read ip of kfree_skb_reason caller.
- * non-portable version of __builtin_return_address(0)
- */
- BPF_KPROBE_READ_RET_IP(loc, ctx);
-
- value = bpf_map_lookup_elem(&my_map, &loc);
- if (value)
- *value += 1;
- else
- bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY);
- return 0;
-}
-
-static unsigned int log2(unsigned int v)
-{
- unsigned int r;
- unsigned int shift;
-
- r = (v > 0xFFFF) << 4; v >>= r;
- shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
- shift = (v > 0xF) << 2; v >>= shift; r |= shift;
- shift = (v > 0x3) << 1; v >>= shift; r |= shift;
- r |= (v >> 1);
- return r;
-}
-
-static unsigned int log2l(unsigned long v)
-{
- unsigned int hi = v >> 32;
- if (hi)
- return log2(hi) + 32;
- else
- return log2(v);
-}
-
-struct hist_key {
- char comm[16];
- u64 pid_tgid;
- u64 uid_gid;
- u64 index;
-};
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
- __uint(key_size, sizeof(struct hist_key));
- __uint(value_size, sizeof(long));
- __uint(max_entries, 1024);
-} my_hist_map SEC(".maps");
-
-SEC("kprobe/" SYSCALL(sys_write))
-int bpf_prog3(struct pt_regs *ctx)
-{
- long write_size = PT_REGS_PARM3(ctx);
- long init_val = 1;
- long *value;
- struct hist_key key;
-
- key.index = log2l(write_size);
- key.pid_tgid = bpf_get_current_pid_tgid();
- key.uid_gid = bpf_get_current_uid_gid();
- bpf_get_current_comm(&key.comm, sizeof(key.comm));
-
- value = bpf_map_lookup_elem(&my_hist_map, &key);
- if (value)
- __sync_fetch_and_add(value, 1);
- else
- bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
- return 0;
-}
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
deleted file mode 100644
index 089e408abd7a..000000000000
--- a/samples/bpf/tracex2_user.c
+++ /dev/null
@@ -1,187 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <string.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "bpf_util.h"
-
-#define MAX_INDEX 64
-#define MAX_STARS 38
-
-/* my_map, my_hist_map */
-static int map_fd[2];
-
-static void stars(char *str, long val, long max, int width)
-{
- int i;
-
- for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
- str[i] = '*';
- if (val > max)
- str[i - 1] = '+';
- str[i] = '\0';
-}
-
-struct task {
- char comm[16];
- __u64 pid_tgid;
- __u64 uid_gid;
-};
-
-struct hist_key {
- struct task t;
- __u32 index;
-};
-
-#define SIZE sizeof(struct task)
-
-static void print_hist_for_pid(int fd, void *task)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct hist_key key = {}, next_key;
- long values[nr_cpus];
- char starstr[MAX_STARS];
- long value;
- long data[MAX_INDEX] = {};
- int max_ind = -1;
- long max_value = 0;
- int i, ind;
-
- while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
- if (memcmp(&next_key, task, SIZE)) {
- key = next_key;
- continue;
- }
- bpf_map_lookup_elem(fd, &next_key, values);
- value = 0;
- for (i = 0; i < nr_cpus; i++)
- value += values[i];
- ind = next_key.index;
- data[ind] = value;
- if (value && ind > max_ind)
- max_ind = ind;
- if (value > max_value)
- max_value = value;
- key = next_key;
- }
-
- printf(" syscall write() stats\n");
- printf(" byte_size : count distribution\n");
- for (i = 1; i <= max_ind + 1; i++) {
- stars(starstr, data[i - 1], max_value, MAX_STARS);
- printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
- (1l << i) >> 1, (1l << i) - 1, data[i - 1],
- MAX_STARS, starstr);
- }
-}
-
-static void print_hist(int fd)
-{
- struct hist_key key = {}, next_key;
- static struct task tasks[1024];
- int task_cnt = 0;
- int i;
-
- while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
- int found = 0;
-
- for (i = 0; i < task_cnt; i++)
- if (memcmp(&tasks[i], &next_key, SIZE) == 0)
- found = 1;
- if (!found)
- memcpy(&tasks[task_cnt++], &next_key, SIZE);
- key = next_key;
- }
-
- for (i = 0; i < task_cnt; i++) {
- printf("\npid %d cmd %s uid %d\n",
- (__u32) tasks[i].pid_tgid,
- tasks[i].comm,
- (__u32) tasks[i].uid_gid);
- print_hist_for_pid(fd, &tasks[i]);
- }
-
-}
-
-static void int_exit(int sig)
-{
- print_hist(map_fd[1]);
- exit(0);
-}
-
-int main(int ac, char **argv)
-{
- long key, next_key, value;
- struct bpf_link *links[2];
- struct bpf_program *prog;
- struct bpf_object *obj;
- char filename[256];
- int i, j = 0;
- FILE *f;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj)) {
- fprintf(stderr, "ERROR: opening BPF object file failed\n");
- return 0;
- }
-
- /* load BPF program */
- if (bpf_object__load(obj)) {
- fprintf(stderr, "ERROR: loading BPF object file failed\n");
- goto cleanup;
- }
-
- map_fd[0] = bpf_object__find_map_fd_by_name(obj, "my_map");
- map_fd[1] = bpf_object__find_map_fd_by_name(obj, "my_hist_map");
- if (map_fd[0] < 0 || map_fd[1] < 0) {
- fprintf(stderr, "ERROR: finding a map in obj file failed\n");
- goto cleanup;
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- /* start 'ping' in the background to have some kfree_skb_reason
- * events */
- f = popen("ping -4 -c5 localhost", "r");
- (void) f;
-
- /* start 'dd' in the background to have plenty of 'write' syscalls */
- f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
- (void) f;
-
- bpf_object__for_each_program(prog, obj) {
- links[j] = bpf_program__attach(prog);
- if (libbpf_get_error(links[j])) {
- fprintf(stderr, "ERROR: bpf_program__attach failed\n");
- links[j] = NULL;
- goto cleanup;
- }
- j++;
- }
-
- for (i = 0; i < 5; i++) {
- key = 0;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
- printf("location 0x%lx count %ld\n", next_key, value);
- key = next_key;
- }
- if (key)
- printf("\n");
- sleep(1);
- }
- print_hist(map_fd[1]);
-
-cleanup:
- for (j--; j >= 0; j--)
- bpf_link__destroy(links[j]);
-
- bpf_object__close(obj);
- return 0;
-}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3.bpf.c
index bde6591cb20c..41f37966f5f5 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3.bpf.c
@@ -4,13 +4,17 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+struct start_key {
+ dev_t dev;
+ u32 _pad;
+ sector_t sector;
+};
+
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, long);
@@ -18,16 +22,17 @@ struct {
__uint(max_entries, 4096);
} my_map SEC(".maps");
-/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
- * example will no longer be meaningful
- */
-SEC("kprobe/blk_mq_start_request")
-int bpf_prog1(struct pt_regs *ctx)
+/* from /sys/kernel/tracing/events/block/block_io_start/format */
+SEC("tracepoint/block/block_io_start")
+int bpf_prog1(struct trace_event_raw_block_rq *ctx)
{
- long rq = PT_REGS_PARM1(ctx);
u64 val = bpf_ktime_get_ns();
+ struct start_key key = {
+ .dev = ctx->dev,
+ .sector = ctx->sector
+ };
- bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
+ bpf_map_update_elem(&my_map, &key, &val, BPF_ANY);
return 0;
}
@@ -49,21 +54,26 @@ struct {
__uint(max_entries, SLOTS);
} lat_map SEC(".maps");
-SEC("kprobe/__blk_account_io_done")
-int bpf_prog2(struct pt_regs *ctx)
+/* from /sys/kernel/tracing/events/block/block_io_done/format */
+SEC("tracepoint/block/block_io_done")
+int bpf_prog2(struct trace_event_raw_block_rq *ctx)
{
- long rq = PT_REGS_PARM1(ctx);
+ struct start_key key = {
+ .dev = ctx->dev,
+ .sector = ctx->sector
+ };
+
u64 *value, l, base;
u32 index;
- value = bpf_map_lookup_elem(&my_map, &rq);
+ value = bpf_map_lookup_elem(&my_map, &key);
if (!value)
return 0;
u64 cur_time = bpf_ktime_get_ns();
u64 delta = cur_time - *value;
- bpf_map_delete_elem(&my_map, &rq);
+ bpf_map_delete_elem(&my_map, &key);
/* the lines below are computing index = log10(delta)*10
* using integer arithmetic
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index d5eebace31e6..1002eb0323b4 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -125,7 +125,7 @@ int main(int ac, char **argv)
}
}
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4.bpf.c
index eb0f8fdd14bf..d786492fd926 100644
--- a/samples/bpf/tracex4_kern.c
+++ b/samples/bpf/tracex4.bpf.c
@@ -4,9 +4,8 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -34,13 +33,13 @@ int bpf_prog1(struct pt_regs *ctx)
return 0;
}
-SEC("kretprobe/kmem_cache_alloc_node")
+SEC("kretprobe/kmem_cache_alloc_node_noprof")
int bpf_prog2(struct pt_regs *ctx)
{
long ptr = PT_REGS_RC(ctx);
long ip = 0;
- /* get ip address of kmem_cache_alloc_node() caller */
+ /* get ip address of kmem_cache_alloc_node_noprof() caller */
BPF_KRETPROBE_READ_RET_IP(ip, ctx);
struct pair v = {
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index 227b05a0bc88..a5145ad72cbf 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -51,9 +51,9 @@ int main(int ac, char **argv)
struct bpf_program *prog;
struct bpf_object *obj;
char filename[256];
- int map_fd, i, j = 0;
+ int map_fd, j = 0;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
@@ -82,7 +82,7 @@ int main(int ac, char **argv)
j++;
}
- for (i = 0; ; i++) {
+ while (1) {
print_old_objects(map_fd);
sleep(1);
}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5.bpf.c
index 64a1f7550d7e..4d3d6c9b25fa 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5.bpf.c
@@ -4,15 +4,15 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/ptrace.h>
+#include "vmlinux.h"
+#include "syscall_nrs.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/seccomp.h>
#include <uapi/linux/unistd.h>
-#include "syscall_nrs.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#define __stringify(x) #x
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
struct {
@@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index 9d7d79f0d47d..7e2d8397fb98 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -42,7 +42,7 @@ int main(int ac, char **argv)
char filename[256];
FILE *f;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6.bpf.c
index acad5712d8b4..9b23b4737cfb 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6.bpf.c
@@ -1,7 +1,8 @@
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -45,13 +46,24 @@ int bpf_prog1(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/htab_map_lookup_elem")
-int bpf_prog2(struct pt_regs *ctx)
+/*
+ * Since *_map_lookup_elem can't be expected to trigger bpf programs
+ * due to potential deadlocks (bpf_disable_instrumentation), this bpf
+ * program will be attached to bpf_map_copy_value (which is called
+ * from map_lookup_elem) and will only filter the hashtable type.
+ */
+SEC("kprobe/bpf_map_copy_value")
+int BPF_KPROBE(bpf_prog2, struct bpf_map *map)
{
u32 key = bpf_get_smp_processor_id();
struct bpf_perf_event_value *val, buf;
+ enum bpf_map_type type;
int error;
+ type = BPF_CORE_READ(map, map_type);
+ if (type != BPF_MAP_TYPE_HASH)
+ return 0;
+
error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf));
if (error)
return 0;
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index 8e83bf2a84a4..ae811ac83bc2 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -180,7 +180,7 @@ int main(int argc, char **argv)
char filename[256];
int i = 0;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
fprintf(stderr, "ERROR: opening BPF object file failed\n");
diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c
deleted file mode 100644
index c5a92df8ac31..000000000000
--- a/samples/bpf/tracex7_kern.c
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <uapi/linux/ptrace.h>
-#include <uapi/linux/bpf.h>
-#include <linux/version.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("kprobe/open_ctree")
-int bpf_prog1(struct pt_regs *ctx)
-{
- unsigned long rc = -12;
-
- bpf_override_return(ctx, rc);
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
deleted file mode 100644
index 8be7ce18d3ba..000000000000
--- a/samples/bpf/tracex7_user.c
+++ /dev/null
@@ -1,56 +0,0 @@
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <unistd.h>
-#include <bpf/libbpf.h>
-
-int main(int argc, char **argv)
-{
- struct bpf_link *link = NULL;
- struct bpf_program *prog;
- struct bpf_object *obj;
- char filename[256];
- char command[256];
- int ret = 0;
- FILE *f;
-
- if (!argv[1]) {
- fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
- return 0;
- }
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj)) {
- fprintf(stderr, "ERROR: opening BPF object file failed\n");
- return 0;
- }
-
- prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
- if (!prog) {
- fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
- goto cleanup;
- }
-
- /* load BPF program */
- if (bpf_object__load(obj)) {
- fprintf(stderr, "ERROR: loading BPF object file failed\n");
- goto cleanup;
- }
-
- link = bpf_program__attach(prog);
- if (libbpf_get_error(link)) {
- fprintf(stderr, "ERROR: bpf_program__attach failed\n");
- link = NULL;
- goto cleanup;
- }
-
- snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
- f = popen(command, "r");
- ret = pclose(f);
-
-cleanup:
- bpf_link__destroy(link);
- bpf_object__close(obj);
- return ret ? 0 : 1;
-}
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
deleted file mode 100644
index 0a5c704badd0..000000000000
--- a/samples/bpf/xdp1_kern.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/* Copyright (c) 2016 PLUMgrid
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 256);
-} rxcnt SEC(".maps");
-
-static int parse_ipv4(void *data, u64 nh_off, void *data_end)
-{
- struct iphdr *iph = data + nh_off;
-
- if (iph + 1 > data_end)
- return 0;
- return iph->protocol;
-}
-
-static int parse_ipv6(void *data, u64 nh_off, void *data_end)
-{
- struct ipv6hdr *ip6h = data + nh_off;
-
- if (ip6h + 1 > data_end)
- return 0;
- return ip6h->nexthdr;
-}
-
-#define XDPBUFSIZE 64
-SEC("xdp.frags")
-int xdp_prog1(struct xdp_md *ctx)
-{
- __u8 pkt[XDPBUFSIZE] = {};
- void *data_end = &pkt[XDPBUFSIZE-1];
- void *data = pkt;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- long *value;
- u16 h_proto;
- u64 nh_off;
- u32 ipproto;
-
- if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
- return rc;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- h_proto = eth->h_proto;
-
- /* Handle VLAN tagged packet */
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
- /* Handle double VLAN tagged packet */
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
-
- if (h_proto == htons(ETH_P_IP))
- ipproto = parse_ipv4(data, nh_off, data_end);
- else if (h_proto == htons(ETH_P_IPV6))
- ipproto = parse_ipv6(data, nh_off, data_end);
- else
- ipproto = 0;
-
- value = bpf_map_lookup_elem(&rxcnt, &ipproto);
- if (value)
- *value += 1;
-
- return rc;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
deleted file mode 100644
index 281dc964de8d..000000000000
--- a/samples/bpf/xdp1_user.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2016 PLUMgrid
- */
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <net/if.h>
-
-#include "bpf_util.h"
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-static int ifindex;
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static __u32 prog_id;
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
- printf("bpf_xdp_query_id failed\n");
- exit(1);
- }
- if (prog_id == curr_prog_id)
- bpf_xdp_detach(ifindex, xdp_flags, NULL);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on a given interface\n");
- else
- printf("program on interface changed, not removing\n");
- exit(0);
-}
-
-/* simple per-protocol drop counter
- */
-static void poll_stats(int map_fd, int interval)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[UINT8_MAX] = { 0 };
- int i;
-
- while (1) {
- __u32 key = UINT32_MAX;
-
- sleep(interval);
-
- while (bpf_map_get_next_key(map_fd, &key, &key) == 0) {
- __u64 sum = 0;
-
- assert(bpf_map_lookup_elem(map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += values[i];
- if (sum > prev[key])
- printf("proto %u: %10llu pkt/s\n",
- key, (sum - prev[key]) / interval);
- prev[key] = sum;
- }
- }
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] IFACE\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n",
- prog);
-}
-
-int main(int argc, char **argv)
-{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "FSN";
- int prog_fd, map_fd, opt;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_map *map;
- char filename[256];
- int err;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
- switch (opt) {
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
- break;
- case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- default:
- usage(basename(argv[0]));
- return 1;
- }
- }
-
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind == argc) {
- usage(basename(argv[0]));
- return 1;
- }
-
- ifindex = if_nametoindex(argv[optind]);
- if (!ifindex) {
- perror("if_nametoindex");
- return 1;
- }
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj))
- return 1;
-
- prog = bpf_object__next_program(obj, NULL);
- bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
-
- err = bpf_object__load(obj);
- if (err)
- return 1;
-
- prog_fd = bpf_program__fd(prog);
-
- map = bpf_object__next_map(obj, NULL);
- if (!map) {
- printf("finding a map in obj file failed\n");
- return 1;
- }
- map_fd = bpf_map__fd(map);
-
- if (!prog_fd) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
- printf("link set xdp fd failed\n");
- return 1;
- }
-
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- return err;
- }
- prog_id = info.id;
-
- poll_stats(map_fd, 1);
-
- return 0;
-}
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
deleted file mode 100644
index 67804ecf7ce3..000000000000
--- a/samples/bpf/xdp2_kern.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/* Copyright (c) 2016 PLUMgrid
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 256);
-} rxcnt SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
-
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
-
-static int parse_ipv4(void *data, u64 nh_off, void *data_end)
-{
- struct iphdr *iph = data + nh_off;
-
- if (iph + 1 > data_end)
- return 0;
- return iph->protocol;
-}
-
-static int parse_ipv6(void *data, u64 nh_off, void *data_end)
-{
- struct ipv6hdr *ip6h = data + nh_off;
-
- if (ip6h + 1 > data_end)
- return 0;
- return ip6h->nexthdr;
-}
-
-#define XDPBUFSIZE 64
-SEC("xdp.frags")
-int xdp_prog1(struct xdp_md *ctx)
-{
- __u8 pkt[XDPBUFSIZE] = {};
- void *data_end = &pkt[XDPBUFSIZE-1];
- void *data = pkt;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- long *value;
- u16 h_proto;
- u64 nh_off;
- u32 ipproto;
-
- if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
- return rc;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- h_proto = eth->h_proto;
-
- /* Handle VLAN tagged packet */
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
- /* Handle double VLAN tagged packet */
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
-
- if (h_proto == htons(ETH_P_IP))
- ipproto = parse_ipv4(data, nh_off, data_end);
- else if (h_proto == htons(ETH_P_IPV6))
- ipproto = parse_ipv6(data, nh_off, data_end);
- else
- ipproto = 0;
-
- value = bpf_map_lookup_elem(&rxcnt, &ipproto);
- if (value)
- *value += 1;
-
- if (ipproto == IPPROTO_UDP) {
- swap_src_dst_mac(data);
-
- if (bpf_xdp_store_bytes(ctx, 0, pkt, sizeof(pkt)))
- return rc;
-
- rc = XDP_TX;
- }
-
- return rc;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
index d5631014a176..3c36c25d9902 100644
--- a/samples/bpf/xdp2skb_meta_kern.c
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -32,7 +32,7 @@ SEC("xdp_mark")
int _xdp_mark(struct xdp_md *ctx)
{
struct meta_info *meta;
- void *data, *data_end;
+ void *data;
int ret;
/* Reserve space in-front of data pointer for our meta info.
@@ -63,7 +63,6 @@ SEC("tc_mark")
int _tc_mark(struct __sk_buff *ctx)
{
void *data = (void *)(unsigned long)ctx->data;
- void *data_end = (void *)(unsigned long)ctx->data_end;
void *data_meta = (void *)(unsigned long)ctx->data_meta;
struct meta_info *meta = data_meta;
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c
index ffdd548627f0..da67bcad1c63 100644
--- a/samples/bpf/xdp_adjust_tail_kern.c
+++ b/samples/bpf/xdp_adjust_tail_kern.c
@@ -57,6 +57,7 @@ static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
static __always_inline __u16 csum_fold_helper(__u32 csum)
{
+ csum = (csum & 0xffff) + (csum >> 16);
return ~((csum & 0xffff) + (csum >> 16));
}
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index 167646077c8f..e9426bd65420 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -184,7 +184,7 @@ int main(int argc, char **argv)
return 1;
}
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return 1;
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 84f57f1209ce..193b3b79b31f 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -76,9 +76,9 @@ static int do_detach(int ifindex, const char *ifname, const char *app_name)
return prog_fd;
}
- err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
if (err) {
- printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n",
+ printf("ERROR: bpf_prog_get_info_by_fd failed (%s)\n",
strerror(errno));
goto close_out;
}
diff --git a/samples/bpf/xdp_monitor.bpf.c b/samples/bpf/xdp_monitor.bpf.c
deleted file mode 100644
index cfb41e2205f4..000000000000
--- a/samples/bpf/xdp_monitor.bpf.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
- *
- * XDP monitor tool, based on tracepoints
- */
-#include "xdp_sample.bpf.h"
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
deleted file mode 100644
index 58015eb2ffae..000000000000
--- a/samples/bpf/xdp_monitor_user.c
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
-static const char *__doc__=
-"XDP monitor tool, based on tracepoints\n";
-
-static const char *__doc_err_only__=
-" NOTICE: Only tracking XDP redirect errors\n"
-" Enable redirect success stats via '-s/--stats'\n"
-" (which comes with a per packet processing overhead)\n";
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <string.h>
-#include <ctype.h>
-#include <unistd.h>
-#include <locale.h>
-#include <getopt.h>
-#include <net/if.h>
-#include <time.h>
-#include <signal.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "bpf_util.h"
-#include "xdp_sample_user.h"
-#include "xdp_monitor.skel.h"
-
-static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT |
- SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT |
- SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-
-DEFINE_SAMPLE_INIT(xdp_monitor);
-
-static const struct option long_options[] = {
- { "help", no_argument, NULL, 'h' },
- { "stats", no_argument, NULL, 's' },
- { "interval", required_argument, NULL, 'i' },
- { "verbose", no_argument, NULL, 'v' },
- {}
-};
-
-int main(int argc, char **argv)
-{
- unsigned long interval = 2;
- int ret = EXIT_FAIL_OPTION;
- struct xdp_monitor *skel;
- bool errors_only = true;
- int longindex = 0, opt;
- bool error = true;
-
- /* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "si:vh",
- long_options, &longindex)) != -1) {
- switch (opt) {
- case 's':
- errors_only = false;
- mask |= SAMPLE_REDIRECT_CNT;
- break;
- case 'i':
- interval = strtoul(optarg, NULL, 0);
- break;
- case 'v':
- sample_switch_mode();
- break;
- case 'h':
- error = false;
- default:
- sample_usage(argv, long_options, __doc__, mask, error);
- return ret;
- }
- }
-
- skel = xdp_monitor__open();
- if (!skel) {
- fprintf(stderr, "Failed to xdp_monitor__open: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end;
- }
-
- ret = sample_init_pre_load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- ret = xdp_monitor__load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- ret = sample_init(skel, mask);
- if (ret < 0) {
- fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- if (errors_only)
- printf("%s", __doc_err_only__);
-
- ret = sample_run(interval, NULL, NULL);
- if (ret < 0) {
- fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
- ret = EXIT_OK;
-end_destroy:
- xdp_monitor__destroy(skel);
-end:
- sample_exit(ret);
-}
diff --git a/samples/bpf/xdp_redirect.bpf.c b/samples/bpf/xdp_redirect.bpf.c
deleted file mode 100644
index 7c02bacfe96b..000000000000
--- a/samples/bpf/xdp_redirect.bpf.c
+++ /dev/null
@@ -1,49 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include "vmlinux.h"
-#include "xdp_sample.bpf.h"
-#include "xdp_sample_shared.h"
-
-const volatile int ifindex_out;
-
-SEC("xdp")
-int xdp_redirect_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = bpf_get_smp_processor_id();
- struct ethhdr *eth = data;
- struct datarec *rec;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return XDP_DROP;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- swap_src_dst_mac(data);
- return bpf_redirect(ifindex_out, 0);
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c
deleted file mode 100644
index 87c54bfdbb70..000000000000
--- a/samples/bpf/xdp_redirect_cpu.bpf.c
+++ /dev/null
@@ -1,539 +0,0 @@
-/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
- *
- * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
-#include "vmlinux.h"
-#include "xdp_sample.bpf.h"
-#include "xdp_sample_shared.h"
-#include "hash_func01.h"
-
-/* Special map type that can XDP_REDIRECT frames to another CPU */
-struct {
- __uint(type, BPF_MAP_TYPE_CPUMAP);
- __uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(struct bpf_cpumap_val));
-} cpu_map SEC(".maps");
-
-/* Set of maps controlling available CPU, and for iterating through
- * selectable redirect CPUs.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, u32);
- __type(value, u32);
-} cpus_available SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, u32);
- __type(value, u32);
- __uint(max_entries, 1);
-} cpus_count SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, u32);
- __uint(max_entries, 1);
-} cpus_iterator SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(struct bpf_devmap_val));
- __uint(max_entries, 1);
-} tx_port SEC(".maps");
-
-char tx_mac_addr[ETH_ALEN];
-
-/* Helper parse functions */
-
-static __always_inline
-bool parse_eth(struct ethhdr *eth, void *data_end,
- u16 *eth_proto, u64 *l3_offset)
-{
- u16 eth_type;
- u64 offset;
-
- offset = sizeof(*eth);
- if ((void *)eth + offset > data_end)
- return false;
-
- eth_type = eth->h_proto;
-
- /* Skip non 802.3 Ethertypes */
- if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
- return false;
-
- /* Handle VLAN tagged packet */
- if (eth_type == bpf_htons(ETH_P_8021Q) ||
- eth_type == bpf_htons(ETH_P_8021AD)) {
- struct vlan_hdr *vlan_hdr;
-
- vlan_hdr = (void *)eth + offset;
- offset += sizeof(*vlan_hdr);
- if ((void *)eth + offset > data_end)
- return false;
- eth_type = vlan_hdr->h_vlan_encapsulated_proto;
- }
- /* Handle double VLAN tagged packet */
- if (eth_type == bpf_htons(ETH_P_8021Q) ||
- eth_type == bpf_htons(ETH_P_8021AD)) {
- struct vlan_hdr *vlan_hdr;
-
- vlan_hdr = (void *)eth + offset;
- offset += sizeof(*vlan_hdr);
- if ((void *)eth + offset > data_end)
- return false;
- eth_type = vlan_hdr->h_vlan_encapsulated_proto;
- }
-
- *eth_proto = bpf_ntohs(eth_type);
- *l3_offset = offset;
- return true;
-}
-
-static __always_inline
-u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct iphdr *iph = data + nh_off;
- struct udphdr *udph;
-
- if (iph + 1 > data_end)
- return 0;
- if (!(iph->protocol == IPPROTO_UDP))
- return 0;
-
- udph = (void *)(iph + 1);
- if (udph + 1 > data_end)
- return 0;
-
- return bpf_ntohs(udph->dest);
-}
-
-static __always_inline
-int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct iphdr *iph = data + nh_off;
-
- if (iph + 1 > data_end)
- return 0;
- return iph->protocol;
-}
-
-static __always_inline
-int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ipv6hdr *ip6h = data + nh_off;
-
- if (ip6h + 1 > data_end)
- return 0;
- return ip6h->nexthdr;
-}
-
-SEC("xdp")
-int xdp_prognum0_no_touch(struct xdp_md *ctx)
-{
- u32 key = bpf_get_smp_processor_id();
- struct datarec *rec;
- u32 *cpu_selected;
- u32 cpu_dest = 0;
- u32 key0 = 0;
-
- /* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
- if (!cpu_selected)
- return XDP_ABORTED;
- cpu_dest = *cpu_selected;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- if (cpu_dest >= nr_cpus) {
- NO_TEAR_INC(rec->issue);
- return XDP_ABORTED;
- }
- return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp")
-int xdp_prognum1_touch_data(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = bpf_get_smp_processor_id();
- struct ethhdr *eth = data;
- struct datarec *rec;
- u32 *cpu_selected;
- u32 cpu_dest = 0;
- u32 key0 = 0;
- u16 eth_type;
-
- /* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
- if (!cpu_selected)
- return XDP_ABORTED;
- cpu_dest = *cpu_selected;
-
- /* Validate packet length is minimum Eth header size */
- if (eth + 1 > data_end)
- return XDP_ABORTED;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- /* Read packet data, and use it (drop non 802.3 Ethertypes) */
- eth_type = eth->h_proto;
- if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
- NO_TEAR_INC(rec->dropped);
- return XDP_DROP;
- }
-
- if (cpu_dest >= nr_cpus) {
- NO_TEAR_INC(rec->issue);
- return XDP_ABORTED;
- }
- return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp")
-int xdp_prognum2_round_robin(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = bpf_get_smp_processor_id();
- struct datarec *rec;
- u32 cpu_dest = 0;
- u32 key0 = 0;
-
- u32 *cpu_selected;
- u32 *cpu_iterator;
- u32 *cpu_max;
- u32 cpu_idx;
-
- cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
- if (!cpu_max)
- return XDP_ABORTED;
-
- cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
- if (!cpu_iterator)
- return XDP_ABORTED;
- cpu_idx = *cpu_iterator;
-
- *cpu_iterator += 1;
- if (*cpu_iterator == *cpu_max)
- *cpu_iterator = 0;
-
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
- if (!cpu_selected)
- return XDP_ABORTED;
- cpu_dest = *cpu_selected;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- if (cpu_dest >= nr_cpus) {
- NO_TEAR_INC(rec->issue);
- return XDP_ABORTED;
- }
- return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp")
-int xdp_prognum3_proto_separate(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = bpf_get_smp_processor_id();
- struct ethhdr *eth = data;
- u8 ip_proto = IPPROTO_UDP;
- struct datarec *rec;
- u16 eth_proto = 0;
- u64 l3_offset = 0;
- u32 cpu_dest = 0;
- u32 *cpu_lookup;
- u32 cpu_idx = 0;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
- return XDP_PASS; /* Just skip */
-
- /* Extract L4 protocol */
- switch (eth_proto) {
- case ETH_P_IP:
- ip_proto = get_proto_ipv4(ctx, l3_offset);
- break;
- case ETH_P_IPV6:
- ip_proto = get_proto_ipv6(ctx, l3_offset);
- break;
- case ETH_P_ARP:
- cpu_idx = 0; /* ARP packet handled on separate CPU */
- break;
- default:
- cpu_idx = 0;
- }
-
- /* Choose CPU based on L4 protocol */
- switch (ip_proto) {
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- cpu_idx = 2;
- break;
- case IPPROTO_TCP:
- cpu_idx = 0;
- break;
- case IPPROTO_UDP:
- cpu_idx = 1;
- break;
- default:
- cpu_idx = 0;
- }
-
- cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
- if (!cpu_lookup)
- return XDP_ABORTED;
- cpu_dest = *cpu_lookup;
-
- if (cpu_dest >= nr_cpus) {
- NO_TEAR_INC(rec->issue);
- return XDP_ABORTED;
- }
- return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp")
-int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = bpf_get_smp_processor_id();
- struct ethhdr *eth = data;
- u8 ip_proto = IPPROTO_UDP;
- struct datarec *rec;
- u16 eth_proto = 0;
- u64 l3_offset = 0;
- u32 cpu_dest = 0;
- u32 *cpu_lookup;
- u32 cpu_idx = 0;
- u16 dest_port;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
- return XDP_PASS; /* Just skip */
-
- /* Extract L4 protocol */
- switch (eth_proto) {
- case ETH_P_IP:
- ip_proto = get_proto_ipv4(ctx, l3_offset);
- break;
- case ETH_P_IPV6:
- ip_proto = get_proto_ipv6(ctx, l3_offset);
- break;
- case ETH_P_ARP:
- cpu_idx = 0; /* ARP packet handled on separate CPU */
- break;
- default:
- cpu_idx = 0;
- }
-
- /* Choose CPU based on L4 protocol */
- switch (ip_proto) {
- case IPPROTO_ICMP:
- case IPPROTO_ICMPV6:
- cpu_idx = 2;
- break;
- case IPPROTO_TCP:
- cpu_idx = 0;
- break;
- case IPPROTO_UDP:
- cpu_idx = 1;
- /* DDoS filter UDP port 9 (pktgen) */
- dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
- if (dest_port == 9) {
- NO_TEAR_INC(rec->dropped);
- return XDP_DROP;
- }
- break;
- default:
- cpu_idx = 0;
- }
-
- cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
- if (!cpu_lookup)
- return XDP_ABORTED;
- cpu_dest = *cpu_lookup;
-
- if (cpu_dest >= nr_cpus) {
- NO_TEAR_INC(rec->issue);
- return XDP_ABORTED;
- }
- return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-/* Hashing initval */
-#define INITVAL 15485863
-
-static __always_inline
-u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct iphdr *iph = data + nh_off;
- u32 cpu_hash;
-
- if (iph + 1 > data_end)
- return 0;
-
- cpu_hash = iph->saddr + iph->daddr;
- cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
-
- return cpu_hash;
-}
-
-static __always_inline
-u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ipv6hdr *ip6h = data + nh_off;
- u32 cpu_hash;
-
- if (ip6h + 1 > data_end)
- return 0;
-
- cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
- cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
- cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
- cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
- cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
-
- return cpu_hash;
-}
-
-/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
- * hashing scheme is symmetric, meaning swapping IP src/dest still hit
- * same CPU.
- */
-SEC("xdp")
-int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = bpf_get_smp_processor_id();
- struct ethhdr *eth = data;
- struct datarec *rec;
- u16 eth_proto = 0;
- u64 l3_offset = 0;
- u32 cpu_dest = 0;
- u32 cpu_idx = 0;
- u32 *cpu_lookup;
- u32 key0 = 0;
- u32 *cpu_max;
- u32 cpu_hash;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
- if (!cpu_max)
- return XDP_ABORTED;
-
- if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
- return XDP_PASS; /* Just skip */
-
- /* Hash for IPv4 and IPv6 */
- switch (eth_proto) {
- case ETH_P_IP:
- cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
- break;
- case ETH_P_IPV6:
- cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
- break;
- case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
- default:
- cpu_hash = 0;
- }
-
- /* Choose CPU based on hash */
- cpu_idx = cpu_hash % *cpu_max;
-
- cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
- if (!cpu_lookup)
- return XDP_ABORTED;
- cpu_dest = *cpu_lookup;
-
- if (cpu_dest >= nr_cpus) {
- NO_TEAR_INC(rec->issue);
- return XDP_ABORTED;
- }
- return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp/cpumap")
-int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return XDP_DROP;
-
- swap_src_dst_mac(data);
- return bpf_redirect_map(&tx_port, 0, 0);
-}
-
-SEC("xdp/cpumap")
-int xdp_redirect_cpu_pass(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-SEC("xdp/cpumap")
-int xdp_redirect_cpu_drop(struct xdp_md *ctx)
-{
- return XDP_DROP;
-}
-
-SEC("xdp/devmap")
-int xdp_redirect_egress_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return XDP_DROP;
-
- __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
-
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
deleted file mode 100644
index a12381c37d2b..000000000000
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ /dev/null
@@ -1,559 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
-static const char *__doc__ =
-"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
-"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
-"Valid specification for CPUMAP BPF program:\n"
-" --mprog-name/-e pass (use built-in XDP_PASS program)\n"
-" --mprog-name/-e drop (use built-in XDP_DROP program)\n"
-" --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
-" Custom CPUMAP BPF program:\n"
-" --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
-" Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
-" to configure DEVMAP in BPF object <filename>\n";
-
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-#include <unistd.h>
-#include <locale.h>
-#include <sys/sysinfo.h>
-#include <getopt.h>
-#include <net/if.h>
-#include <time.h>
-#include <linux/limits.h>
-#include <arpa/inet.h>
-#include <linux/if_link.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "bpf_util.h"
-#include "xdp_sample_user.h"
-#include "xdp_redirect_cpu.skel.h"
-
-static int map_fd;
-static int avail_fd;
-static int count_fd;
-
-static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
- SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
- SAMPLE_EXCEPTION_CNT;
-
-DEFINE_SAMPLE_INIT(xdp_redirect_cpu);
-
-static const struct option long_options[] = {
- { "help", no_argument, NULL, 'h' },
- { "dev", required_argument, NULL, 'd' },
- { "skb-mode", no_argument, NULL, 'S' },
- { "progname", required_argument, NULL, 'p' },
- { "qsize", required_argument, NULL, 'q' },
- { "cpu", required_argument, NULL, 'c' },
- { "stress-mode", no_argument, NULL, 'x' },
- { "force", no_argument, NULL, 'F' },
- { "interval", required_argument, NULL, 'i' },
- { "verbose", no_argument, NULL, 'v' },
- { "stats", no_argument, NULL, 's' },
- { "mprog-name", required_argument, NULL, 'e' },
- { "mprog-filename", required_argument, NULL, 'f' },
- { "redirect-device", required_argument, NULL, 'r' },
- { "redirect-map", required_argument, NULL, 'm' },
- {}
-};
-
-static void print_avail_progs(struct bpf_object *obj)
-{
- struct bpf_program *pos;
-
- printf(" Programs to be used for -p/--progname:\n");
- bpf_object__for_each_program(pos, obj) {
- if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) {
- if (!strncmp(bpf_program__name(pos), "xdp_prognum",
- sizeof("xdp_prognum") - 1))
- printf(" %s\n", bpf_program__name(pos));
- }
- }
-}
-
-static void usage(char *argv[], const struct option *long_options,
- const char *doc, int mask, bool error, struct bpf_object *obj)
-{
- sample_usage(argv, long_options, doc, mask, error);
- print_avail_progs(obj);
-}
-
-static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
- __u32 avail_idx, bool new)
-{
- __u32 curr_cpus_count = 0;
- __u32 key = 0;
- int ret;
-
- /* Add a CPU entry to cpumap, as this allocate a cpu entry in
- * the kernel for the cpu.
- */
- ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
- if (ret < 0) {
- fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
- return ret;
- }
-
- /* Inform bpf_prog's that a new CPU is available to select
- * from via some control maps.
- */
- ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
- if (ret < 0) {
- fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
- return ret;
- }
-
- /* When not replacing/updating existing entry, bump the count */
- ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
- if (ret < 0) {
- fprintf(stderr, "Failed reading curr cpus_count: %s\n",
- strerror(errno));
- return ret;
- }
- if (new) {
- curr_cpus_count++;
- ret = bpf_map_update_elem(count_fd, &key,
- &curr_cpus_count, 0);
- if (ret < 0) {
- fprintf(stderr, "Failed write curr cpus_count: %s\n",
- strerror(errno));
- return ret;
- }
- }
-
- printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n",
- new ? "Add new" : "Replace", cpu, avail_idx,
- value->qsize, value->bpf_prog.fd, curr_cpus_count);
-
- return 0;
-}
-
-/* CPUs are zero-indexed. Thus, add a special sentinel default value
- * in map cpus_available to mark CPU index'es not configured
- */
-static int mark_cpus_unavailable(void)
-{
- int ret, i, n_cpus = libbpf_num_possible_cpus();
- __u32 invalid_cpu = n_cpus;
-
- for (i = 0; i < n_cpus; i++) {
- ret = bpf_map_update_elem(avail_fd, &i,
- &invalid_cpu, 0);
- if (ret < 0) {
- fprintf(stderr, "Failed marking CPU unavailable: %s\n",
- strerror(errno));
- return ret;
- }
- }
-
- return 0;
-}
-
-/* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(void *ctx)
-{
- struct bpf_cpumap_val *value = ctx;
-
- /* Changing qsize will cause kernel to free and alloc a new
- * bpf_cpu_map_entry, with an associated/complicated tear-down
- * procedure.
- */
- value->qsize = 1024;
- create_cpu_entry(1, value, 0, false);
- value->qsize = 8;
- create_cpu_entry(1, value, 0, false);
- value->qsize = 16000;
- create_cpu_entry(1, value, 0, false);
-}
-
-static int set_cpumap_prog(struct xdp_redirect_cpu *skel,
- const char *redir_interface, const char *redir_map,
- const char *mprog_filename, const char *mprog_name)
-{
- if (mprog_filename) {
- struct bpf_program *prog;
- struct bpf_object *obj;
- int ret;
-
- if (!mprog_name) {
- fprintf(stderr, "BPF program not specified for file %s\n",
- mprog_filename);
- goto end;
- }
- if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
- fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
- redir_interface ? "device" : "map", redir_interface ? "map" : "device");
- goto end;
- }
-
- /* Custom BPF program */
- obj = bpf_object__open_file(mprog_filename, NULL);
- if (!obj) {
- ret = -errno;
- fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n",
- strerror(errno));
- return ret;
- }
-
- ret = bpf_object__load(obj);
- if (ret < 0) {
- ret = -errno;
- fprintf(stderr, "Failed to bpf_object__load: %s\n",
- strerror(errno));
- return ret;
- }
-
- if (redir_map) {
- int err, redir_map_fd, ifindex_out, key = 0;
-
- redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
- if (redir_map_fd < 0) {
- fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
- strerror(errno));
- return redir_map_fd;
- }
-
- ifindex_out = if_nametoindex(redir_interface);
- if (!ifindex_out)
- ifindex_out = strtoul(redir_interface, NULL, 0);
- if (!ifindex_out) {
- fprintf(stderr, "Bad interface name or index\n");
- return -EINVAL;
- }
-
- err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
- if (err < 0)
- return err;
- }
-
- prog = bpf_object__find_program_by_name(obj, mprog_name);
- if (!prog) {
- ret = -errno;
- fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
- strerror(errno));
- return ret;
- }
-
- return bpf_program__fd(prog);
- } else {
- if (mprog_name) {
- if (redir_interface || redir_map) {
- fprintf(stderr, "Need to specify --mprog-filename/-f\n");
- goto end;
- }
- if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
- /* Use built-in pass/drop programs */
- return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
- : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
- } else {
- fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
- mprog_name);
- goto end;
- }
- } else {
- if (redir_map) {
- fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
- " --redirect-device with --redirect-map\n");
- goto end;
- }
- if (redir_interface) {
- /* Use built-in devmap redirect */
- struct bpf_devmap_val val = {};
- int ifindex_out, err;
- __u32 key = 0;
-
- if (!redir_interface)
- return 0;
-
- ifindex_out = if_nametoindex(redir_interface);
- if (!ifindex_out)
- ifindex_out = strtoul(redir_interface, NULL, 0);
- if (!ifindex_out) {
- fprintf(stderr, "Bad interface name or index\n");
- return -EINVAL;
- }
-
- if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
- printf("Get interface %d mac failed\n", ifindex_out);
- return -EINVAL;
- }
-
- val.ifindex = ifindex_out;
- val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
- err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
- if (err < 0)
- return -errno;
-
- return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
- }
- }
- }
-
- /* Disabled */
- return 0;
-end:
- fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
- return -EINVAL;
-}
-
-int main(int argc, char **argv)
-{
- const char *redir_interface = NULL, *redir_map = NULL;
- const char *mprog_filename = NULL, *mprog_name = NULL;
- struct xdp_redirect_cpu *skel;
- struct bpf_map_info info = {};
- struct bpf_cpumap_val value;
- __u32 infosz = sizeof(info);
- int ret = EXIT_FAIL_OPTION;
- unsigned long interval = 2;
- bool stress_mode = false;
- struct bpf_program *prog;
- const char *prog_name;
- bool generic = false;
- bool force = false;
- int added_cpus = 0;
- bool error = true;
- int longindex = 0;
- int add_cpu = -1;
- int ifindex = -1;
- int *cpu, i, opt;
- __u32 qsize;
- int n_cpus;
-
- n_cpus = libbpf_num_possible_cpus();
-
- /* Notice: Choosing the queue size is very important when CPU is
- * configured with power-saving states.
- *
- * If deepest state take 133 usec to wakeup from (133/10^6). When link
- * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can
- * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) =
- * 166250 bytes. With MTU size packets this is 110 packets, and with
- * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets.
- *
- * Setting default cpumap queue to 2048 as worst-case (small packet)
- * should be +64 packet due kthread wakeup call (due to xdp_do_flush)
- * worst-case is 2043 packets.
- *
- * Sysadm can configured system to avoid deep-sleep via:
- * tuned-adm profile network-latency
- */
- qsize = 2048;
-
- skel = xdp_redirect_cpu__open();
- if (!skel) {
- fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end;
- }
-
- ret = sample_init_pre_load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
- fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
- fprintf(stderr, "Failed to set max entries for cpus_available map: %s",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- cpu = calloc(n_cpus, sizeof(int));
- if (!cpu) {
- fprintf(stderr, "Failed to allocate cpu array\n");
- goto end_destroy;
- }
-
- prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
- while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
- long_options, &longindex)) != -1) {
- switch (opt) {
- case 'd':
- if (strlen(optarg) >= IF_NAMESIZE) {
- fprintf(stderr, "-d/--dev name too long\n");
- usage(argv, long_options, __doc__, mask, true, skel->obj);
- goto end_cpu;
- }
- ifindex = if_nametoindex(optarg);
- if (!ifindex)
- ifindex = strtoul(optarg, NULL, 0);
- if (!ifindex) {
- fprintf(stderr, "Bad interface index or name (%d): %s\n",
- errno, strerror(errno));
- usage(argv, long_options, __doc__, mask, true, skel->obj);
- goto end_cpu;
- }
- break;
- case 's':
- mask |= SAMPLE_REDIRECT_MAP_CNT;
- break;
- case 'i':
- interval = strtoul(optarg, NULL, 0);
- break;
- case 'S':
- generic = true;
- break;
- case 'x':
- stress_mode = true;
- break;
- case 'p':
- /* Selecting eBPF prog to load */
- prog_name = optarg;
- prog = bpf_object__find_program_by_name(skel->obj,
- prog_name);
- if (!prog) {
- fprintf(stderr,
- "Failed to find program %s specified by"
- " option -p/--progname\n",
- prog_name);
- print_avail_progs(skel->obj);
- goto end_cpu;
- }
- break;
- case 'f':
- mprog_filename = optarg;
- break;
- case 'e':
- mprog_name = optarg;
- break;
- case 'r':
- redir_interface = optarg;
- mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
- break;
- case 'm':
- redir_map = optarg;
- break;
- case 'c':
- /* Add multiple CPUs */
- add_cpu = strtoul(optarg, NULL, 0);
- if (add_cpu >= n_cpus) {
- fprintf(stderr,
- "--cpu nr too large for cpumap err (%d):%s\n",
- errno, strerror(errno));
- usage(argv, long_options, __doc__, mask, true, skel->obj);
- goto end_cpu;
- }
- cpu[added_cpus++] = add_cpu;
- break;
- case 'q':
- qsize = strtoul(optarg, NULL, 0);
- break;
- case 'F':
- force = true;
- break;
- case 'v':
- sample_switch_mode();
- break;
- case 'h':
- error = false;
- default:
- usage(argv, long_options, __doc__, mask, error, skel->obj);
- goto end_cpu;
- }
- }
-
- ret = EXIT_FAIL_OPTION;
- if (ifindex == -1) {
- fprintf(stderr, "Required option --dev missing\n");
- usage(argv, long_options, __doc__, mask, true, skel->obj);
- goto end_cpu;
- }
-
- if (add_cpu == -1) {
- fprintf(stderr, "Required option --cpu missing\n"
- "Specify multiple --cpu option to add more\n");
- usage(argv, long_options, __doc__, mask, true, skel->obj);
- goto end_cpu;
- }
-
- skel->rodata->from_match[0] = ifindex;
- if (redir_interface)
- skel->rodata->to_match[0] = if_nametoindex(redir_interface);
-
- ret = xdp_redirect_cpu__load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
- strerror(errno));
- goto end_cpu;
- }
-
- ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
- if (ret < 0) {
- fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
- strerror(errno));
- goto end_cpu;
- }
-
- skel->bss->cpumap_map_id = info.id;
-
- map_fd = bpf_map__fd(skel->maps.cpu_map);
- avail_fd = bpf_map__fd(skel->maps.cpus_available);
- count_fd = bpf_map__fd(skel->maps.cpus_count);
-
- ret = mark_cpus_unavailable();
- if (ret < 0) {
- fprintf(stderr, "Unable to mark CPUs as unavailable\n");
- goto end_cpu;
- }
-
- ret = sample_init(skel, mask);
- if (ret < 0) {
- fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_cpu;
- }
-
- value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
- mprog_filename, mprog_name);
- if (value.bpf_prog.fd < 0) {
- fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
- strerror(-value.bpf_prog.fd));
- usage(argv, long_options, __doc__, mask, true, skel->obj);
- ret = EXIT_FAIL_BPF;
- goto end_cpu;
- }
- value.qsize = qsize;
-
- for (i = 0; i < added_cpus; i++) {
- if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
- fprintf(stderr, "Cannot proceed, exiting\n");
- usage(argv, long_options, __doc__, mask, true, skel->obj);
- goto end_cpu;
- }
- }
-
- ret = EXIT_FAIL_XDP;
- if (sample_install_xdp(prog, ifindex, generic, force) < 0)
- goto end_cpu;
-
- ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
- if (ret < 0) {
- fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_cpu;
- }
- ret = EXIT_OK;
-end_cpu:
- free(cpu);
-end_destroy:
- xdp_redirect_cpu__destroy(skel);
-end:
- sample_exit(ret);
-}
diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c
deleted file mode 100644
index 8557c278df77..000000000000
--- a/samples/bpf/xdp_redirect_map.bpf.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#define KBUILD_MODNAME "foo"
-
-#include "vmlinux.h"
-#include "xdp_sample.bpf.h"
-#include "xdp_sample_shared.h"
-
-/* The 2nd xdp prog on egress does not support skb mode, so we define two
- * maps, tx_port_general and tx_port_native.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
- __uint(max_entries, 1);
-} tx_port_general SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(struct bpf_devmap_val));
- __uint(max_entries, 1);
-} tx_port_native SEC(".maps");
-
-/* store egress interface mac address */
-const volatile __u8 tx_mac_addr[ETH_ALEN];
-
-static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = bpf_get_smp_processor_id();
- struct ethhdr *eth = data;
- struct datarec *rec;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return XDP_DROP;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
- swap_src_dst_mac(data);
- return bpf_redirect_map(redirect_map, 0, 0);
-}
-
-SEC("xdp")
-int xdp_redirect_map_general(struct xdp_md *ctx)
-{
- return xdp_redirect_map(ctx, &tx_port_general);
-}
-
-SEC("xdp")
-int xdp_redirect_map_native(struct xdp_md *ctx)
-{
- return xdp_redirect_map(ctx, &tx_port_native);
-}
-
-SEC("xdp/devmap")
-int xdp_redirect_map_egress(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u8 *mac_addr = (u8 *) tx_mac_addr;
- struct ethhdr *eth = data;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return XDP_DROP;
-
- barrier_var(mac_addr); /* prevent optimizing out memcpy */
- __builtin_memcpy(eth->h_source, mac_addr, ETH_ALEN);
-
- return XDP_PASS;
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c
deleted file mode 100644
index 8b2fd4ec2c76..000000000000
--- a/samples/bpf/xdp_redirect_map_multi.bpf.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define KBUILD_MODNAME "foo"
-
-#include "vmlinux.h"
-#include "xdp_sample.bpf.h"
-#include "xdp_sample_shared.h"
-
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
- __uint(max_entries, 32);
-} forward_map_general SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(struct bpf_devmap_val));
- __uint(max_entries, 32);
-} forward_map_native SEC(".maps");
-
-/* map to store egress interfaces mac addresses */
-struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, u32);
- __type(value, __be64);
- __uint(max_entries, 32);
-} mac_map SEC(".maps");
-
-static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
-{
- u32 key = bpf_get_smp_processor_id();
- struct datarec *rec;
-
- rec = bpf_map_lookup_elem(&rx_cnt, &key);
- if (!rec)
- return XDP_PASS;
- NO_TEAR_INC(rec->processed);
-
- return bpf_redirect_map(forward_map, 0,
- BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
-}
-
-SEC("xdp")
-int xdp_redirect_map_general(struct xdp_md *ctx)
-{
- return xdp_redirect_map(ctx, &forward_map_general);
-}
-
-SEC("xdp")
-int xdp_redirect_map_native(struct xdp_md *ctx)
-{
- return xdp_redirect_map(ctx, &forward_map_native);
-}
-
-SEC("xdp/devmap")
-int xdp_devmap_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- u32 key = ctx->egress_ifindex;
- struct ethhdr *eth = data;
- __be64 *mac;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return XDP_DROP;
-
- mac = bpf_map_lookup_elem(&mac_map, &key);
- if (mac)
- __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
-
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c
deleted file mode 100644
index 9e24f2705b67..000000000000
--- a/samples/bpf/xdp_redirect_map_multi_user.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-static const char *__doc__ =
-"XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n"
-"Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n";
-
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <getopt.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <net/if.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <linux/if_ether.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "bpf_util.h"
-#include "xdp_sample_user.h"
-#include "xdp_redirect_map_multi.skel.h"
-
-#define MAX_IFACE_NUM 32
-static int ifaces[MAX_IFACE_NUM] = {};
-
-static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
- SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT |
- SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING;
-
-DEFINE_SAMPLE_INIT(xdp_redirect_map_multi);
-
-static const struct option long_options[] = {
- { "help", no_argument, NULL, 'h' },
- { "skb-mode", no_argument, NULL, 'S' },
- { "force", no_argument, NULL, 'F' },
- { "load-egress", no_argument, NULL, 'X' },
- { "stats", no_argument, NULL, 's' },
- { "interval", required_argument, NULL, 'i' },
- { "verbose", no_argument, NULL, 'v' },
- {}
-};
-
-static int update_mac_map(struct bpf_map *map)
-{
- int mac_map_fd = bpf_map__fd(map);
- unsigned char mac_addr[6];
- unsigned int ifindex;
- int i, ret = -1;
-
- for (i = 0; ifaces[i] > 0; i++) {
- ifindex = ifaces[i];
-
- ret = get_mac_addr(ifindex, mac_addr);
- if (ret < 0) {
- fprintf(stderr, "get interface %d mac failed\n",
- ifindex);
- return ret;
- }
-
- ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
- if (ret < 0) {
- fprintf(stderr, "Failed to update mac address for ifindex %d\n",
- ifindex);
- return ret;
- }
- }
-
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- struct bpf_devmap_val devmap_val = {};
- struct xdp_redirect_map_multi *skel;
- struct bpf_program *ingress_prog;
- bool xdp_devmap_attached = false;
- struct bpf_map *forward_map;
- int ret = EXIT_FAIL_OPTION;
- unsigned long interval = 2;
- char ifname[IF_NAMESIZE];
- unsigned int ifindex;
- bool generic = false;
- bool force = false;
- bool tried = false;
- bool error = true;
- int i, opt;
-
- while ((opt = getopt_long(argc, argv, "hSFXi:vs",
- long_options, NULL)) != -1) {
- switch (opt) {
- case 'S':
- generic = true;
- /* devmap_xmit tracepoint not available */
- mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
- SAMPLE_DEVMAP_XMIT_CNT_MULTI);
- break;
- case 'F':
- force = true;
- break;
- case 'X':
- xdp_devmap_attached = true;
- break;
- case 'i':
- interval = strtoul(optarg, NULL, 0);
- break;
- case 'v':
- sample_switch_mode();
- break;
- case 's':
- mask |= SAMPLE_REDIRECT_MAP_CNT;
- break;
- case 'h':
- error = false;
- default:
- sample_usage(argv, long_options, __doc__, mask, error);
- return ret;
- }
- }
-
- if (argc <= optind + 1) {
- sample_usage(argv, long_options, __doc__, mask, error);
- return ret;
- }
-
- skel = xdp_redirect_map_multi__open();
- if (!skel) {
- fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end;
- }
-
- ret = sample_init_pre_load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- ret = EXIT_FAIL_OPTION;
- for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
- ifaces[i] = if_nametoindex(argv[optind + i]);
- if (!ifaces[i])
- ifaces[i] = strtoul(argv[optind + i], NULL, 0);
- if (!if_indextoname(ifaces[i], ifname)) {
- fprintf(stderr, "Bad interface index or name\n");
- sample_usage(argv, long_options, __doc__, mask, true);
- goto end_destroy;
- }
-
- skel->rodata->from_match[i] = ifaces[i];
- skel->rodata->to_match[i] = ifaces[i];
- }
-
- ret = xdp_redirect_map_multi__load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- if (xdp_devmap_attached) {
- /* Update mac_map with all egress interfaces' mac addr */
- if (update_mac_map(skel->maps.mac_map) < 0) {
- fprintf(stderr, "Updating mac address failed\n");
- ret = EXIT_FAIL;
- goto end_destroy;
- }
- }
-
- ret = sample_init(skel, mask);
- if (ret < 0) {
- fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
-
- ingress_prog = skel->progs.xdp_redirect_map_native;
- forward_map = skel->maps.forward_map_native;
-
- for (i = 0; ifaces[i] > 0; i++) {
- ifindex = ifaces[i];
-
- ret = EXIT_FAIL_XDP;
-restart:
- /* bind prog_fd to each interface */
- if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) {
- if (generic && !tried) {
- fprintf(stderr,
- "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
- ingress_prog = skel->progs.xdp_redirect_map_general;
- forward_map = skel->maps.forward_map_general;
- tried = true;
- goto restart;
- }
- goto end_destroy;
- }
-
- /* Add all the interfaces to forward group and attach
- * egress devmap program if exist
- */
- devmap_val.ifindex = ifindex;
- if (xdp_devmap_attached)
- devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog);
- ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
- if (ret < 0) {
- fprintf(stderr, "Failed to update devmap value: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
- }
-
- ret = sample_run(interval, NULL, NULL);
- if (ret < 0) {
- fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
- ret = EXIT_OK;
-end_destroy:
- xdp_redirect_map_multi__destroy(skel);
-end:
- sample_exit(ret);
-}
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
deleted file mode 100644
index c889a1394dc1..000000000000
--- a/samples/bpf/xdp_redirect_map_user.c
+++ /dev/null
@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- */
-static const char *__doc__ =
-"XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n"
-"Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
-
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-#include <net/if.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <getopt.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "bpf_util.h"
-#include "xdp_sample_user.h"
-#include "xdp_redirect_map.skel.h"
-
-static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
- SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-
-DEFINE_SAMPLE_INIT(xdp_redirect_map);
-
-static const struct option long_options[] = {
- { "help", no_argument, NULL, 'h' },
- { "skb-mode", no_argument, NULL, 'S' },
- { "force", no_argument, NULL, 'F' },
- { "load-egress", no_argument, NULL, 'X' },
- { "stats", no_argument, NULL, 's' },
- { "interval", required_argument, NULL, 'i' },
- { "verbose", no_argument, NULL, 'v' },
- {}
-};
-
-static int verbose = 0;
-
-int main(int argc, char **argv)
-{
- struct bpf_devmap_val devmap_val = {};
- bool xdp_devmap_attached = false;
- struct xdp_redirect_map *skel;
- char str[2 * IF_NAMESIZE + 1];
- char ifname_out[IF_NAMESIZE];
- struct bpf_map *tx_port_map;
- char ifname_in[IF_NAMESIZE];
- int ifindex_in, ifindex_out;
- unsigned long interval = 2;
- int ret = EXIT_FAIL_OPTION;
- struct bpf_program *prog;
- bool generic = false;
- bool force = false;
- bool tried = false;
- bool error = true;
- int opt, key = 0;
-
- while ((opt = getopt_long(argc, argv, "hSFXi:vs",
- long_options, NULL)) != -1) {
- switch (opt) {
- case 'S':
- generic = true;
- /* devmap_xmit tracepoint not available */
- mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
- SAMPLE_DEVMAP_XMIT_CNT_MULTI);
- break;
- case 'F':
- force = true;
- break;
- case 'X':
- xdp_devmap_attached = true;
- break;
- case 'i':
- interval = strtoul(optarg, NULL, 0);
- break;
- case 'v':
- sample_switch_mode();
- verbose = 1;
- break;
- case 's':
- mask |= SAMPLE_REDIRECT_MAP_CNT;
- break;
- case 'h':
- error = false;
- default:
- sample_usage(argv, long_options, __doc__, mask, error);
- return ret;
- }
- }
-
- if (argc <= optind + 1) {
- sample_usage(argv, long_options, __doc__, mask, true);
- goto end;
- }
-
- ifindex_in = if_nametoindex(argv[optind]);
- if (!ifindex_in)
- ifindex_in = strtoul(argv[optind], NULL, 0);
-
- ifindex_out = if_nametoindex(argv[optind + 1]);
- if (!ifindex_out)
- ifindex_out = strtoul(argv[optind + 1], NULL, 0);
-
- if (!ifindex_in || !ifindex_out) {
- fprintf(stderr, "Bad interface index or name\n");
- sample_usage(argv, long_options, __doc__, mask, true);
- goto end;
- }
-
- skel = xdp_redirect_map__open();
- if (!skel) {
- fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end;
- }
-
- ret = sample_init_pre_load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- /* Load 2nd xdp prog on egress. */
- if (xdp_devmap_attached) {
- ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr);
- if (ret < 0) {
- fprintf(stderr, "Failed to get interface %d mac address: %s\n",
- ifindex_out, strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
- if (verbose)
- printf("Egress ifindex:%d using src MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
- ifindex_out,
- skel->rodata->tx_mac_addr[0], skel->rodata->tx_mac_addr[1],
- skel->rodata->tx_mac_addr[2], skel->rodata->tx_mac_addr[3],
- skel->rodata->tx_mac_addr[4], skel->rodata->tx_mac_addr[5]);
- }
-
- skel->rodata->from_match[0] = ifindex_in;
- skel->rodata->to_match[0] = ifindex_out;
-
- ret = xdp_redirect_map__load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- ret = sample_init(skel, mask);
- if (ret < 0) {
- fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
-
- prog = skel->progs.xdp_redirect_map_native;
- tx_port_map = skel->maps.tx_port_native;
-restart:
- if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) {
- /* First try with struct bpf_devmap_val as value for generic
- * mode, then fallback to sizeof(int) for older kernels.
- */
- fprintf(stderr,
- "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
- if (generic && !tried) {
- prog = skel->progs.xdp_redirect_map_general;
- tx_port_map = skel->maps.tx_port_general;
- tried = true;
- goto restart;
- }
- ret = EXIT_FAIL_XDP;
- goto end_destroy;
- }
-
- /* Loading dummy XDP prog on out-device */
- sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force);
-
- devmap_val.ifindex = ifindex_out;
- if (xdp_devmap_attached)
- devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress);
- ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0);
- if (ret < 0) {
- fprintf(stderr, "Failed to update devmap value: %s\n",
- strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- ret = EXIT_FAIL;
- if (!if_indextoname(ifindex_in, ifname_in)) {
- fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
- strerror(errno));
- goto end_destroy;
- }
-
- if (!if_indextoname(ifindex_out, ifname_out)) {
- fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
- strerror(errno));
- goto end_destroy;
- }
-
- safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
- printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
- ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
- snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
-
- ret = sample_run(interval, NULL, NULL);
- if (ret < 0) {
- fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
- ret = EXIT_OK;
-end_destroy:
- xdp_redirect_map__destroy(skel);
-end:
- sample_exit(ret);
-}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
deleted file mode 100644
index 8663dd631b6e..000000000000
--- a/samples/bpf/xdp_redirect_user.c
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
- */
-static const char *__doc__ =
-"XDP redirect tool, using bpf_redirect helper\n"
-"Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
-
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-#include <net/if.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <getopt.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "bpf_util.h"
-#include "xdp_sample_user.h"
-#include "xdp_redirect.skel.h"
-
-static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT |
- SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-
-DEFINE_SAMPLE_INIT(xdp_redirect);
-
-static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"skb-mode", no_argument, NULL, 'S' },
- {"force", no_argument, NULL, 'F' },
- {"stats", no_argument, NULL, 's' },
- {"interval", required_argument, NULL, 'i' },
- {"verbose", no_argument, NULL, 'v' },
- {}
-};
-
-int main(int argc, char **argv)
-{
- int ifindex_in, ifindex_out, opt;
- char str[2 * IF_NAMESIZE + 1];
- char ifname_out[IF_NAMESIZE];
- char ifname_in[IF_NAMESIZE];
- int ret = EXIT_FAIL_OPTION;
- unsigned long interval = 2;
- struct xdp_redirect *skel;
- bool generic = false;
- bool force = false;
- bool error = true;
-
- while ((opt = getopt_long(argc, argv, "hSFi:vs",
- long_options, NULL)) != -1) {
- switch (opt) {
- case 'S':
- generic = true;
- mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
- SAMPLE_DEVMAP_XMIT_CNT_MULTI);
- break;
- case 'F':
- force = true;
- break;
- case 'i':
- interval = strtoul(optarg, NULL, 0);
- break;
- case 'v':
- sample_switch_mode();
- break;
- case 's':
- mask |= SAMPLE_REDIRECT_CNT;
- break;
- case 'h':
- error = false;
- default:
- sample_usage(argv, long_options, __doc__, mask, error);
- return ret;
- }
- }
-
- if (argc <= optind + 1) {
- sample_usage(argv, long_options, __doc__, mask, true);
- return ret;
- }
-
- ifindex_in = if_nametoindex(argv[optind]);
- if (!ifindex_in)
- ifindex_in = strtoul(argv[optind], NULL, 0);
-
- ifindex_out = if_nametoindex(argv[optind + 1]);
- if (!ifindex_out)
- ifindex_out = strtoul(argv[optind + 1], NULL, 0);
-
- if (!ifindex_in || !ifindex_out) {
- fprintf(stderr, "Bad interface index or name\n");
- sample_usage(argv, long_options, __doc__, mask, true);
- goto end;
- }
-
- skel = xdp_redirect__open();
- if (!skel) {
- fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end;
- }
-
- ret = sample_init_pre_load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- skel->rodata->from_match[0] = ifindex_in;
- skel->rodata->to_match[0] = ifindex_out;
- skel->rodata->ifindex_out = ifindex_out;
-
- ret = xdp_redirect__load(skel);
- if (ret < 0) {
- fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno));
- ret = EXIT_FAIL_BPF;
- goto end_destroy;
- }
-
- ret = sample_init(skel, mask);
- if (ret < 0) {
- fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
-
- ret = EXIT_FAIL_XDP;
- if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in,
- generic, force) < 0)
- goto end_destroy;
-
- /* Loading dummy XDP prog on out-device */
- sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out,
- generic, force);
-
- ret = EXIT_FAIL;
- if (!if_indextoname(ifindex_in, ifname_in)) {
- fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
- strerror(errno));
- goto end_destroy;
- }
-
- if (!if_indextoname(ifindex_out, ifname_out)) {
- fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
- strerror(errno));
- goto end_destroy;
- }
-
- safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
- printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
- ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
- snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
-
- ret = sample_run(interval, NULL, NULL);
- if (ret < 0) {
- fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
- ret = EXIT_FAIL;
- goto end_destroy;
- }
- ret = EXIT_OK;
-end_destroy:
- xdp_redirect__destroy(skel);
-end:
- sample_exit(ret);
-}
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 9d41db09c480..266fdd0b025d 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock)
static void read_route(struct nlmsghdr *nh, int nll)
{
char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
- struct bpf_lpm_trie_key *prefix_key;
+ struct bpf_lpm_trie_key_u8 *prefix_key;
struct rtattr *rt_attr;
struct rtmsg *rt_msg;
int rtm_family;
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
deleted file mode 100644
index 5e7459f9bf3e..000000000000
--- a/samples/bpf/xdp_rxq_info_kern.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
- *
- * Example howto extract XDP RX-queue info
- */
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/in.h>
-#include <bpf/bpf_helpers.h>
-
-/* Config setup from with userspace
- *
- * User-side setup ifindex in config_map, to verify that
- * ctx->ingress_ifindex is correct (against configured ifindex)
- */
-struct config {
- __u32 action;
- int ifindex;
- __u32 options;
-};
-enum cfg_options_flags {
- NO_TOUCH = 0x0U,
- READ_MEM = 0x1U,
- SWAP_MAC = 0x2U,
-};
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, int);
- __type(value, struct config);
- __uint(max_entries, 1);
-} config_map SEC(".maps");
-
-/* Common stats data record (shared with userspace) */
-struct datarec {
- __u64 processed;
- __u64 issue;
-};
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} stats_global_map SEC(".maps");
-
-#define MAX_RXQs 64
-
-/* Stats per rx_queue_index (per CPU) */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, MAX_RXQs + 1);
-} rx_queue_index_map SEC(".maps");
-
-static __always_inline
-void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
-
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
-
-SEC("xdp_prog0")
-int xdp_prognum0(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct datarec *rec, *rxq_rec;
- int ingress_ifindex;
- struct config *config;
- u32 key = 0;
-
- /* Global stats record */
- rec = bpf_map_lookup_elem(&stats_global_map, &key);
- if (!rec)
- return XDP_ABORTED;
- rec->processed++;
-
- /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF
- * instructions inside kernel to access xdp_rxq->dev->ifindex
- */
- ingress_ifindex = ctx->ingress_ifindex;
-
- config = bpf_map_lookup_elem(&config_map, &key);
- if (!config)
- return XDP_ABORTED;
-
- /* Simple test: check ctx provided ifindex is as expected */
- if (ingress_ifindex != config->ifindex) {
- /* count this error case */
- rec->issue++;
- return XDP_ABORTED;
- }
-
- /* Update stats per rx_queue_index. Handle if rx_queue_index
- * is larger than stats map can contain info for.
- */
- key = ctx->rx_queue_index;
- if (key >= MAX_RXQs)
- key = MAX_RXQs;
- rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key);
- if (!rxq_rec)
- return XDP_ABORTED;
- rxq_rec->processed++;
- if (key == MAX_RXQs)
- rxq_rec->issue++;
-
- /* Default: Don't touch packet data, only count packets */
- if (unlikely(config->options & (READ_MEM|SWAP_MAC))) {
- struct ethhdr *eth = data;
-
- if (eth + 1 > data_end)
- return XDP_ABORTED;
-
- /* Avoid compiler removing this: Drop non 802.3 Ethertypes */
- if (ntohs(eth->h_proto) < ETH_P_802_3_MIN)
- return XDP_ABORTED;
-
- /* XDP_TX requires changing MAC-addrs, else HW may drop.
- * Can also be enabled with --swapmac (for test purposes)
- */
- if (unlikely(config->options & SWAP_MAC))
- swap_src_dst_mac(data);
- }
-
- return config->action;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
deleted file mode 100644
index 08f5331d2b00..000000000000
--- a/samples/bpf/xdp_rxq_info_user.c
+++ /dev/null
@@ -1,614 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
- */
-static const char *__doc__ = " XDP RX-queue info extract example\n\n"
- "Monitor how many packets per sec (pps) are received\n"
- "per NIC RX queue index and which CPU processed the packet\n"
- ;
-
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-#include <unistd.h>
-#include <locale.h>
-#include <getopt.h>
-#include <net/if.h>
-#include <time.h>
-#include <limits.h>
-#include <arpa/inet.h>
-#include <linux/if_link.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include "bpf_util.h"
-
-static int ifindex = -1;
-static char ifname_buf[IF_NAMESIZE];
-static char *ifname;
-static __u32 prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-
-static struct bpf_map *stats_global_map;
-static struct bpf_map *rx_queue_index_map;
-
-/* Exit return codes */
-#define EXIT_OK 0
-#define EXIT_FAIL 1
-#define EXIT_FAIL_OPTION 2
-#define EXIT_FAIL_XDP 3
-#define EXIT_FAIL_BPF 4
-#define EXIT_FAIL_MEM 5
-
-#define FAIL_MEM_SIG INT_MAX
-#define FAIL_STAT_SIG (INT_MAX - 1)
-
-static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"dev", required_argument, NULL, 'd' },
- {"skb-mode", no_argument, NULL, 'S' },
- {"sec", required_argument, NULL, 's' },
- {"no-separators", no_argument, NULL, 'z' },
- {"action", required_argument, NULL, 'a' },
- {"readmem", no_argument, NULL, 'r' },
- {"swapmac", no_argument, NULL, 'm' },
- {"force", no_argument, NULL, 'F' },
- {0, 0, NULL, 0 }
-};
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (ifindex > -1) {
- if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
- printf("bpf_xdp_query_id failed\n");
- exit(EXIT_FAIL);
- }
- if (prog_id == curr_prog_id) {
- fprintf(stderr,
- "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
- ifindex, ifname);
- bpf_xdp_detach(ifindex, xdp_flags, NULL);
- } else if (!curr_prog_id) {
- printf("couldn't find a prog id on a given iface\n");
- } else {
- printf("program on interface changed, not removing\n");
- }
- }
-
- if (sig == FAIL_MEM_SIG)
- exit(EXIT_FAIL_MEM);
- else if (sig == FAIL_STAT_SIG)
- exit(EXIT_FAIL);
-
- exit(EXIT_OK);
-}
-
-struct config {
- __u32 action;
- int ifindex;
- __u32 options;
-};
-enum cfg_options_flags {
- NO_TOUCH = 0x0U,
- READ_MEM = 0x1U,
- SWAP_MAC = 0x2U,
-};
-#define XDP_ACTION_MAX (XDP_TX + 1)
-#define XDP_ACTION_MAX_STRLEN 11
-static const char *xdp_action_names[XDP_ACTION_MAX] = {
- [XDP_ABORTED] = "XDP_ABORTED",
- [XDP_DROP] = "XDP_DROP",
- [XDP_PASS] = "XDP_PASS",
- [XDP_TX] = "XDP_TX",
-};
-
-static const char *action2str(int action)
-{
- if (action < XDP_ACTION_MAX)
- return xdp_action_names[action];
- return NULL;
-}
-
-static int parse_xdp_action(char *action_str)
-{
- size_t maxlen;
- __u64 action = -1;
- int i;
-
- for (i = 0; i < XDP_ACTION_MAX; i++) {
- maxlen = XDP_ACTION_MAX_STRLEN;
- if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) {
- action = i;
- break;
- }
- }
- return action;
-}
-
-static void list_xdp_actions(void)
-{
- int i;
-
- printf("Available XDP --action <options>\n");
- for (i = 0; i < XDP_ACTION_MAX; i++)
- printf("\t%s\n", xdp_action_names[i]);
- printf("\n");
-}
-
-static char* options2str(enum cfg_options_flags flag)
-{
- if (flag == NO_TOUCH)
- return "no_touch";
- if (flag & SWAP_MAC)
- return "swapmac";
- if (flag & READ_MEM)
- return "read";
- fprintf(stderr, "ERR: Unknown config option flags");
- int_exit(FAIL_STAT_SIG);
- return "unknown";
-}
-
-static void usage(char *argv[])
-{
- int i;
-
- printf("\nDOCUMENTATION:\n%s\n", __doc__);
- printf(" Usage: %s (options-see-below)\n", argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-12s", long_options[i].name);
- if (long_options[i].flag != NULL)
- printf(" flag (internal value:%d)",
- *long_options[i].flag);
- else
- printf(" short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n");
- list_xdp_actions();
-}
-
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
- struct timespec t;
- int res;
-
- res = clock_gettime(CLOCK_MONOTONIC, &t);
- if (res < 0) {
- fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- int_exit(FAIL_STAT_SIG);
- }
- return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
- __u64 processed;
- __u64 issue;
-};
-struct record {
- __u64 timestamp;
- struct datarec total;
- struct datarec *cpu;
-};
-struct stats_record {
- struct record stats;
- struct record *rxq;
-};
-
-static struct datarec *alloc_record_per_cpu(void)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec *array;
-
- array = calloc(nr_cpus, sizeof(struct datarec));
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- int_exit(FAIL_MEM_SIG);
- }
- return array;
-}
-
-static struct record *alloc_record_per_rxq(void)
-{
- unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
- struct record *array;
-
- array = calloc(nr_rxqs, sizeof(struct record));
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
- int_exit(FAIL_MEM_SIG);
- }
- return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
- unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
- struct stats_record *rec;
- int i;
-
- rec = calloc(1, sizeof(struct stats_record));
- if (!rec) {
- fprintf(stderr, "Mem alloc error\n");
- int_exit(FAIL_MEM_SIG);
- }
- rec->rxq = alloc_record_per_rxq();
- for (i = 0; i < nr_rxqs; i++)
- rec->rxq[i].cpu = alloc_record_per_cpu();
-
- rec->stats.cpu = alloc_record_per_cpu();
- return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
- unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
- int i;
-
- for (i = 0; i < nr_rxqs; i++)
- free(r->rxq[i].cpu);
-
- free(r->rxq);
- free(r->stats.cpu);
- free(r);
-}
-
-static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec values[nr_cpus];
- __u64 sum_processed = 0;
- __u64 sum_issue = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_processed += values[i].processed;
- rec->cpu[i].issue = values[i].issue;
- sum_issue += values[i].issue;
- }
- rec->total.processed = sum_processed;
- rec->total.issue = sum_issue;
- return true;
-}
-
-static void stats_collect(struct stats_record *rec)
-{
- int fd, i, max_rxqs;
-
- fd = bpf_map__fd(stats_global_map);
- map_collect_percpu(fd, 0, &rec->stats);
-
- fd = bpf_map__fd(rx_queue_index_map);
- max_rxqs = bpf_map__max_entries(rx_queue_index_map);
- for (i = 0; i < max_rxqs; i++)
- map_collect_percpu(fd, i, &rec->rxq[i]);
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->processed - p->processed;
- pps = packets / period_;
- }
- return pps;
-}
-
-static __u64 calc_errs_pps(struct datarec *r,
- struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->issue - p->issue;
- pps = packets / period_;
- }
- return pps;
-}
-
-static void stats_print(struct stats_record *stats_rec,
- struct stats_record *stats_prev,
- int action, __u32 cfg_opt)
-{
- unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
- unsigned int nr_cpus = bpf_num_possible_cpus();
- double pps = 0, err = 0;
- struct record *rec, *prev;
- double t;
- int rxq;
- int i;
-
- /* Header */
- printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n",
- ifname, ifindex, action2str(action), options2str(cfg_opt));
-
- /* stats_global_map */
- {
- char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n";
- char *fm2_rx = "%-15s %-7s %'-11.0f\n";
- char *errstr = "";
-
- printf("%-15s %-7s %-11s %-11s\n",
- "XDP stats", "CPU", "pps", "issue-pps");
-
- rec = &stats_rec->stats;
- prev = &stats_prev->stats;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps (r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0)
- errstr = "invalid-ifindex";
- if (pps > 0)
- printf(fmt_rx, "XDP-RX CPU",
- i, pps, err, errstr);
- }
- pps = calc_pps (&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- printf(fm2_rx, "XDP-RX CPU", "total", pps, err);
- }
-
- /* rx_queue_index_map */
- printf("\n%-15s %-7s %-11s %-11s\n",
- "RXQ stats", "RXQ:CPU", "pps", "issue-pps");
-
- for (rxq = 0; rxq < nr_rxqs; rxq++) {
- char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n";
- char *fm2_rx = "%-15s %3d:%-3s %'-11.0f\n";
- char *errstr = "";
- int rxq_ = rxq;
-
- /* Last RXQ in map catch overflows */
- if (rxq_ == nr_rxqs - 1)
- rxq_ = -1;
-
- rec = &stats_rec->rxq[rxq];
- prev = &stats_prev->rxq[rxq];
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps (r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0) {
- if (rxq_ == -1)
- errstr = "map-overflow-RXQ";
- else
- errstr = "err";
- }
- if (pps > 0)
- printf(fmt_rx, "rx_queue_index",
- rxq_, i, pps, err, errstr);
- }
- pps = calc_pps (&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- if (pps || err)
- printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err);
- }
-}
-
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
-{
- struct stats_record *tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-static void stats_poll(int interval, int action, __u32 cfg_opt)
-{
- struct stats_record *record, *prev;
-
- record = alloc_stats_record();
- prev = alloc_stats_record();
- stats_collect(record);
-
- while (1) {
- swap(&prev, &record);
- stats_collect(record);
- stats_print(record, prev, action, cfg_opt);
- sleep(interval);
- }
-
- free_stats_record(record);
- free_stats_record(prev);
-}
-
-
-int main(int argc, char **argv)
-{
- __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- int prog_fd, map_fd, opt, err;
- bool use_separators = true;
- struct config cfg = { 0 };
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_map *map;
- char filename[256];
- int longindex = 0;
- int interval = 2;
- __u32 key = 0;
-
-
- char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
- int action = XDP_PASS; /* Default action */
- char *action_str = NULL;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj))
- return EXIT_FAIL;
-
- prog = bpf_object__next_program(obj, NULL);
- bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
-
- err = bpf_object__load(obj);
- if (err)
- return EXIT_FAIL;
- prog_fd = bpf_program__fd(prog);
-
- map = bpf_object__find_map_by_name(obj, "config_map");
- stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map");
- rx_queue_index_map = bpf_object__find_map_by_name(obj, "rx_queue_index_map");
- if (!map || !stats_global_map || !rx_queue_index_map) {
- printf("finding a map in obj file failed\n");
- return EXIT_FAIL;
- }
- map_fd = bpf_map__fd(map);
-
- if (!prog_fd) {
- fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno));
- return EXIT_FAIL;
- }
-
- /* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:",
- long_options, &longindex)) != -1) {
- switch (opt) {
- case 'd':
- if (strlen(optarg) >= IF_NAMESIZE) {
- fprintf(stderr, "ERR: --dev name too long\n");
- goto error;
- }
- ifname = (char *)&ifname_buf;
- strncpy(ifname, optarg, IF_NAMESIZE);
- ifindex = if_nametoindex(ifname);
- if (ifindex == 0) {
- fprintf(stderr,
- "ERR: --dev name unknown err(%d):%s\n",
- errno, strerror(errno));
- goto error;
- }
- break;
- case 's':
- interval = atoi(optarg);
- break;
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'z':
- use_separators = false;
- break;
- case 'a':
- action_str = (char *)&action_str_buf;
- strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
- break;
- case 'r':
- cfg_options |= READ_MEM;
- break;
- case 'm':
- cfg_options |= SWAP_MAC;
- break;
- case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- case 'h':
- error:
- default:
- usage(argv);
- return EXIT_FAIL_OPTION;
- }
- }
-
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- /* Required option */
- if (ifindex == -1) {
- fprintf(stderr, "ERR: required option --dev missing\n");
- usage(argv);
- return EXIT_FAIL_OPTION;
- }
- cfg.ifindex = ifindex;
-
- /* Parse action string */
- if (action_str) {
- action = parse_xdp_action(action_str);
- if (action < 0) {
- fprintf(stderr, "ERR: Invalid XDP --action: %s\n",
- action_str);
- list_xdp_actions();
- return EXIT_FAIL_OPTION;
- }
- }
- cfg.action = action;
-
- /* XDP_TX requires changing MAC-addrs, else HW may drop */
- if (action == XDP_TX)
- cfg_options |= SWAP_MAC;
- cfg.options = cfg_options;
-
- /* Trick to pretty printf with thousands separators use %' */
- if (use_separators)
- setlocale(LC_NUMERIC, "en_US");
-
- /* User-side setup ifindex in config_map */
- err = bpf_map_update_elem(map_fd, &key, &cfg, 0);
- if (err) {
- fprintf(stderr, "Store config failed (err:%d)\n", err);
- exit(EXIT_FAIL_BPF);
- }
-
- /* Remove XDP program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
- fprintf(stderr, "link set xdp fd failed\n");
- return EXIT_FAIL_XDP;
- }
-
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- return err;
- }
- prog_id = info.id;
-
- stats_poll(interval, action, cfg_options);
- return EXIT_OK;
-}
diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h
index 25b1dbe9b37b..fecc41c5df04 100644
--- a/samples/bpf/xdp_sample.bpf.h
+++ b/samples/bpf/xdp_sample.bpf.h
@@ -7,17 +7,9 @@
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
+#include "net_shared.h"
#include "xdp_sample_shared.h"
-#define ETH_ALEN 6
-#define ETH_P_802_3_MIN 0x0600
-#define ETH_P_8021Q 0x8100
-#define ETH_P_8021AD 0x88A8
-#define ETH_P_IP 0x0800
-#define ETH_P_IPV6 0x86DD
-#define ETH_P_ARP 0x0806
-#define IPPROTO_ICMPV6 58
-
#define EINVAL 22
#define ENETDOWN 100
#define EMSGSIZE 90
@@ -55,18 +47,6 @@ static __always_inline void swap_src_dst_mac(void *data)
p[5] = dst[2];
}
-#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
- __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define bpf_ntohs(x) __builtin_bswap16(x)
-#define bpf_htons(x) __builtin_bswap16(x)
-#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
- __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#define bpf_ntohs(x) (x)
-#define bpf_htons(x) (x)
-#else
-# error "Endianness detection needs to be set up for your compiler?!"
-#endif
-
/*
* Note: including linux/compiler.h or linux/kernel.h for the macros below
* conflicts with vmlinux.h include in BPF files, so we define them here.
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
deleted file mode 100644
index 9cf76b340dd7..000000000000
--- a/samples/bpf/xdp_sample_pkts_kern.c
+++ /dev/null
@@ -1,57 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ptrace.h>
-#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-#define SAMPLE_SIZE 64ul
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(u32));
-} my_map SEC(".maps");
-
-SEC("xdp_sample")
-int xdp_sample_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
-
- /* Metadata will be in the perf event before the packet data. */
- struct S {
- u16 cookie;
- u16 pkt_len;
- } __packed metadata;
-
- if (data < data_end) {
- /* The XDP perf_event_output handler will use the upper 32 bits
- * of the flags argument as a number of bytes to include of the
- * packet payload in the event data. If the size is too big, the
- * call to bpf_perf_event_output will fail and return -EFAULT.
- *
- * See bpf_xdp_event_output in net/core/filter.c.
- *
- * The BPF_F_CURRENT_CPU flag means that the event output fd
- * will be indexed by the CPU number in the event map.
- */
- u64 flags = BPF_F_CURRENT_CPU;
- u16 sample_size;
- int ret;
-
- metadata.cookie = 0xdead;
- metadata.pkt_len = (u16)(data_end - data);
- sample_size = min(metadata.pkt_len, SAMPLE_SIZE);
- flags |= (u64)sample_size << 32;
-
- ret = bpf_perf_event_output(ctx, &my_map, flags,
- &metadata, sizeof(metadata));
- if (ret)
- bpf_printk("perf_event_output failed: %d\n", ret);
- }
-
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
deleted file mode 100644
index 7df7163239ac..000000000000
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ /dev/null
@@ -1,196 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <linux/perf_event.h>
-#include <linux/bpf.h>
-#include <net/if.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/ioctl.h>
-#include <signal.h>
-#include <bpf/libbpf.h>
-#include <bpf/bpf.h>
-#include <libgen.h>
-#include <linux/if_link.h>
-
-#include "perf-sys.h"
-
-static int if_idx;
-static char *if_name;
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static __u32 prog_id;
-static struct perf_buffer *pb = NULL;
-
-static int do_attach(int idx, int fd, const char *name)
-{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- int err;
-
- err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
- if (err < 0) {
- printf("ERROR: failed to attach program to %s\n", name);
- return err;
- }
-
- err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- return err;
- }
- prog_id = info.id;
-
- return err;
-}
-
-static int do_detach(int idx, const char *name)
-{
- __u32 curr_prog_id = 0;
- int err = 0;
-
- err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
- if (err) {
- printf("bpf_xdp_query_id failed\n");
- return err;
- }
- if (prog_id == curr_prog_id) {
- err = bpf_xdp_detach(idx, xdp_flags, NULL);
- if (err < 0)
- printf("ERROR: failed to detach prog from %s\n", name);
- } else if (!curr_prog_id) {
- printf("couldn't find a prog id on a %s\n", name);
- } else {
- printf("program on interface changed, not removing\n");
- }
-
- return err;
-}
-
-#define SAMPLE_SIZE 64
-
-static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
-{
- struct {
- __u16 cookie;
- __u16 pkt_len;
- __u8 pkt_data[SAMPLE_SIZE];
- } __packed *e = data;
- int i;
-
- if (e->cookie != 0xdead) {
- printf("BUG cookie %x sized %d\n", e->cookie, size);
- return;
- }
-
- printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
- for (i = 0; i < 14 && i < e->pkt_len; i++)
- printf("%02x ", e->pkt_data[i]);
- printf("\n");
-}
-
-static void sig_handler(int signo)
-{
- do_detach(if_idx, if_name);
- perf_buffer__free(pb);
- exit(0);
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "%s: %s [OPTS] <ifname|ifindex>\n\n"
- "OPTS:\n"
- " -F force loading prog\n"
- " -S use skb-mode\n",
- __func__, prog);
-}
-
-int main(int argc, char **argv)
-{
- const char *optstr = "FS";
- int prog_fd, map_fd, opt;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_map *map;
- char filename[256];
- int ret, err;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
- switch (opt) {
- case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- default:
- usage(basename(argv[0]));
- return 1;
- }
- }
-
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind == argc) {
- usage(basename(argv[0]));
- return 1;
- }
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj))
- return 1;
-
- prog = bpf_object__next_program(obj, NULL);
- bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
-
- err = bpf_object__load(obj);
- if (err)
- return 1;
-
- prog_fd = bpf_program__fd(prog);
-
- map = bpf_object__next_map(obj, NULL);
- if (!map) {
- printf("finding a map in obj file failed\n");
- return 1;
- }
- map_fd = bpf_map__fd(map);
-
- if_idx = if_nametoindex(argv[optind]);
- if (!if_idx)
- if_idx = strtoul(argv[optind], NULL, 0);
-
- if (!if_idx) {
- fprintf(stderr, "Invalid ifname\n");
- return 1;
- }
- if_name = argv[optind];
- err = do_attach(if_idx, prog_fd, if_name);
- if (err)
- return err;
-
- if (signal(SIGINT, sig_handler) ||
- signal(SIGHUP, sig_handler) ||
- signal(SIGTERM, sig_handler)) {
- perror("signal");
- return 1;
- }
-
- pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL);
- err = libbpf_get_error(pb);
- if (err) {
- perror("perf_buffer setup failed");
- return 1;
- }
-
- while ((ret = perf_buffer__poll(pb, 1000)) >= 0) {
- }
-
- kill(0, SIGINT);
- return ret;
-}
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 307baef6861a..7e4b2f7108a6 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -295,7 +295,7 @@ int main(int argc, char **argv)
return 1;
}
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
diff --git a/samples/cgroup/.gitignore b/samples/cgroup/.gitignore
new file mode 100644
index 000000000000..3a0161194cce
--- /dev/null
+++ b/samples/cgroup/.gitignore
@@ -0,0 +1,3 @@
+/cgroup_event_listener
+/memcg_event_listener
+
diff --git a/samples/cgroup/Makefile b/samples/cgroup/Makefile
new file mode 100644
index 000000000000..526c8569707c
--- /dev/null
+++ b/samples/cgroup/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+userprogs-always-y += cgroup_event_listener memcg_event_listener
+
+userccflags += -I usr/include
diff --git a/samples/cgroup/cgroup_event_listener.c b/samples/cgroup/cgroup_event_listener.c
new file mode 100644
index 000000000000..3d70dc831a76
--- /dev/null
+++ b/samples/cgroup/cgroup_event_listener.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cgroup_event_listener.c - Simple listener of cgroup events
+ *
+ * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name>
+ */
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+
+#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>"
+
+/*
+ * Registers an eventfd for the cgroup control file named by argv[1] through
+ * the cgroup.event_control file located in the same directory, then prints a
+ * "crossed" line every time the eventfd is signalled.
+ *
+ * argv[1] is the path of the control file, argv[2] the event arguments that
+ * are appended after the two descriptor numbers in the registration request.
+ * Returns 0 once cgroup.event_control no longer exists; failures reported
+ * through err()/errx() exit with status 1.
+ */
+int main(int argc, char **argv)
+{
+ int efd = -1;
+ int cfd = -1;
+ int event_control = -1;
+ char event_control_path[PATH_MAX];
+ char line[LINE_MAX];
+ int ret;
+
+ if (argc != 3)
+ errx(1, "%s", USAGE_STR);
+
+ cfd = open(argv[1], O_RDONLY);
+ if (cfd == -1)
+ err(1, "Cannot open %s", argv[1]);
+
+ /*
+  * NOTE(review): dirname() is allowed to modify its argument in place; if
+  * it does, the argv[1] printed by the "crossed" message below is no
+  * longer the original control file path - confirm.
+  */
+ ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
+ dirname(argv[1]));
+ if (ret >= PATH_MAX)
+ errx(1, "Path to cgroup.event_control is too long");
+
+ event_control = open(event_control_path, O_WRONLY);
+ if (event_control == -1)
+ err(1, "Cannot open %s", event_control_path);
+
+ efd = eventfd(0, 0);
+ if (efd == -1)
+ err(1, "eventfd() failed");
+
+ /* Registration request: "<eventfd> <control file fd> <args>". */
+ ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
+ if (ret >= LINE_MAX)
+ errx(1, "Arguments string is too long");
+
+ /* The terminating NUL is written together with the string. */
+ ret = write(event_control, line, strlen(line) + 1);
+ if (ret == -1)
+ err(1, "Cannot write to cgroup.event_control");
+
+ while (1) {
+ uint64_t result;
+
+ /* Wait for the next notification; an interrupted read is retried. */
+ ret = read(efd, &result, sizeof(result));
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+ err(1, "Cannot read from eventfd");
+ }
+ assert(ret == sizeof(result));
+
+ /* Stop instead of reporting when cgroup.event_control has vanished. */
+ ret = access(event_control_path, W_OK);
+ if ((ret == -1) && (errno == ENOENT)) {
+ puts("The cgroup seems to have removed.");
+ break;
+ }
+
+ if (ret == -1)
+ err(1, "cgroup.event_control is not accessible any more");
+
+ printf("%s %s: crossed\n", argv[1], argv[2]);
+ }
+
+ return 0;
+}
diff --git a/samples/cgroup/memcg_event_listener.c b/samples/cgroup/memcg_event_listener.c
new file mode 100644
index 000000000000..41425edbd88a
--- /dev/null
+++ b/samples/cgroup/memcg_event_listener.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memcg_event_listener.c - Simple listener of memcg memory.events
+ *
+ * Copyright (c) 2023, SaluteDevices. All Rights Reserved.
+ *
+ * Author: Dmitry Rokosov <ddrokosov@salutedevices.com>
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+/* Size of buffer to use when reading inotify events */
+#define INOTIFY_BUFFER_SIZE 8192
+
+/*
+ * Consume the inotify record at @event: subtract its full size (the fixed
+ * header plus the (event)->len bytes of name that follow it) from @length,
+ * move @event to the following record and evaluate to that new pointer.
+ *
+ * The step is done by byte offset. A plain (event)++ would skip only the
+ * fixed header, and as the last expression of the block it would also yield
+ * the old pointer, so "event = INOTIFY_EVENT_NEXT(event, length)" would
+ * never advance.
+ */
+#define INOTIFY_EVENT_NEXT(event, length) ({				\
+	size_t record_size_ = sizeof(*(event)) + (event)->len;		\
+	(length) -= record_size_;					\
+	(event) = (void *)((char *)(event) + record_size_);		\
+	(event);							\
+})
+
+/* True while @length still covers at least one fixed-size record header. */
+#define INOTIFY_EVENT_OK(event, length) ((length) >= (ssize_t)sizeof(*(event)))
+
+/* Number of elements in @arr; only meaningful for real arrays, not pointers. */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+/*
+ * Last known value of each counter read from memory.events; every field
+ * carries the name of the line it is parsed from.
+ */
+struct memcg_counters {
+ long low;
+ long high;
+ long max;
+ long oom;
+ long oom_kill;
+ long oom_group_kill;
+};
+
+/* State of one monitored memory.events file. */
+struct memcg_events {
+ struct memcg_counters counters; /* counters seen at the last read */
+ char path[PATH_MAX]; /* full path of the memory.events file */
+ int inotify_fd; /* inotify instance descriptor */
+ int inotify_wd; /* watch descriptor for path */
+};
+
+/* Print every counter in @counters to stdout, one "name: value" per line. */
+static void print_memcg_counters(const struct memcg_counters *counters)
+{
+ printf("MEMCG events:\n");
+ printf("\tlow: %ld\n", counters->low);
+ printf("\thigh: %ld\n", counters->high);
+ printf("\tmax: %ld\n", counters->max);
+ printf("\toom: %ld\n", counters->oom);
+ printf("\toom_kill: %ld\n", counters->oom_kill);
+ printf("\toom_group_kill: %ld\n", counters->oom_group_kill);
+}
+
+/*
+ * Parse one "<name> <value>" line of a memory.events file.
+ *
+ * @line:    NUL-terminated line, optionally ending in a single '\n'
+ * @name:    counter name the line is expected to start with
+ * @counter: receives the parsed value, written on success only
+ *
+ * Returns 0 on success, -EINVAL when the name does not match, the value is
+ * missing or is followed by unexpected characters, and -ERANGE when strtol()
+ * reports a conversion error.
+ */
+static int get_memcg_counter(char *line, const char *name, long *counter)
+{
+	size_t len = strlen(name);
+	char *endptr;
+	long tmp;
+
+	/* strncmp() stops at the end of @line; memcmp() could read past it. */
+	if (strncmp(line, name, len)) {
+		warnx("Counter line %s has wrong name, %s is expected",
+		      line, name);
+		return -EINVAL;
+	}
+
+	/* Nothing follows the name: do not step over the terminating NUL. */
+	if (line[len] == '\0') {
+		warnx("Not digits were found in line %s", line);
+		return -EINVAL;
+	}
+
+	/* skip the whitespace delimiter */
+	len += 1;
+
+	errno = 0;
+	tmp = strtol(&line[len], &endptr, 10);
+	if (((tmp == LONG_MAX || tmp == LONG_MIN) && errno == ERANGE) ||
+	    (errno && !tmp)) {
+		warnx("Failed to parse: %s", &line[len]);
+		return -ERANGE;
+	}
+
+	if (endptr == &line[len]) {
+		warnx("Not digits were found in line %s", &line[len]);
+		return -EINVAL;
+	}
+
+	/* Accept only end of string, or one newline followed by it. */
+	if (!(*endptr == '\0' || (*endptr == '\n' && *++endptr == '\0'))) {
+		warnx("Further characters after number: %s", endptr);
+		return -EINVAL;
+	}
+
+	*counter = tmp;
+
+	return 0;
+}
+
+/*
+ * Re-read the memory.events file at events->path and fold any counter that
+ * grew into events->counters.
+ *
+ * @events:    monitored file and the counters seen at the previous read
+ * @show_diff: when true, print one line per counter that grew, or a notice
+ *             when none did
+ *
+ * Lines are expected in the order of map[] below. A file with fewer lines is
+ * accepted; the counters it does not list keep their previous value.
+ *
+ * Returns 0 on success, -EBADF when the file cannot be opened, -EIO on a read
+ * error, or the error returned by get_memcg_counter().
+ */
+static int read_memcg_events(struct memcg_events *events, bool show_diff)
+{
+	FILE *fp = fopen(events->path, "re");
+	size_t i;
+	int ret = 0;
+	bool any_new_events = false;
+	char *line = NULL;
+	size_t len = 0;
+	struct memcg_counters *counters = &events->counters;
+	/*
+	 * Start from the known values so that a counter missing from a short
+	 * file compares equal below instead of being read uninitialized.
+	 */
+	struct memcg_counters new_counters = *counters;
+	struct {
+		const char *name;
+		long *new;
+		long *old;
+	} map[] = {
+		{
+			.name = "low",
+			.new = &new_counters.low,
+			.old = &counters->low,
+		},
+		{
+			.name = "high",
+			.new = &new_counters.high,
+			.old = &counters->high,
+		},
+		{
+			.name = "max",
+			.new = &new_counters.max,
+			.old = &counters->max,
+		},
+		{
+			.name = "oom",
+			.new = &new_counters.oom,
+			.old = &counters->oom,
+		},
+		{
+			.name = "oom_kill",
+			.new = &new_counters.oom_kill,
+			.old = &counters->oom_kill,
+		},
+		{
+			.name = "oom_group_kill",
+			.new = &new_counters.oom_group_kill,
+			.old = &counters->oom_group_kill,
+		},
+	};
+
+	if (!fp) {
+		warn("Failed to open memcg events file %s", events->path);
+		return -EBADF;
+	}
+
+	/* Read new values for memcg counters */
+	for (i = 0; i < ARRAY_SIZE(map); ++i) {
+		ssize_t nread;
+
+		/* getline() returns -1 for both EOF and error; errno tells which. */
+		errno = 0;
+		nread = getline(&line, &len, fp);
+		if (nread == -1) {
+			if (errno) {
+				warn("Failed to read line for counter %s",
+				     map[i].name);
+				ret = -EIO;
+				goto exit;
+			}
+
+			break;
+		}
+
+		ret = get_memcg_counter(line, map[i].name, map[i].new);
+		if (ret) {
+			warnx("Failed to get counter value from line %s", line);
+			goto exit;
+		}
+	}
+
+	/* Only growth is reported and stored; other changes are ignored. */
+	for (i = 0; i < ARRAY_SIZE(map); ++i) {
+		long diff;
+
+		if (*map[i].new > *map[i].old) {
+			diff = *map[i].new - *map[i].old;
+
+			if (show_diff)
+				printf("*** %ld MEMCG %s event%s, "
+				       "change counter %ld => %ld\n",
+				       diff, map[i].name,
+				       (diff == 1) ? "" : "s",
+				       *map[i].old, *map[i].new);
+
+			*map[i].old += diff;
+			any_new_events = true;
+		}
+	}
+
+	if (show_diff && !any_new_events)
+		printf("*** No new untracked memcg events available\n");
+
+exit:
+	free(line);
+	fclose(fp);
+
+	return ret;
+}
+
+/*
+ * Handle one inotify record for the monitored file.
+ *
+ * Records for a different watch descriptor, or without IN_MODIFY set, are
+ * reported with a warning and skipped. Otherwise the counters are re-read
+ * and the differences printed.
+ */
+static void process_memcg_events(struct memcg_events *events,
+ struct inotify_event *event)
+{
+ int ret;
+
+ if (events->inotify_wd != event->wd) {
+ warnx("Unknown inotify event %d, should be %d", event->wd,
+ events->inotify_wd);
+ return;
+ }
+
+ printf("Received event in %s:\n", events->path);
+
+ if (!(event->mask & IN_MODIFY)) {
+ warnx("No IN_MODIFY event, skip it");
+ return;
+ }
+
+ ret = read_memcg_events(events, /* show_diff = */true);
+ if (ret)
+ warnx("Can't read memcg events");
+}
+
+/*
+ * Wait on the inotify descriptor of @events and hand every record read from
+ * it to process_memcg_events().
+ *
+ * Loops until a fatal error: a poll() failure other than EINTR/EAGAIN, or
+ * POLLERR on the descriptor, terminates the process through err().
+ */
+static void monitor_events(struct memcg_events *events)
+{
+	struct pollfd fds[1];
+	int ret;
+
+	printf("Started monitoring memory events from '%s'...\n", events->path);
+
+	fds[0].fd = events->inotify_fd;
+	fds[0].events = POLLIN;
+
+	for (;;) {
+		ret = poll(fds, ARRAY_SIZE(fds), -1);
+		if (ret < 0) {
+			/*
+			 * An interrupted or transiently failed poll() leaves
+			 * revents unspecified, so retry rather than inspect it.
+			 */
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+			err(EXIT_FAILURE, "Can't poll memcg events (%d)", ret);
+		}
+
+		if (fds[0].revents & POLLERR)
+			err(EXIT_FAILURE, "Got POLLERR during monitor events");
+
+		if (fds[0].revents & POLLIN) {
+			struct inotify_event *event;
+			/* Aligned so the buffer may be read as inotify records. */
+			char buffer[INOTIFY_BUFFER_SIZE]
+				__attribute__((aligned(__alignof__(struct inotify_event))));
+			ssize_t length;
+
+			length = read(fds[0].fd, buffer, INOTIFY_BUFFER_SIZE);
+			if (length <= 0)
+				continue;
+
+			/* One read() may return several records back to back. */
+			event = (struct inotify_event *)buffer;
+			while (INOTIFY_EVENT_OK(event, length)) {
+				process_memcg_events(events, event);
+				event = INOTIFY_EVENT_NEXT(event, length);
+			}
+		}
+	}
+}
+
+/*
+ * Prepare @events for monitoring the memory.events file of @cgroup.
+ *
+ * Builds the path "/sys/fs/cgroup/<cgroup>/memory.events", reads the current
+ * counters as the baseline, creates an inotify instance and adds an IN_MODIFY
+ * watch on the file.
+ *
+ * Returns 0 on success or a negative value on failure; no inotify descriptor
+ * is left open when an error is returned.
+ */
+static int initialize_memcg_events(struct memcg_events *events,
+				   const char *cgroup)
+{
+	int ret;
+
+	memset(events, 0, sizeof(struct memcg_events));
+
+	ret = snprintf(events->path, PATH_MAX,
+		       "/sys/fs/cgroup/%s/memory.events", cgroup);
+	if (ret >= PATH_MAX) {
+		warnx("Path to cgroup memory.events is too long");
+		return -EMSGSIZE;
+	} else if (ret < 0) {
+		warn("Can't generate cgroup event full name");
+		return ret;
+	}
+
+	ret = read_memcg_events(events, /* show_diff = */false);
+	if (ret) {
+		warnx("Failed to read initial memcg events state (%d)", ret);
+		return ret;
+	}
+
+	events->inotify_fd = inotify_init();
+	if (events->inotify_fd < 0) {
+		warn("Failed to setup new inotify device");
+		return -EMFILE;
+	}
+
+	events->inotify_wd = inotify_add_watch(events->inotify_fd,
+					       events->path, IN_MODIFY);
+	if (events->inotify_wd < 0) {
+		/* Report first: close() below may overwrite errno. */
+		warn("Couldn't add monitor in dir %s", events->path);
+		close(events->inotify_fd);
+		events->inotify_fd = -1;
+		return -EIO;
+	}
+
+	printf("Initialized MEMCG events with counters:\n");
+	print_memcg_counters(&events->counters);
+
+	return 0;
+}
+
+/* Remove the inotify watch of @events and close its inotify descriptor. */
+static void cleanup_memcg_events(struct memcg_events *events)
+{
+ inotify_rm_watch(events->inotify_fd, events->inotify_wd);
+ close(events->inotify_fd);
+}
+
+/*
+ * Entry point: expects exactly one argument, the cgroup name that is placed
+ * under /sys/fs/cgroup/ by initialize_memcg_events(), and monitors that
+ * cgroup's memory.events file.
+ */
+int main(int argc, const char **argv)
+{
+ struct memcg_events events;
+ ssize_t ret;
+
+ if (argc != 2)
+ errx(EXIT_FAILURE, "Usage: %s <cgroup>", argv[0]);
+
+ ret = initialize_memcg_events(&events, argv[1]);
+ if (ret)
+ errx(EXIT_FAILURE, "Can't initialize memcg events (%zd)", ret);
+
+ /*
+  * NOTE(review): monitor_events() loops without a break or return, so
+  * the cleanup and exit message below look unreachable - confirm.
+  */
+ monitor_events(&events);
+
+ cleanup_memcg_events(&events);
+
+ printf("Exiting memcg event listener...\n");
+
+ return EXIT_SUCCESS;
+}
diff --git a/samples/check-exec/.gitignore b/samples/check-exec/.gitignore
new file mode 100644
index 000000000000..cd759a19dacd
--- /dev/null
+++ b/samples/check-exec/.gitignore
@@ -0,0 +1,2 @@
+/inc
+/set-exec
diff --git a/samples/check-exec/Makefile b/samples/check-exec/Makefile
new file mode 100644
index 000000000000..c4f08ad0f8e3
--- /dev/null
+++ b/samples/check-exec/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+userprogs-always-y := \
+ inc \
+ set-exec
+
+userccflags += -I usr/include
+
+.PHONY: all clean
+
+all:
+ $(MAKE) -C ../.. samples/check-exec/
+
+clean:
+ $(MAKE) -C ../.. M=samples/check-exec/ clean
diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c
new file mode 100644
index 000000000000..7f6ef06a2f06
--- /dev/null
+++ b/samples/check-exec/inc.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Very simple script interpreter that can evaluate two different commands (one
+ * per line):
+ * - "?" to initialize a counter from user's input;
+ * - "+" to increment the counter (which is set to 0 by default).
+ *
+ * See tools/testing/selftests/exec/check-exec-tests.sh and
+ * Documentation/userspace-api/check_exec.rst
+ *
+ * Copyright © 2024 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/fcntl.h>
+#include <linux/prctl.h>
+#include <linux/securebits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/* Thin wrapper that issues the execveat system call through syscall(2). */
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+ char *const envp[], int flags)
+{
+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
+/*
+ * Interprets the NUL-terminated script in @buffer, one command per line.
+ * The buffer is split in place by strtok_r(); @buffer_size is not used.
+ * Lines starting with '#' are ignored, every other line must be exactly one
+ * character long: '+' or '?'.
+ *
+ * Returns 1 on error, 0 otherwise.
+ */
+static int interpret_buffer(char *buffer, size_t buffer_size)
+{
+ char *line, *saveptr = NULL;
+ long long number = 0;
+
+ /* Each command is the first character of a line. */
+ saveptr = NULL;
+ line = strtok_r(buffer, "\n", &saveptr);
+ while (line) {
+ if (*line != '#' && strlen(line) != 1) {
+ fprintf(stderr, "# ERROR: Unknown string\n");
+ return 1;
+ }
+ switch (*line) {
+ case '#':
+ /* Skips shebang and comments. */
+ break;
+ case '+':
+ /* Increments and prints the number. */
+ number++;
+ printf("%lld\n", number);
+ break;
+ case '?':
+ /* Reads integer from stdin. */
+ fprintf(stderr, "> Enter new number: \n");
+ /* A failed read only warns; interpretation continues. */
+ if (scanf("%lld", &number) != 1) {
+ fprintf(stderr,
+ "# WARNING: Failed to read number from stdin\n");
+ }
+ break;
+ default:
+ fprintf(stderr, "# ERROR: Unknown character '%c'\n",
+ *line);
+ return 1;
+ }
+ line = strtok_r(NULL, "\n", &saveptr);
+ }
+ return 0;
+}
+
+/*
+ * Checks the open script @script with execveat(AT_EXECVE_CHECK) and then
+ * interprets its content.
+ *
+ * A failed check is fatal only when @restrict_stream is true; otherwise its
+ * result is ignored. At most sizeof(buf) - 1 bytes of the script are read, so
+ * anything beyond that is not interpreted.
+ *
+ * Returns 1 on error, 0 otherwise.
+ */
+static int interpret_stream(FILE *script, char *const script_name,
+ char *const *const envp, const bool restrict_stream)
+{
+ int err;
+ char *const script_argv[] = { script_name, NULL };
+ char buf[128] = {};
+ size_t buf_size = sizeof(buf);
+
+ /*
+ * We pass a valid argv and envp to the kernel to emulate a native
+ * script execution. We must use the script file descriptor instead of
+ * the script path name to avoid race conditions.
+ */
+ err = sys_execveat(fileno(script), "", script_argv, envp,
+ AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ if (err && restrict_stream) {
+ perror("ERROR: Script execution check");
+ return 1;
+ }
+
+ /* Reads script. */
+ /* buf starts zeroed and its last byte is never written: stays a string. */
+ buf_size = fread(buf, 1, buf_size - 1, script);
+ return interpret_buffer(buf, buf_size);
+}
+
+/* Prints the command-line synopsis and an example invocation to stderr. */
+static void print_usage(const char *argv0)
+{
+ fprintf(stderr, "usage: %s <script.inc> | -i | -c <command>\n\n",
+ argv0);
+ fprintf(stderr, "Example:\n");
+ fprintf(stderr, " ./set-exec -fi -- ./inc -i < script-exec.inc\n");
+}
+
+/*
+ * Entry point of the sample interpreter.
+ *
+ * Exactly one input source must be given: a script path as the only
+ * argument, -i to read the script from stdin, or -c <command> to interpret a
+ * snippet from the command line. The process securebits decide what is
+ * refused: SECBIT_EXEC_DENY_INTERACTIVE applies to -c and to stdin,
+ * SECBIT_EXEC_RESTRICT_FILE applies to script files.
+ *
+ * Returns 0 on success and 1 on any error or usage mistake.
+ */
+int main(const int argc, char *const argv[], char *const *const envp)
+{
+	int opt;
+	char *cmd = NULL;
+	char *script_name = NULL;
+	bool interpret_stdin = false;
+	FILE *script_file = NULL;
+	int secbits;
+	bool deny_interactive, restrict_file;
+	size_t arg_nb;
+
+	secbits = prctl(PR_GET_SECUREBITS);
+	if (secbits == -1) {
+		/*
+		 * This should never happen, except with a buggy seccomp
+		 * filter.
+		 */
+		perror("ERROR: Failed to get securebits");
+		return 1;
+	}
+
+	deny_interactive = !!(secbits & SECBIT_EXEC_DENY_INTERACTIVE);
+	restrict_file = !!(secbits & SECBIT_EXEC_RESTRICT_FILE);
+
+	while ((opt = getopt(argc, argv, "c:i")) != -1) {
+		switch (opt) {
+		case 'c':
+			if (cmd) {
+				/* Newline-terminated like every other diagnostic. */
+				fprintf(stderr, "ERROR: Command already set\n");
+				return 1;
+			}
+			cmd = optarg;
+			break;
+		case 'i':
+			interpret_stdin = true;
+			break;
+		default:
+			print_usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* Checks that only one argument is used, or read stdin. */
+	arg_nb = !!cmd + !!interpret_stdin;
+	if (arg_nb == 0 && argc == 2) {
+		script_name = argv[1];
+	} else if (arg_nb != 1) {
+		print_usage(argv[0]);
+		return 1;
+	}
+
+	if (cmd) {
+		/*
+		 * Other kind of interactive interpretations should be denied
+		 * as well (e.g. CLI arguments passing script snippets,
+		 * environment variables interpreted as script). However, any
+		 * way to pass script files should only be restricted according
+		 * to restrict_file.
+		 */
+		if (deny_interactive) {
+			fprintf(stderr,
+				"ERROR: Interactive interpretation denied.\n");
+			return 1;
+		}
+
+		return interpret_buffer(cmd, strlen(cmd));
+	}
+
+	if (interpret_stdin && !script_name) {
+		script_file = stdin;
+		/*
+		 * As for any execve(2) call, this path may be logged by the
+		 * kernel.
+		 */
+		script_name = "/proc/self/fd/0";
+		/*
+		 * When stdin is used, it can point to a regular file or a
+		 * pipe. Restrict stdin execution according to
+		 * SECBIT_EXEC_DENY_INTERACTIVE but always allow executable
+		 * files (which are not considered as interactive inputs).
+		 */
+		return interpret_stream(script_file, script_name, envp,
+					deny_interactive);
+	} else if (script_name && !interpret_stdin) {
+		/*
+		 * In this sample, we don't pass any argument to scripts, but
+		 * otherwise we would have to forge an argv with such
+		 * arguments.
+		 */
+		script_file = fopen(script_name, "r");
+		if (!script_file) {
+			perror("ERROR: Failed to open script");
+			return 1;
+		}
+		/*
+		 * Restricts file execution according to
+		 * SECBIT_EXEC_RESTRICT_FILE.
+		 */
+		return interpret_stream(script_file, script_name, envp,
+					restrict_file);
+	}
+
+	print_usage(argv[0]);
+	return 1;
+}
diff --git a/samples/check-exec/run-script-ask.sh b/samples/check-exec/run-script-ask.sh
new file mode 100755
index 000000000000..8ef0fdc37266
--- /dev/null
+++ b/samples/check-exec/run-script-ask.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env sh
+# SPDX-License-Identifier: BSD-3-Clause
+
+DIR="$(dirname -- "$0")"
+
+PATH="${PATH}:${DIR}"
+
+set -x
+"${DIR}/script-ask.inc"
diff --git a/samples/check-exec/script-ask.inc b/samples/check-exec/script-ask.inc
new file mode 100755
index 000000000000..720a8e649225
--- /dev/null
+++ b/samples/check-exec/script-ask.inc
@@ -0,0 +1,5 @@
+#!/usr/bin/env inc
+# SPDX-License-Identifier: BSD-3-Clause
+
+?
++
diff --git a/samples/check-exec/script-exec.inc b/samples/check-exec/script-exec.inc
new file mode 100755
index 000000000000..3245cb9d8dd1
--- /dev/null
+++ b/samples/check-exec/script-exec.inc
@@ -0,0 +1,4 @@
+#!/usr/bin/env inc
+# SPDX-License-Identifier: BSD-3-Clause
+
++
diff --git a/samples/check-exec/script-noexec.inc b/samples/check-exec/script-noexec.inc
new file mode 100644
index 000000000000..3245cb9d8dd1
--- /dev/null
+++ b/samples/check-exec/script-noexec.inc
@@ -0,0 +1,4 @@
+#!/usr/bin/env inc
+# SPDX-License-Identifier: BSD-3-Clause
+
++
diff --git a/samples/check-exec/set-exec.c b/samples/check-exec/set-exec.c
new file mode 100644
index 000000000000..ba86a60a20dd
--- /dev/null
+++ b/samples/check-exec/set-exec.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Simple tool to set SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE,
+ * before executing a command.
+ *
+ * Copyright © 2024 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <errno.h>
+#include <linux/prctl.h>
+#include <linux/securebits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+/* Print the command-line synopsis and the meaning of -f and -i to stderr. */
+static void print_usage(const char *argv0)
+{
+ fprintf(stderr, "usage: %s -f|-i -- <cmd> [args]...\n\n", argv0);
+ fprintf(stderr, "Execute a command with\n");
+ fprintf(stderr, "- SECBIT_EXEC_RESTRICT_FILE set: -f\n");
+ fprintf(stderr, "- SECBIT_EXEC_DENY_INTERACTIVE set: -i\n");
+}
+
+/*
+ * Parse -f / -i, set the matching securebits (together with their _LOCKED
+ * counterparts) on the current process, then execute the command that follows
+ * the options with the unchanged environment.
+ *
+ * Returns 1 on every failure path; on success execvpe() does not return.
+ */
+int main(const int argc, char *const argv[], char *const *const envp)
+{
+ const char *cmd_path;
+ char *const *cmd_argv;
+ int opt, secbits_cur, secbits_new;
+ bool has_policy = false;
+
+ secbits_cur = prctl(PR_GET_SECUREBITS);
+ if (secbits_cur == -1) {
+ /*
+ * This should never happen, except with a buggy seccomp
+ * filter.
+ */
+ perror("ERROR: Failed to get securebits");
+ return 1;
+ }
+
+ /* Start from the current value so already-set bits are kept. */
+ secbits_new = secbits_cur;
+ while ((opt = getopt(argc, argv, "fi")) != -1) {
+ switch (opt) {
+ case 'f':
+ secbits_new |= SECBIT_EXEC_RESTRICT_FILE |
+ SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+ has_policy = true;
+ break;
+ case 'i':
+ secbits_new |= SECBIT_EXEC_DENY_INTERACTIVE |
+ SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+ has_policy = true;
+ break;
+ default:
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ /* Both a command and at least one of -f / -i are required. */
+ if (!argv[optind] || !has_policy) {
+ print_usage(argv[0]);
+ return 1;
+ }
+
+ /* Skip the prctl() when the requested bits are already all set. */
+ if (secbits_cur != secbits_new &&
+ prctl(PR_SET_SECUREBITS, secbits_new)) {
+ perror("Failed to set secure bit(s).");
+ fprintf(stderr,
+ "Hint: The running kernel may not support this feature.\n");
+ return 1;
+ }
+
+ cmd_path = argv[optind];
+ cmd_argv = argv + optind;
+ fprintf(stderr, "Executing command...\n");
+ execvpe(cmd_path, cmd_argv, envp);
+ /* Only reached when execvpe() failed. */
+ fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path,
+ strerror(errno));
+ return 1;
+}
diff --git a/samples/configfs/configfs_sample.c b/samples/configfs/configfs_sample.c
index 37a657b25d58..fd5d163828c5 100644
--- a/samples/configfs/configfs_sample.c
+++ b/samples/configfs/configfs_sample.c
@@ -364,4 +364,5 @@ static void __exit configfs_example_exit(void)
module_init(configfs_example_init);
module_exit(configfs_example_exit);
+MODULE_DESCRIPTION("Sample configfs module");
MODULE_LICENSE("GPL");
diff --git a/samples/connector/cn_test.c b/samples/connector/cn_test.c
index 0958a171d048..73d50b4aebb6 100644
--- a/samples/connector/cn_test.c
+++ b/samples/connector/cn_test.c
@@ -172,7 +172,7 @@ static int cn_test_init(void)
static void cn_test_fini(void)
{
- del_timer_sync(&cn_test_timer);
+ timer_delete_sync(&cn_test_timer);
cn_del_callback(&cn_test_id);
cn_test_id.val--;
cn_del_callback(&cn_test_id);
diff --git a/samples/damon/Kconfig b/samples/damon/Kconfig
new file mode 100644
index 000000000000..cbf96fd8a8bf
--- /dev/null
+++ b/samples/damon/Kconfig
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "DAMON Samples"
+
+config SAMPLE_DAMON_WSSE
+ bool "DAMON sample module for working set size estimation"
+ depends on DAMON && DAMON_VADDR
+ help
+ This builds DAMON sample module for working set size estimation.
+
+ The module receives a pid, monitors accesses to the virtual address
+ space of the process, estimates the working set size of the process,
+ and repeatedly prints the size on the kernel log.
+
+ If unsure, say N.
+
+config SAMPLE_DAMON_PRCL
+ bool "DAMON sample module for access-aware proactive reclamation"
+ depends on DAMON && DAMON_VADDR
+ help
+ This builds DAMON sample module for access-aware proactive
+ reclamation.
+
+ The module receives a pid, monitors accesses to the virtual address
+ space of the process, finds memory regions that are not accessed, and
+ proactively reclaims the regions.
+
+ If unsure, say N.
+
+config SAMPLE_DAMON_MTIER
+ bool "DAMON sample module for memory tiering"
+ depends on DAMON && DAMON_PADDR
+ help
+ This builds DAMON sample module for memory tiering.
+
+ The module assumes the system is constructed with two NUMA nodes,
+ which appear as local and remote nodes to all CPUs. For example,
+ node0 is for DDR5 DRAMs connected via DIMM, while node1 is for DDR4
+ DRAMs connected via CXL.
+
+ If unsure, say N.
+
+endmenu
diff --git a/samples/damon/Makefile b/samples/damon/Makefile
new file mode 100644
index 000000000000..72f68cbf422a
--- /dev/null
+++ b/samples/damon/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SAMPLE_DAMON_WSSE) += wsse.o
+obj-$(CONFIG_SAMPLE_DAMON_PRCL) += prcl.o
+obj-$(CONFIG_SAMPLE_DAMON_MTIER) += mtier.o
diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c
new file mode 100644
index 000000000000..775838a23d93
--- /dev/null
+++ b/samples/damon/mtier.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memory tiering: migrate cold pages in node 0 and hot pages in node 1 to node
+ * 1 and node 0, respectively. Adjust the hotness/coldness threshold, aiming
+ * for a resulting 99.6 % node 0 utilization ratio.
+ */
+
+#define pr_fmt(fmt) "damon_sample_mtier: " fmt
+
+#include <linux/damon.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_sample_mtier."
+
+static unsigned long node0_start_addr __read_mostly;
+module_param(node0_start_addr, ulong, 0600);
+
+static unsigned long node0_end_addr __read_mostly;
+module_param(node0_end_addr, ulong, 0600);
+
+static unsigned long node1_start_addr __read_mostly;
+module_param(node1_start_addr, ulong, 0600);
+
+static unsigned long node1_end_addr __read_mostly;
+module_param(node1_end_addr, ulong, 0600);
+
+static unsigned long node0_mem_used_bp __read_mostly = 9970;
+module_param(node0_mem_used_bp, ulong, 0600);
+
+static unsigned long node0_mem_free_bp __read_mostly = 50;
+module_param(node0_mem_free_bp, ulong, 0600);
+
+static int damon_sample_mtier_enable_store(
+ const char *val, const struct kernel_param *kp);
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_sample_mtier_enable_store,
+ .get = param_get_bool,
+};
+
+static bool enabled __read_mostly;
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled, "Enable or disable DAMON_SAMPLE_MTIER");
+
+static bool detect_node_addresses __read_mostly;
+module_param(detect_node_addresses, bool, 0600);
+
+static struct damon_ctx *ctxs[2];
+
+struct region_range {
+ phys_addr_t start;
+ phys_addr_t end;
+};
+
+/*
+ * Fill @range with the physical address span of NUMA node @target_node,
+ * derived from the node's start and end PFNs.
+ *
+ * Returns 0 on success, or -EINVAL (after logging an error) when the node is
+ * not online.
+ */
+static int nid_to_phys(int target_node, struct region_range *range)
+{
+ if (!node_online(target_node)) {
+ pr_err("NUMA node %d is not online\n", target_node);
+ return -EINVAL;
+ }
+
+ range->start = PFN_PHYS(node_start_pfn(target_node));
+ range->end = PFN_PHYS(node_end_pfn(target_node));
+
+ return 0;
+}
+
+/*
+ * Build one DAMON context for the tiering sample.
+ *
+ * @promote: true builds the context that monitors node 1 and migrates hot
+ * regions to node 0 (DAMOS_MIGRATE_HOT); false builds the context that
+ * monitors node 0 and migrates cold regions to node 1 (DAMOS_MIGRATE_COLD).
+ *
+ * Returns the new context, or NULL on any failure, in which case the
+ * partially built context is destroyed before returning.
+ */
+static struct damon_ctx *damon_sample_mtier_build_ctx(bool promote)
+{
+ struct damon_ctx *ctx;
+ struct damon_attrs attrs;
+ struct damon_target *target;
+ struct damon_region *region;
+ struct damos *scheme;
+ struct damos_quota_goal *quota_goal;
+ struct damos_filter *filter;
+ struct region_range addr;
+ int ret;
+
+ ctx = damon_new_ctx();
+ if (!ctx)
+ return NULL;
+ attrs = (struct damon_attrs) {
+ .sample_interval = 5 * USEC_PER_MSEC,
+ .aggr_interval = 100 * USEC_PER_MSEC,
+ .ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC,
+ .min_nr_regions = 10,
+ .max_nr_regions = 1000,
+ };
+
+ /*
+ * auto-tune sampling and aggregation interval aiming 4% DAMON-observed
+ * accesses ratio, keeping sampling interval in [5ms, 10s] range.
+ */
+ attrs.intervals_goal = (struct damon_intervals_goal) {
+ .access_bp = 400, .aggrs = 3,
+ .min_sample_us = 5000, .max_sample_us = 10000000,
+ };
+ if (damon_set_attrs(ctx, &attrs))
+ goto free_out;
+ if (damon_select_ops(ctx, DAMON_OPS_PADDR))
+ goto free_out;
+
+ target = damon_new_target();
+ if (!target)
+ goto free_out;
+ damon_add_target(ctx, target);
+
+ /*
+ * The monitored range is node 1 when promoting and node 0 when
+ * demoting, taken either from the NUMA topology or from the
+ * node{0,1}_{start,end}_addr module parameters.
+ */
+ if (detect_node_addresses) {
+ ret = promote ? nid_to_phys(1, &addr) : nid_to_phys(0, &addr);
+ if (ret)
+ goto free_out;
+ } else {
+ addr.start = promote ? node1_start_addr : node0_start_addr;
+ addr.end = promote ? node1_end_addr : node0_end_addr;
+ }
+
+ region = damon_new_region(addr.start, addr.end);
+ if (!region)
+ goto free_out;
+ damon_add_region(region, target);
+
+ scheme = damon_new_scheme(
+ /* access pattern */
+ &(struct damos_access_pattern) {
+ .min_sz_region = PAGE_SIZE,
+ .max_sz_region = ULONG_MAX,
+ .min_nr_accesses = promote ? 1 : 0,
+ .max_nr_accesses = promote ? UINT_MAX : 0,
+ .min_age_region = 0,
+ .max_age_region = UINT_MAX},
+ /* action */
+ promote ? DAMOS_MIGRATE_HOT : DAMOS_MIGRATE_COLD,
+ 1000000, /* apply interval (1s) */
+ &(struct damos_quota){
+ /* 200 MiB per sec by most */
+ .reset_interval = 1000,
+ .sz = 200 * 1024 * 1024,
+ /* ignore size of region when prioritizing */
+ .weight_sz = 0,
+ .weight_nr_accesses = 100,
+ .weight_age = 100,
+ },
+ &(struct damos_watermarks){},
+ promote ? 0 : 1); /* migrate target node id */
+ if (!scheme)
+ goto free_out;
+ damon_set_schemes(ctx, &scheme, 1);
+ /*
+ * Quota goal measured on node 0: used-memory ratio (node0_mem_used_bp)
+ * for promotion, free-memory ratio (node0_mem_free_bp) for demotion.
+ */
+ quota_goal = damos_new_quota_goal(
+ promote ? DAMOS_QUOTA_NODE_MEM_USED_BP :
+ DAMOS_QUOTA_NODE_MEM_FREE_BP,
+ promote ? node0_mem_used_bp : node0_mem_free_bp);
+ if (!quota_goal)
+ goto free_out;
+ quota_goal->nid = 0;
+ damos_add_quota_goal(&scheme->quota, quota_goal);
+ /*
+ * NOTE(review): young-page filter created with (true, promote);
+ * presumably these are the "matching" and "allow" flags, confirm
+ * against damos_new_filter() in damon.h.
+ */
+ filter = damos_new_filter(DAMOS_FILTER_TYPE_YOUNG, true, promote);
+ if (!filter)
+ goto free_out;
+ damos_add_filter(scheme, filter);
+ return ctx;
+free_out:
+ damon_destroy_ctx(ctx);
+ return NULL;
+}
+
+/*
+ * Build the promotion context (ctxs[0]) and the demotion context (ctxs[1])
+ * and start both of them.
+ *
+ * Returns 0 on success, -ENOMEM when either context cannot be built, or the
+ * error returned by damon_start().
+ *
+ * NOTE(review): when damon_start() fails, neither context is destroyed here
+ * and the caller only clears "enabled"; confirm whether that is intended.
+ */
+static int damon_sample_mtier_start(void)
+{
+ struct damon_ctx *ctx;
+
+ ctx = damon_sample_mtier_build_ctx(true);
+ if (!ctx)
+ return -ENOMEM;
+ ctxs[0] = ctx;
+ ctx = damon_sample_mtier_build_ctx(false);
+ if (!ctx) {
+ /* Do not leave the already built promotion context behind. */
+ damon_destroy_ctx(ctxs[0]);
+ return -ENOMEM;
+ }
+ ctxs[1] = ctx;
+ return damon_start(ctxs, 2, true);
+}
+
+/* Stop both tiering contexts, then destroy them. */
+static void damon_sample_mtier_stop(void)
+{
+ damon_stop(ctxs, 2);
+ damon_destroy_ctx(ctxs[0]);
+ damon_destroy_ctx(ctxs[1]);
+}
+
+/*
+ * .set handler of the "enabled" parameter.
+ *
+ * Parses @val as a boolean directly into "enabled".  Nothing more is done
+ * when the value did not change or when DAMON is not initialized yet (the
+ * stored value is then kept as is).  Otherwise the sample is started or
+ * stopped to match the new value.
+ *
+ * Returns 0 on success, the kstrtobool() error for unparsable input, or the
+ * start error, in which case "enabled" is reset to false.  @kp is unused.
+ */
+static int damon_sample_mtier_enable_store(
+ const char *val, const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ int err;
+
+ err = kstrtobool(val, &enabled);
+ if (err)
+ return err;
+
+ if (enabled == is_enabled)
+ return 0;
+
+ if (!damon_initialized())
+ return 0;
+
+ if (enabled) {
+ err = damon_sample_mtier_start();
+ if (err)
+ enabled = false;
+ return err;
+ }
+ damon_sample_mtier_stop();
+ return 0;
+}
+
+/*
+ * Init hook: start the sample right away when "enabled" was already set
+ * (e.g. on the kernel command line) before this function ran.
+ *
+ * Returns -ENOMEM when DAMON is not initialized, the start error when the
+ * start fails ("enabled" is cleared in both cases), and 0 otherwise.  The
+ * start error used to be computed and then dropped; it is now returned so
+ * that a failed start is not reported as success.
+ */
+static int __init damon_sample_mtier_init(void)
+{
+	int err;
+
+	if (!damon_initialized()) {
+		enabled = false;
+		return -ENOMEM;
+	}
+
+	if (!enabled)
+		return 0;
+
+	err = damon_sample_mtier_start();
+	if (err)
+		enabled = false;
+	return err;
+}
+
+module_init(damon_sample_mtier_init);
diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c
new file mode 100644
index 000000000000..b7c50f2656ce
--- /dev/null
+++ b/samples/damon/prcl.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * proactive reclamation: monitor access pattern of a given process, find
+ * regions that seem not to be accessed, and proactively page out the regions.
+ */
+
+#define pr_fmt(fmt) "damon_sample_prcl: " fmt
+
+#include <linux/damon.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_sample_prcl."
+
+static int target_pid __read_mostly;
+module_param(target_pid, int, 0600);
+
+static int damon_sample_prcl_enable_store(
+ const char *val, const struct kernel_param *kp);
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_sample_prcl_enable_store,
+ .get = param_get_bool,
+};
+
+static bool enabled __read_mostly;
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled, "Enable or disable DAMON_SAMPLE_PRCL");
+
+static struct damon_ctx *ctx;
+static struct pid *target_pidp;
+
+/*
+ * Callback installed through repeat_call_control.
+ *
+ * @data is the DAMON context.  For each of its targets, sums the sizes of the
+ * regions whose nr_accesses is non-zero and logs the total as "wss".
+ * Always returns 0.
+ */
+static int damon_sample_prcl_repeat_call_fn(void *data)
+{
+ struct damon_ctx *c = data;
+ struct damon_target *t;
+
+ damon_for_each_target(t, c) {
+ struct damon_region *r;
+ unsigned long wss = 0;
+
+ damon_for_each_region(r, t) {
+ if (r->nr_accesses > 0)
+ wss += r->ar.end - r->ar.start;
+ }
+ pr_info("wss: %lu\n", wss);
+ }
+ return 0;
+}
+
+static struct damon_call_control repeat_call_control = {
+ .fn = damon_sample_prcl_repeat_call_fn,
+ .repeat = true,
+};
+
+/*
+ * Build a DAMON context that monitors the virtual address space of
+ * target_pid, install a DAMOS_PAGEOUT scheme for regions that show no access
+ * (nr_accesses == 0) and have min_age_region of at least 50, start it, and
+ * register the repeated callback.
+ *
+ * Returns 0 on success or a negative error code.  On every failure up to and
+ * including damon_start() the context is destroyed and the global pointer is
+ * cleared, so a failed damon_start() no longer leaks the context.
+ *
+ * NOTE(review): a damon_call() failure is still returned as is, with the
+ * context left in place, as before.
+ */
+static int damon_sample_prcl_start(void)
+{
+	struct damon_target *target;
+	struct damos *scheme;
+	int err;
+
+	pr_info("start\n");
+
+	ctx = damon_new_ctx();
+	if (!ctx)
+		return -ENOMEM;
+
+	if (damon_select_ops(ctx, DAMON_OPS_VADDR)) {
+		err = -EINVAL;
+		goto err_destroy;
+	}
+
+	target = damon_new_target();
+	if (!target) {
+		err = -ENOMEM;
+		goto err_destroy;
+	}
+	damon_add_target(ctx, target);
+
+	target_pidp = find_get_pid(target_pid);
+	if (!target_pidp) {
+		err = -EINVAL;
+		goto err_destroy;
+	}
+	target->pid = target_pidp;
+
+	scheme = damon_new_scheme(
+			&(struct damos_access_pattern) {
+			.min_sz_region = PAGE_SIZE,
+			.max_sz_region = ULONG_MAX,
+			.min_nr_accesses = 0,
+			.max_nr_accesses = 0,
+			.min_age_region = 50,
+			.max_age_region = UINT_MAX},
+			DAMOS_PAGEOUT,
+			0,
+			&(struct damos_quota){},
+			&(struct damos_watermarks){},
+			NUMA_NO_NODE);
+	if (!scheme) {
+		err = -ENOMEM;
+		goto err_destroy;
+	}
+	damon_set_schemes(ctx, &scheme, 1);
+
+	err = damon_start(&ctx, 1, true);
+	if (err)
+		goto err_destroy;
+
+	repeat_call_control.data = ctx;
+	return damon_call(ctx, &repeat_call_control);
+
+err_destroy:
+	/* The context is not running on any path that reaches this label. */
+	damon_destroy_ctx(ctx);
+	ctx = NULL;
+	return err;
+}
+
+/* Log "stop" and, when a context exists, stop and destroy it. */
+static void damon_sample_prcl_stop(void)
+{
+ pr_info("stop\n");
+ if (ctx) {
+ damon_stop(&ctx, 1);
+ damon_destroy_ctx(ctx);
+ }
+}
+
+/*
+ * .set handler of the "enabled" parameter.
+ *
+ * Parses @val as a boolean directly into "enabled".  Nothing more is done
+ * when the value did not change or when DAMON is not initialized yet (the
+ * stored value is then kept as is).  Otherwise the sample is started or
+ * stopped to match the new value.
+ *
+ * Returns 0 on success, the kstrtobool() error for unparsable input, or the
+ * start error, in which case "enabled" is reset to false.  @kp is unused.
+ */
+static int damon_sample_prcl_enable_store(
+ const char *val, const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ int err;
+
+ err = kstrtobool(val, &enabled);
+ if (err)
+ return err;
+
+ if (enabled == is_enabled)
+ return 0;
+
+ if (!damon_initialized())
+ return 0;
+
+ if (enabled) {
+ err = damon_sample_prcl_start();
+ if (err)
+ enabled = false;
+ return err;
+ }
+ damon_sample_prcl_stop();
+ return 0;
+}
+
+/*
+ * Init hook: start the sample right away when "enabled" was already set
+ * (e.g. on the kernel command line) before this function ran.
+ *
+ * Returns -ENOMEM when DAMON is not initialized, the start error when the
+ * start fails ("enabled" is cleared in both cases), and 0 otherwise.  The
+ * start error used to be computed and then dropped; it is now returned so
+ * that a failed start is not reported as success.
+ */
+static int __init damon_sample_prcl_init(void)
+{
+	int err;
+
+	if (!damon_initialized()) {
+		enabled = false;
+		return -ENOMEM;
+	}
+
+	if (!enabled)
+		return 0;
+
+	err = damon_sample_prcl_start();
+	if (err)
+		enabled = false;
+	return err;
+}
+
+module_init(damon_sample_prcl_init);
diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c
new file mode 100644
index 000000000000..799ad4443943
--- /dev/null
+++ b/samples/damon/wsse.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * working set size estimation: monitor the access pattern of a given process
+ * and print the estimated working set size (total size of regions showing
+ * some access).
+ */
+
+#define pr_fmt(fmt) "damon_sample_wsse: " fmt
+
+#include <linux/damon.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_sample_wsse."
+
+static int target_pid __read_mostly;
+module_param(target_pid, int, 0600);
+
+static int damon_sample_wsse_enable_store(
+ const char *val, const struct kernel_param *kp);
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_sample_wsse_enable_store,
+ .get = param_get_bool,
+};
+
+static bool enabled __read_mostly;
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled, "Enable or disable DAMON_SAMPLE_WSSE");
+
+static struct damon_ctx *ctx;
+static struct pid *target_pidp;
+
+/*
+ * Callback installed through repeat_call_control.
+ *
+ * @data is the DAMON context.  For each of its targets, sums the sizes of the
+ * regions whose nr_accesses is non-zero and logs the total as "wss".
+ * Always returns 0.
+ */
+static int damon_sample_wsse_repeat_call_fn(void *data)
+{
+ struct damon_ctx *c = data;
+ struct damon_target *t;
+
+ damon_for_each_target(t, c) {
+ struct damon_region *r;
+ unsigned long wss = 0;
+
+ damon_for_each_region(r, t) {
+ if (r->nr_accesses > 0)
+ wss += r->ar.end - r->ar.start;
+ }
+ pr_info("wss: %lu\n", wss);
+ }
+ return 0;
+}
+
+static struct damon_call_control repeat_call_control = {
+ .fn = damon_sample_wsse_repeat_call_fn,
+ .repeat = true,
+};
+
+/*
+ * Build a DAMON context that monitors the virtual address space of
+ * target_pid, start it, and register the repeated callback that logs the
+ * working set size.
+ *
+ * Returns 0 on success or a negative error code.  On every failure up to and
+ * including damon_start() the context is destroyed and the global pointer is
+ * cleared, so a failed damon_start() no longer leaks the context.
+ *
+ * NOTE(review): a damon_call() failure is still returned as is, with the
+ * context left in place, as before.
+ */
+static int damon_sample_wsse_start(void)
+{
+	struct damon_target *target;
+	int err;
+
+	pr_info("start\n");
+
+	ctx = damon_new_ctx();
+	if (!ctx)
+		return -ENOMEM;
+
+	if (damon_select_ops(ctx, DAMON_OPS_VADDR)) {
+		err = -EINVAL;
+		goto err_destroy;
+	}
+
+	target = damon_new_target();
+	if (!target) {
+		err = -ENOMEM;
+		goto err_destroy;
+	}
+	damon_add_target(ctx, target);
+
+	target_pidp = find_get_pid(target_pid);
+	if (!target_pidp) {
+		err = -EINVAL;
+		goto err_destroy;
+	}
+	target->pid = target_pidp;
+
+	err = damon_start(&ctx, 1, true);
+	if (err)
+		goto err_destroy;
+
+	repeat_call_control.data = ctx;
+	return damon_call(ctx, &repeat_call_control);
+
+err_destroy:
+	/* The context is not running on any path that reaches this label. */
+	damon_destroy_ctx(ctx);
+	ctx = NULL;
+	return err;
+}
+
+/* Log "stop" and, when a context exists, stop and destroy it. */
+static void damon_sample_wsse_stop(void)
+{
+ pr_info("stop\n");
+ if (ctx) {
+ damon_stop(&ctx, 1);
+ damon_destroy_ctx(ctx);
+ }
+}
+
+/*
+ * .set handler of the "enabled" parameter.
+ *
+ * Parses @val as a boolean directly into "enabled".  Nothing more is done
+ * when the value did not change or when DAMON is not initialized yet (the
+ * stored value is then kept as is).  Otherwise the sample is started or
+ * stopped to match the new value.
+ *
+ * Returns 0 on success, the kstrtobool() error for unparsable input, or the
+ * start error, in which case "enabled" is reset to false.  @kp is unused.
+ */
+static int damon_sample_wsse_enable_store(
+ const char *val, const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ int err;
+
+ err = kstrtobool(val, &enabled);
+ if (err)
+ return err;
+
+ if (enabled == is_enabled)
+ return 0;
+
+ if (!damon_initialized())
+ return 0;
+
+ if (enabled) {
+ err = damon_sample_wsse_start();
+ if (err)
+ enabled = false;
+ return err;
+ }
+ damon_sample_wsse_stop();
+ return 0;
+}
+
+/*
+ * Init hook: start the sample right away when "enabled" was already set
+ * before this function ran.
+ *
+ * Returns -ENOMEM when DAMON is not initialized, the start error when the
+ * start fails ("enabled" is cleared in both cases), and 0 otherwise.
+ */
+static int __init damon_sample_wsse_init(void)
+{
+ int err = 0;
+
+ if (!damon_initialized()) {
+ err = -ENOMEM;
+ /* Clearing "enabled" makes the start attempt below a no-op. */
+ if (enabled)
+ enabled = false;
+ }
+
+ if (enabled) {
+ err = damon_sample_wsse_start();
+ if (err)
+ enabled = false;
+ }
+ return err;
+}
+
+module_init(damon_sample_wsse_init);
diff --git a/samples/fanotify/fs-monitor.c b/samples/fanotify/fs-monitor.c
index 608db24c471e..28c0a652ffeb 100644
--- a/samples/fanotify/fs-monitor.c
+++ b/samples/fanotify/fs-monitor.c
@@ -12,6 +12,9 @@
#include <sys/fanotify.h>
#include <sys/types.h>
#include <unistd.h>
+#ifndef __GLIBC__
+#include <asm-generic/int-ll64.h>
+#endif
#ifndef FAN_FS_ERROR
#define FAN_FS_ERROR 0x00008000
@@ -95,7 +98,11 @@ static void handle_notifications(char *buffer, int len)
fid = (struct fanotify_event_info_fid *) info;
printf("\tfsid: %x%x\n",
+#if defined(__GLIBC__)
fid->fsid.val[0], fid->fsid.val[1]);
+#else
+ fid->fsid.__val[0], fid->fsid.__val[1]);
+#endif
print_fh((struct file_handle *) &fid->handle);
break;
diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c
index e22da8573116..bfe98ce826f3 100644
--- a/samples/fprobe/fprobe_example.c
+++ b/samples/fprobe/fprobe_example.c
@@ -48,7 +48,9 @@ static void show_backtrace(void)
stack_trace_print(stacks, len, 24);
}
-static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
+static int sample_entry_handler(struct fprobe *fp, unsigned long ip,
+ unsigned long ret_ip,
+ struct ftrace_regs *fregs, void *data)
{
if (use_trace)
/*
@@ -61,11 +63,14 @@ static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_
nhit++;
if (stackdump)
show_backtrace();
+ return 0;
}
-static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
+static void sample_exit_handler(struct fprobe *fp, unsigned long ip,
+ unsigned long ret_ip, struct ftrace_regs *regs,
+ void *data)
{
- unsigned long rip = instruction_pointer(regs);
+ unsigned long rip = ret_ip;
if (use_trace)
/*
@@ -145,4 +150,5 @@ static void __exit fprobe_exit(void)
module_init(fprobe_init)
module_exit(fprobe_exit)
+MODULE_DESCRIPTION("sample kernel module showing the use of fprobe");
MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile
index faf8cdb79c5f..589baf2ec4e3 100644
--- a/samples/ftrace/Makefile
+++ b/samples/ftrace/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o
+obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace-ops.o
CFLAGS_sample-trace-array.o := -I$(src)
obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index de5a0f67f320..da3a9f2091f5 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -2,8 +2,9 @@
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/ftrace.h>
+#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32)
#include <asm/asm-offsets.h>
-#include <asm/nospec-branch.h>
+#endif
extern void my_direct_func1(void);
extern void my_direct_func2(void);
@@ -23,9 +24,45 @@ extern void my_tramp2(void *);
static unsigned long my_ip = (unsigned long)schedule;
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:\n"
+" addi sp,sp,-2*"SZREG"\n"
+" "REG_S" t0,0*"SZREG"(sp)\n"
+" "REG_S" ra,1*"SZREG"(sp)\n"
+" call my_direct_func1\n"
+" "REG_L" t0,0*"SZREG"(sp)\n"
+" "REG_L" ra,1*"SZREG"(sp)\n"
+" addi sp,sp,2*"SZREG"\n"
+" jr t0\n"
+" .size my_tramp1, .-my_tramp1\n"
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+
+" my_tramp2:\n"
+" addi sp,sp,-2*"SZREG"\n"
+" "REG_S" t0,0*"SZREG"(sp)\n"
+" "REG_S" ra,1*"SZREG"(sp)\n"
+" call my_direct_func2\n"
+" "REG_L" t0,0*"SZREG"(sp)\n"
+" "REG_L" ra,1*"SZREG"(sp)\n"
+" addi sp,sp,2*"SZREG"\n"
+" jr t0\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
+#include <asm/nospec-branch.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
@@ -38,8 +75,8 @@ asm (
CALL_DEPTH_ACCOUNT
" call my_direct_func1\n"
" leave\n"
-" .size my_tramp1, .-my_tramp1\n"
ASM_RET
+" .size my_tramp1, .-my_tramp1\n"
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
@@ -96,6 +133,157 @@ asm (
#endif /* CONFIG_S390 */
+#ifdef CONFIG_ARM64
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:"
+" hint 34\n" // bti c
+" sub sp, sp, #16\n"
+" stp x9, x30, [sp]\n"
+" bl my_direct_func1\n"
+" ldp x30, x9, [sp]\n"
+" add sp, sp, #16\n"
+" ret x9\n"
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:"
+" hint 34\n" // bti c
+" sub sp, sp, #16\n"
+" stp x9, x30, [sp]\n"
+" bl my_direct_func2\n"
+" ldp x30, x9, [sp]\n"
+" add sp, sp, #16\n"
+" ret x9\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
+#ifdef CONFIG_LOONGARCH
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:\n"
+" addi.d $sp, $sp, -16\n"
+" st.d $t0, $sp, 0\n"
+" st.d $ra, $sp, 8\n"
+" bl my_direct_func1\n"
+" ld.d $t0, $sp, 0\n"
+" ld.d $ra, $sp, 8\n"
+" addi.d $sp, $sp, 16\n"
+" jr $t0\n"
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:\n"
+" addi.d $sp, $sp, -16\n"
+" st.d $t0, $sp, 0\n"
+" st.d $ra, $sp, 8\n"
+" bl my_direct_func2\n"
+" ld.d $t0, $sp, 0\n"
+" ld.d $ra, $sp, 8\n"
+" addi.d $sp, $sp, 16\n"
+" jr $t0\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_LOONGARCH */
+
+#ifdef CONFIG_PPC
+#include <asm/ppc_asm.h>
+
+#ifdef CONFIG_PPC64
+#define STACK_FRAME_SIZE 48
+#else
+#define STACK_FRAME_SIZE 24
+#endif
+
+#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL)
+#define PPC64_TOC_SAVE_AND_UPDATE \
+" std 2, 24(1)\n" \
+" bcl 20, 31, 1f\n" \
+" 1: mflr 12\n" \
+" ld 2, (99f - 1b)(12)\n"
+#define PPC64_TOC_RESTORE \
+" ld 2, 24(1)\n"
+#define PPC64_TOC \
+" 99: .quad .TOC.@tocbase\n"
+#else
+#define PPC64_TOC_SAVE_AND_UPDATE ""
+#define PPC64_TOC_RESTORE ""
+#define PPC64_TOC ""
+#endif
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtlr 0\n"
+#define PPC_FTRACE_RET \
+" blr\n"
+#else
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtctr 0\n"
+#define PPC_FTRACE_RET \
+" mtlr 0\n" \
+" bctr\n"
+#endif
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+" mflr 0\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n"
+ PPC64_TOC_SAVE_AND_UPDATE
+" bl my_direct_func1\n"
+ PPC64_TOC_RESTORE
+" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n"
+ PPC_FTRACE_RESTORE_LR
+" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n"
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_FTRACE_RET
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+" mflr 0\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n"
+ PPC64_TOC_SAVE_AND_UPDATE
+" bl my_direct_func2\n"
+ PPC64_TOC_RESTORE
+" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n"
+ PPC_FTRACE_RESTORE_LR
+" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n"
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_FTRACE_RET
+ PPC64_TOC
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_PPC */
+
+static struct ftrace_ops direct;
+
static unsigned long my_tramp = (unsigned long)my_tramp1;
static unsigned long tramps[2] = {
(unsigned long)my_tramp1,
@@ -114,7 +302,7 @@ static int simple_thread(void *arg)
if (ret)
continue;
t ^= 1;
- ret = modify_ftrace_direct(my_ip, my_tramp, tramps[t]);
+ ret = modify_ftrace_direct(&direct, tramps[t]);
if (!ret)
my_tramp = tramps[t];
WARN_ON_ONCE(ret);
@@ -129,7 +317,9 @@ static int __init ftrace_direct_init(void)
{
int ret;
- ret = register_ftrace_direct(my_ip, my_tramp);
+ ftrace_set_filter_ip(&direct, (unsigned long) my_ip, 0, 0);
+ ret = register_ftrace_direct(&direct, my_tramp);
+
if (!ret)
simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn");
return ret;
@@ -138,7 +328,7 @@ static int __init ftrace_direct_init(void)
static void __exit ftrace_direct_exit(void)
{
kthread_stop(simple_tsk);
- unregister_ftrace_direct(my_ip, my_tramp);
+ unregister_ftrace_direct(&direct, my_tramp, true);
}
module_init(ftrace_direct_init);
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index a825dbd2c9cf..8f7986d698d8 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -2,8 +2,9 @@
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/ftrace.h>
+#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32)
#include <asm/asm-offsets.h>
-#include <asm/nospec-branch.h>
+#endif
extern void my_direct_func1(unsigned long ip);
extern void my_direct_func2(unsigned long ip);
@@ -21,9 +22,51 @@ void my_direct_func2(unsigned long ip)
extern void my_tramp1(void *);
extern void my_tramp2(void *);
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:\n"
+" addi sp,sp,-3*"SZREG"\n"
+" "REG_S" a0,0*"SZREG"(sp)\n"
+" "REG_S" t0,1*"SZREG"(sp)\n"
+" "REG_S" ra,2*"SZREG"(sp)\n"
+" mv a0,t0\n"
+" call my_direct_func1\n"
+" "REG_L" a0,0*"SZREG"(sp)\n"
+" "REG_L" t0,1*"SZREG"(sp)\n"
+" "REG_L" ra,2*"SZREG"(sp)\n"
+" addi sp,sp,3*"SZREG"\n"
+" jr t0\n"
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:\n"
+" addi sp,sp,-3*"SZREG"\n"
+" "REG_S" a0,0*"SZREG"(sp)\n"
+" "REG_S" t0,1*"SZREG"(sp)\n"
+" "REG_S" ra,2*"SZREG"(sp)\n"
+" mv a0,t0\n"
+" call my_direct_func2\n"
+" "REG_L" a0,0*"SZREG"(sp)\n"
+" "REG_L" t0,1*"SZREG"(sp)\n"
+" "REG_L" ra,2*"SZREG"(sp)\n"
+" addi sp,sp,3*"SZREG"\n"
+" jr t0\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
+#include <asm/nospec-branch.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
@@ -103,6 +146,184 @@ asm (
#endif /* CONFIG_S390 */
+#ifdef CONFIG_ARM64
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:"
+" hint 34\n" // bti c
+" sub sp, sp, #32\n"
+" stp x9, x30, [sp]\n"
+" str x0, [sp, #16]\n"
+" mov x0, x30\n"
+" bl my_direct_func1\n"
+" ldp x30, x9, [sp]\n"
+" ldr x0, [sp, #16]\n"
+" add sp, sp, #32\n"
+" ret x9\n"
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:"
+" hint 34\n" // bti c
+" sub sp, sp, #32\n"
+" stp x9, x30, [sp]\n"
+" str x0, [sp, #16]\n"
+" mov x0, x30\n"
+" bl my_direct_func2\n"
+" ldp x30, x9, [sp]\n"
+" ldr x0, [sp, #16]\n"
+" add sp, sp, #32\n"
+" ret x9\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
+#ifdef CONFIG_LOONGARCH
+#include <asm/asm.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:\n"
+" addi.d $sp, $sp, -32\n"
+" st.d $a0, $sp, 0\n"
+" st.d $t0, $sp, 8\n"
+" st.d $ra, $sp, 16\n"
+" move $a0, $t0\n"
+" bl my_direct_func1\n"
+" ld.d $a0, $sp, 0\n"
+" ld.d $t0, $sp, 8\n"
+" ld.d $ra, $sp, 16\n"
+" addi.d $sp, $sp, 32\n"
+" jr $t0\n"
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:\n"
+" addi.d $sp, $sp, -32\n"
+" st.d $a0, $sp, 0\n"
+" st.d $t0, $sp, 8\n"
+" st.d $ra, $sp, 16\n"
+" move $a0, $t0\n"
+" bl my_direct_func2\n"
+" ld.d $a0, $sp, 0\n"
+" ld.d $t0, $sp, 8\n"
+" ld.d $ra, $sp, 16\n"
+" addi.d $sp, $sp, 32\n"
+" jr $t0\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_LOONGARCH */
+
+#ifdef CONFIG_PPC
+#include <asm/ppc_asm.h>
+
+#ifdef CONFIG_PPC64
+#define STACK_FRAME_SIZE 48
+#else
+#define STACK_FRAME_SIZE 24
+#endif
+
+#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL)
+#define PPC64_TOC_SAVE_AND_UPDATE \
+" std 2, 24(1)\n" \
+" bcl 20, 31, 1f\n" \
+" 1: mflr 12\n" \
+" ld 2, (99f - 1b)(12)\n"
+#define PPC64_TOC_RESTORE \
+" ld 2, 24(1)\n"
+#define PPC64_TOC \
+" 99: .quad .TOC.@tocbase\n"
+#else
+#define PPC64_TOC_SAVE_AND_UPDATE ""
+#define PPC64_TOC_RESTORE ""
+#define PPC64_TOC ""
+#endif
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtlr 0\n"
+#define PPC_FTRACE_RET \
+" blr\n"
+#define PPC_FTRACE_RECOVER_IP \
+" lwz 8, 4(3)\n" \
+" li 9, 6\n" \
+" slw 8, 8, 9\n" \
+" sraw 8, 8, 9\n" \
+" add 3, 3, 8\n" \
+" addi 3, 3, 4\n"
+#else
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtctr 0\n"
+#define PPC_FTRACE_RET \
+" mtlr 0\n" \
+" bctr\n"
+#define PPC_FTRACE_RECOVER_IP ""
+#endif
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+" mflr 0\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n"
+ PPC64_TOC_SAVE_AND_UPDATE
+ PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+" mr 3, 0\n"
+ PPC_FTRACE_RECOVER_IP
+" bl my_direct_func1\n"
+ PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+ PPC64_TOC_RESTORE
+" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n"
+ PPC_FTRACE_RESTORE_LR
+" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n"
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_FTRACE_RET
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+" mflr 0\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n"
+ PPC64_TOC_SAVE_AND_UPDATE
+ PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+" mr 3, 0\n"
+ PPC_FTRACE_RECOVER_IP
+" bl my_direct_func2\n"
+ PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n"
+ PPC64_TOC_RESTORE
+" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n"
+ PPC_FTRACE_RESTORE_LR
+" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n"
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n"
+ PPC_FTRACE_RET
+ PPC64_TOC
+ " .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_PPC */
+
static unsigned long my_tramp = (unsigned long)my_tramp1;
static unsigned long tramps[2] = {
(unsigned long)my_tramp1,
@@ -123,7 +344,7 @@ static int simple_thread(void *arg)
if (ret)
continue;
t ^= 1;
- ret = modify_ftrace_direct_multi(&direct, tramps[t]);
+ ret = modify_ftrace_direct(&direct, tramps[t]);
if (!ret)
my_tramp = tramps[t];
WARN_ON_ONCE(ret);
@@ -141,7 +362,7 @@ static int __init ftrace_direct_multi_init(void)
ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0);
ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0);
- ret = register_ftrace_direct_multi(&direct, my_tramp);
+ ret = register_ftrace_direct(&direct, my_tramp);
if (!ret)
simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn");
@@ -151,13 +372,12 @@ static int __init ftrace_direct_multi_init(void)
static void __exit ftrace_direct_multi_exit(void)
{
kthread_stop(simple_tsk);
- unregister_ftrace_direct_multi(&direct, my_tramp);
- ftrace_free_filter(&direct);
+ unregister_ftrace_direct(&direct, my_tramp, true);
}
module_init(ftrace_direct_multi_init);
module_exit(ftrace_direct_multi_exit);
MODULE_AUTHOR("Jiri Olsa");
-MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct_multi()");
+MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct()");
MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index d955a2650605..db326c81a27d 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -4,8 +4,9 @@
#include <linux/mm.h> /* for handle_mm_fault() */
#include <linux/ftrace.h>
#include <linux/sched/stat.h>
+#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32)
#include <asm/asm-offsets.h>
-#include <asm/nospec-branch.h>
+#endif
extern void my_direct_func(unsigned long ip);
@@ -16,9 +17,35 @@ void my_direct_func(unsigned long ip)
extern void my_tramp(void *);
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+" addi sp,sp,-3*"SZREG"\n" // reserve three register-sized stack slots
+" "REG_S" a0,0*"SZREG"(sp)\n" // save a0, which is overwritten below
+" "REG_S" t0,1*"SZREG"(sp)\n" // save t0, the address jumped to on exit
+" "REG_S" ra,2*"SZREG"(sp)\n" // save ra, which the call below overwrites
+" mv a0,t0\n" // pass the incoming t0 value as my_direct_func()'s argument
+" call my_direct_func\n"
+" "REG_L" a0,0*"SZREG"(sp)\n" // restore the three saved registers
+" "REG_L" t0,1*"SZREG"(sp)\n"
+" "REG_L" ra,2*"SZREG"(sp)\n"
+" addi sp,sp,3*"SZREG"\n" // release the stack slots
+" jr t0\n" // leave by jumping to the address in t0
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
+#include <asm/nospec-branch.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
@@ -66,6 +93,131 @@ asm (
#endif /* CONFIG_S390 */
+#ifdef CONFIG_ARM64
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" hint 34\n" // bti c
+" sub sp, sp, #32\n" // reserve 32 bytes of stack
+" stp x9, x30, [sp]\n" // save x9 at [sp] and x30 at [sp, #8]
+" str x0, [sp, #16]\n" // save x0, which is overwritten below
+" mov x0, x30\n" // pass the incoming x30 value as my_direct_func()'s argument
+" bl my_direct_func\n"
+" ldp x30, x9, [sp]\n" // reload swapped: x30 = saved x9, x9 = saved x30
+" ldr x0, [sp, #16]\n" // restore x0
+" add sp, sp, #32\n" // release the stack space
+" ret x9\n" // return to the address that arrived in x30
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
+#ifdef CONFIG_LOONGARCH
+
+#include <asm/asm.h>
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+" addi.d $sp, $sp, -32\n" // reserve 32 bytes of stack
+" st.d $a0, $sp, 0\n" // save $a0, which is overwritten below
+" st.d $t0, $sp, 8\n" // save $t0, the address jumped to on exit
+" st.d $ra, $sp, 16\n" // save $ra, which the call below overwrites
+" move $a0, $t0\n" // pass the incoming $t0 value as my_direct_func()'s argument
+" bl my_direct_func\n"
+" ld.d $a0, $sp, 0\n" // restore the three saved registers
+" ld.d $t0, $sp, 8\n"
+" ld.d $ra, $sp, 16\n"
+" addi.d $sp, $sp, 32\n" // release the stack space
+" jr $t0\n" // leave by jumping to the address in $t0
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_LOONGARCH */
+
+#ifdef CONFIG_PPC
+#include <asm/ppc_asm.h>
+
+#ifdef CONFIG_PPC64
+#define STACK_FRAME_SIZE 48
+#else
+#define STACK_FRAME_SIZE 24
+#endif
+
+#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL)
+#define PPC64_TOC_SAVE_AND_UPDATE \
+" std 2, 24(1)\n" \
+" bcl 20, 31, 1f\n" \
+" 1: mflr 12\n" \
+" ld 2, (99f - 1b)(12)\n"
+#define PPC64_TOC_RESTORE \
+" ld 2, 24(1)\n"
+#define PPC64_TOC \
+" 99: .quad .TOC.@tocbase\n"
+#else
+#define PPC64_TOC_SAVE_AND_UPDATE ""
+#define PPC64_TOC_RESTORE ""
+#define PPC64_TOC ""
+#endif
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtlr 0\n"
+#define PPC_FTRACE_RET \
+" blr\n"
+#define PPC_FTRACE_RECOVER_IP \
+" lwz 8, 4(3)\n" \
+" li 9, 6\n" \
+" slw 8, 8, 9\n" \
+" sraw 8, 8, 9\n" \
+" add 3, 3, 8\n" \
+" addi 3, 3, 4\n"
+#else
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtctr 0\n"
+#define PPC_FTRACE_RET \
+" mtlr 0\n" \
+" bctr\n"
+#define PPC_FTRACE_RECOVER_IP ""
+#endif
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // store r0 at PPC_LR_STKOFF(r1)
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" // open a STACK_FRAME_MIN_SIZE frame (presumably store-with-update; see asm/ppc_asm.h)
+" mflr 0\n" // r0 = LR
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // store that LR value at PPC_LR_STKOFF of the new frame
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" // open a second frame of STACK_FRAME_SIZE bytes
+ PPC64_TOC_SAVE_AND_UPDATE // on ELFv2 without PCREL: saves r2 at 24(r1) and loads r2 from the 99: word
+ PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" // save r3, which is overwritten below
+" mr 3, 0\n" // r3 = r0, i.e. the LR value read above
+ PPC_FTRACE_RECOVER_IP // empty unless CONFIG_PPC_FTRACE_OUT_OF_LINE, where it adjusts r3
+" bl my_direct_func\n"
+ PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" // restore r3
+ PPC64_TOC_RESTORE // reloads r2 from 24(r1) when the TOC was switched above
+" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" // drop the second frame
+ PPC_FTRACE_RESTORE_LR // reloads r0 from PPC_LR_STKOFF(r1), then mtlr or mtctr depending on config
+" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" // drop the first frame
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // reload r0 from PPC_LR_STKOFF(r1)
+ PPC_FTRACE_RET // blr, or "mtlr 0; bctr" without CONFIG_PPC_FTRACE_OUT_OF_LINE
+ PPC64_TOC // the 99: .quad .TOC.@tocbase literal read by PPC64_TOC_SAVE_AND_UPDATE
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_PPC */
+
static struct ftrace_ops direct;
static int __init ftrace_direct_multi_init(void)
@@ -73,13 +225,12 @@ static int __init ftrace_direct_multi_init(void)
ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0);
ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0);
- return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp);
+ return register_ftrace_direct(&direct, (unsigned long) my_tramp);
}
static void __exit ftrace_direct_multi_exit(void)
{
- unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp);
- ftrace_free_filter(&direct);
+ unregister_ftrace_direct(&direct, (unsigned long) my_tramp, true);
}
module_init(ftrace_direct_multi_init);
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index e13fb59a2b47..3d0fa260332d 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -3,24 +3,54 @@
#include <linux/mm.h> /* for handle_mm_fault() */
#include <linux/ftrace.h>
+#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32)
#include <asm/asm-offsets.h>
-#include <asm/nospec-branch.h>
+#endif
-extern void my_direct_func(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags);
+extern void my_direct_func(struct vm_area_struct *vma, unsigned long address,
+ unsigned int flags, struct pt_regs *regs);
-void my_direct_func(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags)
+void my_direct_func(struct vm_area_struct *vma, unsigned long address,
+ unsigned int flags, struct pt_regs *regs)
{
- trace_printk("handle mm fault vma=%p address=%lx flags=%x\n",
- vma, address, flags);
+ trace_printk("handle mm fault vma=%p address=%lx flags=%x regs=%p\n",
+ vma, address, flags, regs);
}
extern void my_tramp(void *);
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+" addi sp,sp,-5*"SZREG"\n" // reserve five register-sized stack slots
+" "REG_S" a0,0*"SZREG"(sp)\n" // save a0..a2 so they can be restored after the call
+" "REG_S" a1,1*"SZREG"(sp)\n"
+" "REG_S" a2,2*"SZREG"(sp)\n"
+" "REG_S" t0,3*"SZREG"(sp)\n" // save t0, the address jumped to on exit
+" "REG_S" ra,4*"SZREG"(sp)\n" // save ra, which the call below overwrites
+" call my_direct_func\n" // NOTE(review): my_direct_func() takes four arguments but a3 is not saved here - confirm this is intended
+" "REG_L" a0,0*"SZREG"(sp)\n" // restore the five saved registers
+" "REG_L" a1,1*"SZREG"(sp)\n"
+" "REG_L" a2,2*"SZREG"(sp)\n"
+" "REG_L" t0,3*"SZREG"(sp)\n"
+" "REG_L" ra,4*"SZREG"(sp)\n"
+" addi sp,sp,5*"SZREG"\n" // release the stack slots
+" jr t0\n" // leave by jumping to the address in t0
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
+#include <asm/nospec-branch.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
@@ -34,7 +64,9 @@ asm (
" pushq %rdi\n"
" pushq %rsi\n"
" pushq %rdx\n"
+" pushq %rcx\n"
" call my_direct_func\n"
+" popq %rcx\n"
" popq %rdx\n"
" popq %rsi\n"
" popq %rdi\n"
@@ -70,16 +102,150 @@ asm (
#endif /* CONFIG_S390 */
+#ifdef CONFIG_ARM64
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" hint 34\n" // bti c
+" sub sp, sp, #48\n" // reserve 48 bytes of stack
+" stp x9, x30, [sp]\n" // save x9 at [sp] and x30 at [sp, #8]
+" stp x0, x1, [sp, #16]\n" // save x0..x3 so they can be restored after the call
+" stp x2, x3, [sp, #32]\n"
+" bl my_direct_func\n" // x0..x3 are passed through unchanged
+" ldp x30, x9, [sp]\n" // reload swapped: x30 = saved x9, x9 = saved x30
+" ldp x0, x1, [sp, #16]\n" // restore x0..x3
+" ldp x2, x3, [sp, #32]\n"
+" add sp, sp, #48\n" // release the stack space
+" ret x9\n" // return to the address that arrived in x30
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
+#ifdef CONFIG_LOONGARCH
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+" addi.d $sp, $sp, -48\n" // reserve 48 bytes of stack (40 are used)
+" st.d $a0, $sp, 0\n" // save $a0..$a2 so they can be restored after the call
+" st.d $a1, $sp, 8\n"
+" st.d $a2, $sp, 16\n"
+" st.d $t0, $sp, 24\n" // save $t0, the address jumped to on exit
+" st.d $ra, $sp, 32\n" // save $ra, which the call below overwrites
+" bl my_direct_func\n" // NOTE(review): my_direct_func() takes four arguments but $a3 is not saved here - confirm this is intended
+" ld.d $a0, $sp, 0\n" // restore the five saved registers
+" ld.d $a1, $sp, 8\n"
+" ld.d $a2, $sp, 16\n"
+" ld.d $t0, $sp, 24\n"
+" ld.d $ra, $sp, 32\n"
+" addi.d $sp, $sp, 48\n" // release the stack space
+" jr $t0\n" // leave by jumping to the address in $t0
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_LOONGARCH */
+
+#ifdef CONFIG_PPC
+#include <asm/ppc_asm.h>
+
+#ifdef CONFIG_PPC64
+#define STACK_FRAME_SIZE 64
+#define STACK_FRAME_ARG1 32
+#define STACK_FRAME_ARG2 40
+#define STACK_FRAME_ARG3 48
+#define STACK_FRAME_ARG4 56
+#else
+#define STACK_FRAME_SIZE 32
+#define STACK_FRAME_ARG1 16
+#define STACK_FRAME_ARG2 20
+#define STACK_FRAME_ARG3 24
+#define STACK_FRAME_ARG4 28
+#endif
+
+#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL)
+#define PPC64_TOC_SAVE_AND_UPDATE \
+" std 2, 24(1)\n" \
+" bcl 20, 31, 1f\n" \
+" 1: mflr 12\n" \
+" ld 2, (99f - 1b)(12)\n"
+#define PPC64_TOC_RESTORE \
+" ld 2, 24(1)\n"
+#define PPC64_TOC \
+" 99: .quad .TOC.@tocbase\n"
+#else
+#define PPC64_TOC_SAVE_AND_UPDATE ""
+#define PPC64_TOC_RESTORE ""
+#define PPC64_TOC ""
+#endif
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtlr 0\n"
+#define PPC_FTRACE_RET \
+" blr\n"
+#else
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtctr 0\n"
+#define PPC_FTRACE_RET \
+" mtlr 0\n" \
+" bctr\n"
+#endif
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // store r0 at PPC_LR_STKOFF(r1)
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" // open a STACK_FRAME_MIN_SIZE frame (presumably store-with-update; see asm/ppc_asm.h)
+" mflr 0\n" // r0 = LR
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // store that LR value at PPC_LR_STKOFF of the new frame
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" // open a second frame of STACK_FRAME_SIZE bytes
+ PPC64_TOC_SAVE_AND_UPDATE // on ELFv2 without PCREL: saves r2 at 24(r1) and loads r2 from the 99: word
+ PPC_STL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n" // save r3..r6 in the STACK_FRAME_ARG1..4 slots
+ PPC_STL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n"
+ PPC_STL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n"
+ PPC_STL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n"
+" bl my_direct_func\n" // r3..r6 are passed through unchanged
+ PPC_LL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n" // restore r6..r3
+ PPC_LL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n"
+ PPC_LL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n"
+ PPC_LL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n"
+ PPC64_TOC_RESTORE // reloads r2 from 24(r1) when the TOC was switched above
+" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" // drop the second frame
+ PPC_FTRACE_RESTORE_LR // reloads r0 from PPC_LR_STKOFF(r1), then mtlr or mtctr depending on config
+" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" // drop the first frame
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // reload r0 from PPC_LR_STKOFF(r1)
+ PPC_FTRACE_RET // blr, or "mtlr 0; bctr" without CONFIG_PPC_FTRACE_OUT_OF_LINE
+ PPC64_TOC // the 99: .quad .TOC.@tocbase literal read by PPC64_TOC_SAVE_AND_UPDATE
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_PPC */
+
+static struct ftrace_ops direct;
+
static int __init ftrace_direct_init(void)
{
- return register_ftrace_direct((unsigned long)handle_mm_fault,
- (unsigned long)my_tramp);
+ ftrace_set_filter_ip(&direct, (unsigned long) handle_mm_fault, 0, 0);
+
+ return register_ftrace_direct(&direct, (unsigned long) my_tramp);
}
static void __exit ftrace_direct_exit(void)
{
- unregister_ftrace_direct((unsigned long)handle_mm_fault,
- (unsigned long)my_tramp);
+ unregister_ftrace_direct(&direct, (unsigned long)my_tramp, true);
}
module_init(ftrace_direct_init);
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index 1f769d0db20f..956834b0d19a 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -3,8 +3,9 @@
#include <linux/sched.h> /* for wake_up_process() */
#include <linux/ftrace.h>
+#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32)
#include <asm/asm-offsets.h>
-#include <asm/nospec-branch.h>
+#endif
extern void my_direct_func(struct task_struct *p);
@@ -15,9 +16,34 @@ void my_direct_func(struct task_struct *p)
extern void my_tramp(void *);
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+" addi sp,sp,-3*"SZREG"\n" // reserve three register-sized stack slots
+" "REG_S" a0,0*"SZREG"(sp)\n" // save a0 so it can be restored after the call
+" "REG_S" t0,1*"SZREG"(sp)\n" // save t0, the address jumped to on exit
+" "REG_S" ra,2*"SZREG"(sp)\n" // save ra, which the call below overwrites
+" call my_direct_func\n" // a0 is passed through unchanged
+" "REG_L" a0,0*"SZREG"(sp)\n" // restore the three saved registers
+" "REG_L" t0,1*"SZREG"(sp)\n"
+" "REG_L" ra,2*"SZREG"(sp)\n"
+" addi sp,sp,3*"SZREG"\n" // release the stack slots
+" jr t0\n" // leave by jumping to the address in t0
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
+#include <asm/nospec-branch.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
@@ -63,16 +89,130 @@ asm (
#endif /* CONFIG_S390 */
+#ifdef CONFIG_ARM64
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" hint 34\n" // bti c
+" sub sp, sp, #32\n" // reserve 32 bytes of stack
+" stp x9, x30, [sp]\n" // save x9 at [sp] and x30 at [sp, #8]
+" str x0, [sp, #16]\n" // save x0 so it can be restored after the call
+" bl my_direct_func\n" // x0 is passed through unchanged
+" ldp x30, x9, [sp]\n" // reload swapped: x30 = saved x9, x9 = saved x30
+" ldr x0, [sp, #16]\n" // restore x0
+" add sp, sp, #32\n" // release the stack space
+" ret x9\n" // return to the address that arrived in x30
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
+#ifdef CONFIG_LOONGARCH
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+" addi.d $sp, $sp, -32\n" // reserve 32 bytes of stack
+" st.d $a0, $sp, 0\n" // save $a0 so it can be restored after the call
+" st.d $t0, $sp, 8\n" // save $t0, the address jumped to on exit
+" st.d $ra, $sp, 16\n" // save $ra, which the call below overwrites
+" bl my_direct_func\n" // $a0 is passed through unchanged
+" ld.d $a0, $sp, 0\n" // restore the three saved registers
+" ld.d $t0, $sp, 8\n"
+" ld.d $ra, $sp, 16\n"
+" addi.d $sp, $sp, 32\n" // release the stack space
+" jr $t0\n" // leave by jumping to the address in $t0
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_LOONGARCH */
+
+#ifdef CONFIG_PPC
+#include <asm/ppc_asm.h>
+
+#ifdef CONFIG_PPC64
+#define STACK_FRAME_SIZE 48
+#else
+#define STACK_FRAME_SIZE 24
+#endif
+
+#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL)
+#define PPC64_TOC_SAVE_AND_UPDATE \
+" std 2, 24(1)\n" \
+" bcl 20, 31, 1f\n" \
+" 1: mflr 12\n" \
+" ld 2, (99f - 1b)(12)\n"
+#define PPC64_TOC_RESTORE \
+" ld 2, 24(1)\n"
+#define PPC64_TOC \
+" 99: .quad .TOC.@tocbase\n"
+#else
+#define PPC64_TOC_SAVE_AND_UPDATE ""
+#define PPC64_TOC_RESTORE ""
+#define PPC64_TOC ""
+#endif
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtlr 0\n"
+#define PPC_FTRACE_RET \
+" blr\n"
+#else
+#define PPC_FTRACE_RESTORE_LR \
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \
+" mtctr 0\n"
+#define PPC_FTRACE_RET \
+" mtlr 0\n" \
+" bctr\n"
+#endif
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:\n"
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // store r0 at PPC_LR_STKOFF(r1)
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" // open a STACK_FRAME_MIN_SIZE frame (presumably store-with-update; see asm/ppc_asm.h)
+" mflr 0\n" // r0 = LR
+ PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // store that LR value at PPC_LR_STKOFF of the new frame
+ PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" // open a second frame of STACK_FRAME_SIZE bytes
+ PPC64_TOC_SAVE_AND_UPDATE // on ELFv2 without PCREL: saves r2 at 24(r1) and loads r2 from the 99: word
+ PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" // save r3 so it can be restored after the call
+" bl my_direct_func\n" // r3 is passed through unchanged
+ PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" // restore r3
+ PPC64_TOC_RESTORE // reloads r2 from 24(r1) when the TOC was switched above
+" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" // drop the second frame
+ PPC_FTRACE_RESTORE_LR // reloads r0 from PPC_LR_STKOFF(r1), then mtlr or mtctr depending on config
+" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" // drop the first frame
+ PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" // reload r0 from PPC_LR_STKOFF(r1)
+ PPC_FTRACE_RET // blr, or "mtlr 0; bctr" without CONFIG_PPC_FTRACE_OUT_OF_LINE
+ PPC64_TOC // the 99: .quad .TOC.@tocbase literal read by PPC64_TOC_SAVE_AND_UPDATE
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_PPC */
+
+static struct ftrace_ops direct;
+
static int __init ftrace_direct_init(void)
{
- return register_ftrace_direct((unsigned long)wake_up_process,
- (unsigned long)my_tramp);
+ ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0);
+
+ return register_ftrace_direct(&direct, (unsigned long) my_tramp);
}
static void __exit ftrace_direct_exit(void)
{
- unregister_ftrace_direct((unsigned long)wake_up_process,
- (unsigned long)my_tramp);
+ unregister_ftrace_direct(&direct, (unsigned long)my_tramp, true);
}
module_init(ftrace_direct_init);
diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c
new file mode 100644
index 000000000000..68d6685c80bd
--- /dev/null
+++ b/samples/ftrace/ftrace-ops.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/ftrace.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+
+#include <asm/barrier.h>
+
+/*
+ * Arbitrary large value chosen to be sufficiently large to minimize noise but
+ * sufficiently small to complete quickly.
+ */
+static unsigned int nr_function_calls = 100000;
+module_param(nr_function_calls, uint, 0);
+MODULE_PARM_DESC(nr_function_calls, "How many times to call the relevant tracee");
+
+/*
+ * The number of ops associated with a call site affects whether a tracer can
+ * be called directly or whether it's necessary to go via the list func, which
+ * can be significantly more expensive.
+ */
+static unsigned int nr_ops_relevant = 1;
+module_param(nr_ops_relevant, uint, 0);
+MODULE_PARM_DESC(nr_ops_relevant, "How many ftrace_ops to associate with the relevant tracee");
+
+/*
+ * On architectures where all call sites share the same trampoline, having
+ * tracers enabled for distinct functions can force the use of the list func
+ * and incur overhead for all call sites.
+ */
+static unsigned int nr_ops_irrelevant;
+module_param(nr_ops_irrelevant, uint, 0);
+MODULE_PARM_DESC(nr_ops_irrelevant, "How many ftrace_ops to associate with the irrelevant tracee");
+
+/*
+ * On architectures with DYNAMIC_FTRACE_WITH_REGS, saving the full pt_regs can
+ * be more expensive than only saving the minimal necessary regs.
+ */
+static bool save_regs;
+module_param(save_regs, bool, 0);
+MODULE_PARM_DESC(save_regs, "Register ops with FTRACE_OPS_FL_SAVE_REGS (save all registers in the trampoline)");
+
+/*
+ * Optional ftrace_ops flags. When set, ftrace_ops_sample_init() ORs
+ * FTRACE_OPS_FL_RECURSION / FTRACE_OPS_FL_RCU into the flags of every
+ * registered ops.
+ *
+ * Each MODULE_PARM_DESC() must name the parameter it describes, otherwise
+ * the description is attached to a parameter that does not exist.
+ */
+static bool assist_recursion;
+module_param(assist_recursion, bool, 0);
+MODULE_PARM_DESC(assist_recursion, "Register ops with FTRACE_OPS_FL_RECURSION");
+
+static bool assist_rcu;
+module_param(assist_rcu, bool, 0);
+MODULE_PARM_DESC(assist_rcu, "Register ops with FTRACE_OPS_FL_RCU");
+
+/*
+ * By default, a trivial tracer is used which immediately returns to minimize
+ * overhead. Sometimes a consistency check using a more expensive tracer is
+ * desirable.
+ */
+static bool check_count;
+module_param(check_count, bool, 0);
+MODULE_PARM_DESC(check_count, "Check that tracers are called the expected number of times\n");
+
+/*
+ * Usually it's not interesting to leave the ops registered after the test
+ * runs, but sometimes it can be useful to leave them registered so that they
+ * can be inspected through the tracefs 'enabled_functions' file.
+ */
+static bool persist;
+module_param(persist, bool, 0);
+MODULE_PARM_DESC(persist, "Successfully load module and leave ftrace ops registered after test completes\n");
+
+/*
+ * Marked as noinline to ensure that an out-of-line traceable copy is
+ * generated by the compiler.
+ *
+ * The barrier() ensures the compiler won't elide calls by determining there
+ * are no side-effects.
+ */
+static noinline void tracee_relevant(void)
+{
+ barrier();
+}
+
+/*
+ * Marked as noinline to ensure that an out-of-line traceable copy is
+ * generated by the compiler.
+ *
+ * The barrier() ensures the compiler won't elide calls by determining there
+ * are no side-effects.
+ */
+static noinline void tracee_irrelevant(void)
+{
+ barrier();
+}
+
+struct sample_ops {
+ struct ftrace_ops ops; /* embedded ftrace_ops; ops_func_count() maps it back to this struct */
+ unsigned int count; /* incremented by ops_func_count() on every invocation */
+};
+
+/* Trivial tracer with an empty body; all four arguments are ignored. */
+static void ops_func_nop(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op,
+ struct ftrace_regs *fregs)
+{
+ /* do nothing */
+}
+
+/*
+ * Counting tracer: bump the call counter of the sample_ops that embeds @op.
+ * @ip, @parent_ip and @fregs are unused.
+ */
+static void ops_func_count(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *op,
+			   struct ftrace_regs *fregs)
+{
+	container_of(op, struct sample_ops, ops)->count++;
+}
+
+static struct sample_ops *ops_relevant;
+static struct sample_ops *ops_irrelevant;
+
+/*
+ * Allocate a zeroed array of @nr sample_ops and, for each entry, install
+ * @func and @flags, filter it on @tracee and register it with ftrace.
+ *
+ * Filter and registration failures only trigger a one-time warning; the
+ * array is returned regardless. Returns NULL (after warning) if the
+ * allocation itself fails.
+ */
+static struct sample_ops *ops_alloc_init(void *tracee, ftrace_func_t func,
+					 unsigned long flags, int nr)
+{
+	struct sample_ops *array;
+	unsigned int idx;
+
+	array = kcalloc(nr, sizeof(*array), GFP_KERNEL);
+	if (WARN_ON_ONCE(!array))
+		return NULL;
+
+	for (idx = 0; idx < nr; idx++) {
+		struct ftrace_ops *fops = &array[idx].ops;
+
+		fops->func = func;
+		fops->flags = flags;
+		WARN_ON_ONCE(ftrace_set_filter_ip(fops, (unsigned long)tracee, 0, 0));
+		WARN_ON_ONCE(register_ftrace_function(fops));
+	}
+
+	return array;
+}
+
+/*
+ * Undo ops_alloc_init(): unregister each of the @nr entries in @ops, drop
+ * its filter, then free the array. A NULL @ops is ignored.
+ */
+static void ops_destroy(struct sample_ops *ops, int nr)
+{
+	unsigned int idx = 0;
+
+	if (ops == NULL)
+		return;
+
+	while (idx < nr) {
+		struct ftrace_ops *fops = &ops[idx++].ops;
+
+		WARN_ON_ONCE(unregister_ftrace_function(fops));
+		ftrace_free_filter(fops);
+	}
+
+	kfree(ops);
+}
+
+/*
+ * Verify that the tracer of each of the @nr entries in @ops was invoked
+ * exactly @expected_count times, warning once per entry that was not.
+ *
+ * Does nothing when @ops is NULL or the check_count parameter is off.
+ */
+static void ops_check(struct sample_ops *ops, int nr,
+		      unsigned int expected_count)
+{
+	if (!ops || !check_count)
+		return;
+
+	for (unsigned int i = 0; i < nr; i++) {
+		/* Index each entry: ops->count would only ever read ops[0]. */
+		if (ops[i].count == expected_count)
+			continue;
+		pr_warn("Counter called %u times (expected %u)\n",
+			ops[i].count, expected_count);
+	}
+}
+
+static ftrace_func_t tracer_relevant = ops_func_nop;
+static ftrace_func_t tracer_irrelevant = ops_func_nop;
+
+/*
+ * Module entry point: run the benchmark once.
+ *
+ * Builds the ops flags from the module parameters, registers
+ * nr_ops_relevant ops on tracee_relevant() and nr_ops_irrelevant ops on
+ * tracee_irrelevant(), times nr_function_calls calls to tracee_relevant(),
+ * optionally checks the tracer counters, and reports the result.
+ *
+ * Returns 0 only when 'persist' is set (the ops stay registered); otherwise
+ * tears everything down and returns -EINVAL.
+ */
+static int __init ftrace_ops_sample_init(void)
+{
+	unsigned long flags = 0;
+	ktime_t start, end;
+	u64 period;
+
+	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && save_regs) {
+		pr_info("this kernel does not support saving registers\n");
+		save_regs = false;
+	} else if (save_regs) {
+		flags |= FTRACE_OPS_FL_SAVE_REGS;
+	}
+
+	if (assist_recursion)
+		flags |= FTRACE_OPS_FL_RECURSION;
+
+	if (assist_rcu)
+		flags |= FTRACE_OPS_FL_RCU;
+
+	if (check_count) {
+		tracer_relevant = ops_func_count;
+		tracer_irrelevant = ops_func_count;
+	}
+
+	pr_info("registering:\n"
+		" relevant ops: %u\n"
+		" tracee: %ps\n"
+		" tracer: %ps\n"
+		" irrelevant ops: %u\n"
+		" tracee: %ps\n"
+		" tracer: %ps\n"
+		" saving registers: %s\n"
+		" assist recursion: %s\n"
+		" assist RCU: %s\n",
+		nr_ops_relevant, tracee_relevant, tracer_relevant,
+		nr_ops_irrelevant, tracee_irrelevant, tracer_irrelevant,
+		save_regs ? "YES" : "NO",
+		assist_recursion ? "YES" : "NO",
+		assist_rcu ? "YES" : "NO");
+
+	ops_relevant = ops_alloc_init(tracee_relevant, tracer_relevant,
+				      flags, nr_ops_relevant);
+	ops_irrelevant = ops_alloc_init(tracee_irrelevant, tracer_irrelevant,
+					flags, nr_ops_irrelevant);
+
+	start = ktime_get();
+	for (unsigned int i = 0; i < nr_function_calls; i++)
+		tracee_relevant();
+	end = ktime_get();
+
+	ops_check(ops_relevant, nr_ops_relevant, nr_function_calls);
+	ops_check(ops_irrelevant, nr_ops_irrelevant, 0);
+
+	period = ktime_to_ns(ktime_sub(end, start));
+
+	/*
+	 * nr_function_calls is a module parameter and may be 0; report a
+	 * per-call time of 0 in that case instead of dividing by zero.
+	 */
+	pr_info("Attempted %u calls to %ps in %lluns (%lluns / call)\n",
+		nr_function_calls, tracee_relevant,
+		period,
+		nr_function_calls ? div_u64(period, nr_function_calls) : 0ULL);
+
+	if (persist)
+		return 0;
+
+	ops_destroy(ops_relevant, nr_ops_relevant);
+	ops_destroy(ops_irrelevant, nr_ops_irrelevant);
+
+	/*
+	 * The benchmark completed successfully, but there's no reason to keep
+	 * the module around. Return an error so the user doesn't have to
+	 * manually unload the module.
+	 */
+	return -EINVAL;
+}
+module_init(ftrace_ops_sample_init);
+
+/*
+ * Module exit: unregister and free both ops arrays. ops_destroy() returns
+ * early for a NULL array, so a failed allocation in init is tolerated.
+ */
+static void __exit ftrace_ops_sample_exit(void)
+{
+ ops_destroy(ops_relevant, nr_ops_relevant);
+ ops_destroy(ops_irrelevant, nr_ops_irrelevant);
+}
+module_exit(ftrace_ops_sample_exit);
+
+MODULE_AUTHOR("Mark Rutland");
+MODULE_DESCRIPTION("Example of using custom ftrace_ops");
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c
index 6aba02a31c96..4147616102f9 100644
--- a/samples/ftrace/sample-trace-array.c
+++ b/samples/ftrace/sample-trace-array.c
@@ -82,7 +82,7 @@ static int simple_thread(void *arg)
while (!kthread_should_stop())
simple_thread_func(count++);
- del_timer(&mytimer);
+ timer_delete(&mytimer);
cancel_work_sync(&trace_work);
/*
@@ -105,14 +105,14 @@ static int __init sample_trace_array_init(void)
* NOTE: This function increments the reference counter
* associated with the trace array - "tr".
*/
- tr = trace_array_get_by_name("sample-instance");
+ tr = trace_array_get_by_name("sample-instance", "sched,timer,kprobes");
if (!tr)
return -1;
/*
* If context specific per-cpu buffers havent already been allocated.
*/
- trace_printk_init_buffers();
+ trace_array_init_printk(tr);
simple_tsk = kthread_run(simple_thread, NULL, "sample-instance");
if (IS_ERR(simple_tsk)) {
diff --git a/samples/hid/.gitignore b/samples/hid/.gitignore
new file mode 100644
index 000000000000..3ea0fed3bbad
--- /dev/null
+++ b/samples/hid/.gitignore
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+hid_mouse
+hid_surface_dial
+*.out
+*.skel.h
+/vmlinux.h
+/bpftool/
+/libbpf/
diff --git a/samples/hid/Makefile b/samples/hid/Makefile
new file mode 100644
index 000000000000..db5a077c77fc
--- /dev/null
+++ b/samples/hid/Makefile
@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: GPL-2.0
+
+HID_SAMPLES_PATH ?= $(abspath $(src))
+TOOLS_PATH := $(HID_SAMPLES_PATH)/../../tools
+
+pound := \#
+
+# List of programs to build
+tprogs-y += hid_mouse
+tprogs-y += hid_surface_dial
+
+# Libbpf dependencies
+LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf
+LIBBPF_OUTPUT = $(abspath $(HID_SAMPLES_PATH))/libbpf
+LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
+LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
+LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
+
+EXTRA_BPF_HEADERS := hid_bpf_helpers.h
+
+hid_mouse-objs := hid_mouse.o
+hid_surface_dial-objs := hid_surface_dial.o
+
+# Tell kbuild to always build the programs
+always-y := $(tprogs-y)
+
+ifeq ($(ARCH), arm)
+# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
+# headers when arm instruction set identification is requested.
+ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS))
+BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
+TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
+endif
+
+ifeq ($(ARCH), mips)
+TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__
+ifdef CONFIG_MACH_LOONGSON64
+BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-loongson64
+BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic
+endif
+endif
+
+COMMON_CFLAGS += -Wall -O2
+COMMON_CFLAGS += -Wmissing-prototypes
+COMMON_CFLAGS += -Wstrict-prototypes
+
+TPROGS_CFLAGS += $(COMMON_CFLAGS)
+TPROGS_CFLAGS += -I$(objtree)/usr/include
+TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
+TPROGS_CFLAGS += -I$(srctree)/tools/include
+
+ifdef SYSROOT
+COMMON_CFLAGS += --sysroot=$(SYSROOT)
+TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
+endif
+
+TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
+
+# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
+# make M=samples/hid LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
+LLC ?= llc
+CLANG ?= clang
+OPT ?= opt
+LLVM_DIS ?= llvm-dis
+LLVM_OBJCOPY ?= llvm-objcopy
+LLVM_READELF ?= llvm-readelf
+BTF_PAHOLE ?= pahole
+
+# Detect that we're cross compiling and use the cross compiler
+ifdef CROSS_COMPILE
+CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+# Don't evaluate probes and warnings if we need to run make recursively
+ifneq ($(src),)
+HDR_PROBE := $(shell printf "$(pound)include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
+ $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \
+ -o /dev/null 2>/dev/null && echo okay)
+
+ifeq ($(HDR_PROBE),)
+$(warning WARNING: Detected possible issues with include path.)
+$(warning WARNING: Please install kernel headers locally (make headers_install).)
+endif
+
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ $(CLANG) --target=bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+ $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
+
+BPF_EXTRA_CFLAGS += -fno-stack-protector
+ifneq ($(BTF_LLVM_PROBE),)
+ BPF_EXTRA_CFLAGS += -g
+else
+ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
+ BPF_EXTRA_CFLAGS += -g
+ LLC_FLAGS += -mattr=dwarfris
+ DWARF2BTF = y
+endif
+endif
+endif
+
+# Trick to allow make to be run from this directory
+all:
+ $(MAKE) -C ../../ M=$(CURDIR) HID_SAMPLES_PATH=$(CURDIR)
+
+clean:
+ $(MAKE) -C ../../ M=$(CURDIR) clean
+ @find $(CURDIR) -type f -name '*~' -delete
+ @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool
+
+$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
+# Fix up variables inherited from Kbuild that tools/ build system won't like
+ $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \
+ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(HID_SAMPLES_PATH)/../../ \
+ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
+ $@ install_headers
+
+BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
+BPFTOOL_OUTPUT := $(abspath $(HID_SAMPLES_PATH))/bpftool
+BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT)
+ $(MAKE) -C $(BPFTOOLDIR) srctree=$(HID_SAMPLES_PATH)/../../ \
+ OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap
+
+$(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT):
+ $(call msg,MKDIR,$@)
+ $(Q)mkdir -p $@
+
+FORCE:
+
+
+# Verify LLVM compiler tools are available and bpf target is supported by llc
+.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
+
+verify_cmds: $(CLANG) $(LLC)
+ @for TOOL in $^ ; do \
+ if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \
+ echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\
+ exit 1; \
+ else true; fi; \
+ done
+
+verify_target_bpf: verify_cmds
+ @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \
+ echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\
+ echo " NOTICE: LLVM version >= 3.7.1 required" ;\
+ exit 2; \
+ else true; fi
+
+$(HID_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
+$(src)/*.c: verify_target_bpf $(LIBBPF)
+
+libbpf_hdrs: $(LIBBPF)
+
+.PHONY: libbpf_hdrs
+
+$(obj)/hid_mouse.o: $(obj)/hid_mouse.skel.h
+$(obj)/hid_surface_dial.o: $(obj)/hid_surface_dial.skel.h
+
+-include $(HID_SAMPLES_PATH)/Makefile.target
+
+VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \
+ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \
+ $(abspath $(objtree)/vmlinux)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+ifeq ($(VMLINUX_BTF),)
+ $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\
+ build the kernel or set VMLINUX_BTF or VMLINUX_H variable)
+endif
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+clean-files += vmlinux.h
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+endef
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+
+EXTRA_BPF_HEADERS_SRC := $(addprefix $(src)/,$(EXTRA_BPF_HEADERS))
+
+$(obj)/%.bpf.o: $(src)/%.bpf.c $(EXTRA_BPF_HEADERS_SRC) $(obj)/vmlinux.h
+ @echo " CLANG-BPF " $@
+ $(Q)$(CLANG) -g -O2 --target=bpf -D__TARGET_ARCH_$(SRCARCH) \
+ -Wno-compare-distinct-pointer-types -I$(srctree)/include \
+ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \
+ -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $(filter %.bpf.c,$^) -o $@
+
+LINKED_SKELS := hid_mouse.skel.h hid_surface_dial.skel.h
+clean-files += $(LINKED_SKELS)
+
+hid_mouse.skel.h-deps := hid_mouse.bpf.o
+hid_surface_dial.skel.h-deps := hid_surface_dial.bpf.o
+
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
+BPF_SRCS_LINKED := $(notdir $(wildcard $(src)/*.bpf.c))
+BPF_OBJS_LINKED := $(patsubst %.bpf.c,$(obj)/%.bpf.o, $(BPF_SRCS_LINKED))
+BPF_SKELS_LINKED := $(addprefix $(obj)/,$(LINKED_SKELS))
+
+$(BPF_SKELS_LINKED): $(BPF_OBJS_LINKED) $(BPFTOOL)
+ @echo " BPF GEN-OBJ " $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen object $(@:.skel.h=.lbpf.o) $(addprefix $(obj)/,$($(@F)-deps))
+ @echo " BPF GEN-SKEL" $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=.lbpf.o) name $(notdir $(@:.skel.h=)) > $@
+
+# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
+# But, there is no easy way to fix it, so just exclude it since it is
+# useless for BPF samples.
+# below we use long chain of commands, clang | opt | llvm-dis | llc,
+# to generate final object file. 'clang' compiles the source into IR
+# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin
+# processing (llvm12) and IR optimizations. 'llvm-dis' converts
+# 'opt' output to IR, and finally 'llc' generates bpf byte code.
+$(obj)/%.o: $(src)/%.c
+ @echo " CLANG-bpf " $@
+ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
+ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
+ -I$(LIBBPF_INCLUDE) \
+ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
+ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
+ -Wno-gnu-variable-sized-type-not-at-end \
+ -Wno-address-of-packed-member -Wno-tautological-compare \
+ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
+ -fno-asynchronous-unwind-tables \
+ -I$(srctree)/samples/hid/ \
+ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
+ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+ $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
diff --git a/samples/hid/Makefile.target b/samples/hid/Makefile.target
new file mode 100644
index 000000000000..7621f55e2947
--- /dev/null
+++ b/samples/hid/Makefile.target
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+# ==========================================================================
+# Building binaries on the host system
+# Binaries are not used during the compilation of the kernel, and are intended
+# to be built for the target board (which can of course be the host). Added to
+# build binaries that are not meant to run on the host system.
+#
+# Sample syntax
+# tprogs-y := xsk_example
+# Will compile xsk_example.c and create an executable named xsk_example
+#
+# tprogs-y := xdpsock
+# xdpsock-objs := xdpsock_1.o xdpsock_2.o
+# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable
+# xdpsock, based on xdpsock_1.o and xdpsock_2.o
+#
+# Derived from scripts/Makefile.host
+#
+__tprogs := $(sort $(tprogs-y))
+
+# C code
+# Executables compiled from a single .c file
+tprog-csingle := $(foreach m,$(__tprogs), \
+ $(if $($(m)-objs),,$(m)))
+
+# C executables linked based on several .o files
+tprog-cmulti := $(foreach m,$(__tprogs),\
+ $(if $($(m)-objs),$(m)))
+
+# Object (.o) files compiled from .c files
+tprog-cobjs := $(sort $(foreach m,$(__tprogs),$($(m)-objs)))
+
+tprog-csingle := $(addprefix $(obj)/,$(tprog-csingle))
+tprog-cmulti := $(addprefix $(obj)/,$(tprog-cmulti))
+tprog-cobjs := $(addprefix $(obj)/,$(tprog-cobjs))
+
+#####
+# Handle options to gcc. Support building with separate output directory
+
+_tprogc_flags = $(TPROGS_CFLAGS) \
+ $(TPROGCFLAGS_$(basetarget).o)
+
+# $(objtree)/$(obj) for including generated headers from checked-in source files
+ifeq ($(KBUILD_EXTMOD),)
+ifdef building_out_of_srctree
+_tprogc_flags += -I $(objtree)/$(obj)
+endif
+endif
+
+tprogc_flags = -Wp,-MD,$(depfile) $(_tprogc_flags)
+
+# Create executable from a single .c file
+# tprog-csingle -> Executable
+quiet_cmd_tprog-csingle = CC $@
+ cmd_tprog-csingle = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-csingle)
+
+# Link an executable based on list of .o files, all plain c
+# tprog-cmulti -> executable
+quiet_cmd_tprog-cmulti = LD $@
+ cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \
+ $(addprefix $(obj)/,$($(@F)-objs)) \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-cmulti): $(tprog-cobjs) FORCE
+ $(call if_changed,tprog-cmulti)
+$(call multi_depend, $(tprog-cmulti), , -objs)
+
+# Create .o file from a single .c file
+# tprog-cobjs -> .o
+quiet_cmd_tprog-cobjs = CC $@
+ cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $<
+$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-cobjs)
diff --git a/samples/hid/hid_bpf_helpers.h b/samples/hid/hid_bpf_helpers.h
new file mode 100644
index 000000000000..4fff31dbe0e7
--- /dev/null
+++ b/samples/hid/hid_bpf_helpers.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2022 Benjamin Tissoires
+ */
+
+#ifndef __HID_BPF_HELPERS_H
+#define __HID_BPF_HELPERS_H
+
+/*
+ * The following are kfuncs exported by HID for HID-BPF. All of them are
+ * declared extern with the __ksym attribute.
+ */
+
+/*
+ * Get a pointer into the data attached to @ctx, starting at @offset and
+ * covering @__sz bytes. The sample programs treat a NULL return as failure.
+ */
+extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx,
+ unsigned int offset,
+ const size_t __sz) __ksym;
+extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym;
+/*
+ * Context allocation/release pair, keyed by HID id. The samples check the
+ * allocation result against NULL and release the context when done.
+ */
+extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym;
+extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym;
+/*
+ * Issue a report request of the given @type/@reqtype using @data as the
+ * buffer of @buf__sz bytes; returns an int status that the samples log.
+ */
+extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx,
+ __u8 *data,
+ size_t buf__sz,
+ enum hid_report_type type,
+ enum hid_class_request reqtype) __ksym;
+
+#endif /* __HID_BPF_HELPERS_H */
diff --git a/samples/hid/hid_mouse.bpf.c b/samples/hid/hid_mouse.bpf.c
new file mode 100644
index 000000000000..f7f722dcf56d
--- /dev/null
+++ b/samples/hid/hid_mouse.bpf.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "hid_bpf_helpers.h"
+
+/*
+ * Log the first 9 bytes of the incoming event, negate the signed 16-bit
+ * value stored in bytes 3 (low) and 4 (high) - the Y axis, going by the
+ * function name - and log the 9 bytes again.
+ *
+ * Always returns 0, including when the event data cannot be obtained.
+ */
+static int hid_y_event(struct hid_bpf_ctx *hctx)
+{
+ s16 y;
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 9 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ bpf_printk("event: size: %d", hctx->size);
+ bpf_printk("incoming event: %02x %02x %02x",
+ data[0],
+ data[1],
+ data[2]);
+ bpf_printk(" %02x %02x %02x",
+ data[3],
+ data[4],
+ data[5]);
+ bpf_printk(" %02x %02x %02x",
+ data[6],
+ data[7],
+ data[8]);
+
+ /* reassemble the 16-bit value: byte 3 is the low half, byte 4 the high */
+ y = data[3] | (data[4] << 8);
+
+ y = -y;
+
+ /* store the negated value back in the same byte order */
+ data[3] = y & 0xFF;
+ data[4] = (y >> 8) & 0xFF;
+
+ bpf_printk("modified event: %02x %02x %02x",
+ data[0],
+ data[1],
+ data[2]);
+ bpf_printk(" %02x %02x %02x",
+ data[3],
+ data[4],
+ data[5]);
+ bpf_printk(" %02x %02x %02x",
+ data[6],
+ data[7],
+ data[8]);
+
+ return 0;
+}
+
+/*
+ * Negate the signed 16-bit value stored in bytes 1 (low) and 2 (high) of the
+ * event - the X axis, going by the function name.
+ *
+ * Always returns 0, including when the event data cannot be obtained.
+ */
+static int hid_x_event(struct hid_bpf_ctx *hctx)
+{
+ s16 x;
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 9 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* byte 1 is the low half, byte 2 the high half */
+ x = data[1] | (data[2] << 8);
+
+ x = -x;
+
+ data[1] = x & 0xFF;
+ data[2] = (x >> 8) & 0xFF;
+ return 0;
+}
+
+/*
+ * Device-event callback (section "struct_ops/hid_device_event"): apply the Y
+ * handler, then the X handler, to the same context. A non-zero result from
+ * the Y handler would be returned as-is without running the X handler.
+ */
+SEC("struct_ops/hid_device_event")
+int BPF_PROG(hid_event, struct hid_bpf_ctx *hctx, enum hid_report_type type)
+{
+ int ret = hid_y_event(hctx);
+
+ if (ret)
+ return ret;
+
+ return hid_x_event(hctx);
+}
+
+
+/*
+ * Report-descriptor callback (section "struct_ops/hid_rdesc_fixup"): log the
+ * first 9 descriptor bytes, then overwrite bytes 39 and 41 so that the X and
+ * Y usages trade places.
+ *
+ * Always returns 0, including when the descriptor data cannot be obtained.
+ */
+SEC("struct_ops/hid_rdesc_fixup")
+int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hctx)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 4096 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ bpf_printk("rdesc: %02x %02x %02x",
+ data[0],
+ data[1],
+ data[2]);
+ bpf_printk(" %02x %02x %02x",
+ data[3],
+ data[4],
+ data[5]);
+ bpf_printk(" %02x %02x %02x ...",
+ data[6],
+ data[7],
+ data[8]);
+
+ /*
+ * The original report descriptor contains:
+ *
+ * 0x05, 0x01, // Usage Page (Generic Desktop) 30
+ * 0x16, 0x01, 0x80, // Logical Minimum (-32767) 32
+ * 0x26, 0xff, 0x7f, // Logical Maximum (32767) 35
+ * 0x09, 0x30, // Usage (X) 38
+ * 0x09, 0x31, // Usage (Y) 40
+ *
+ * So byte 39 contains Usage X and byte 41 Usage Y.
+ *
+ * We simply swap the axes here.
+ */
+ data[39] = 0x31;
+ data[41] = 0x30;
+
+ return 0;
+}
+
+/*
+ * struct_ops map (section ".struct_ops.link") that wires hid_rdesc_fixup and
+ * hid_event into a struct hid_bpf_ops.
+ */
+SEC(".struct_ops.link")
+struct hid_bpf_ops mouse_invert = {
+ .hid_rdesc_fixup = (void *)hid_rdesc_fixup,
+ .hid_device_event = (void *)hid_event,
+};
+
+/* License string placed in the "license" section. */
+char _license[] SEC("license") = "GPL";
diff --git a/samples/hid/hid_mouse.c b/samples/hid/hid_mouse.c
new file mode 100644
index 000000000000..4b80d4e4c154
--- /dev/null
+++ b/samples/hid/hid_mouse.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Benjamin Tissoires
+ *
+ * This is a pure HID-BPF example, and should be considered as such:
+ * on the Etekcity Scroll 6E, the X and Y axes will be swapped and
+ * inverted. On any other device... Not sure what this will do.
+ *
+ * This C main file is generic though. To adapt the code and test, users
+ * must amend only the .bpf.c file, which this program will load any
+ * eBPF program it finds.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#include <linux/bpf.h>
+#include <linux/errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "hid_mouse.skel.h"
+
+/*
+ * Cleared by the signal handler to ask the wait loop in main() to stop.
+ * volatile sig_atomic_t is the object type a signal handler may portably
+ * write to.
+ */
+static volatile sig_atomic_t running = 1;
+
+/*
+ * SIGINT/SIGTERM handler: only request shutdown.
+ *
+ * exit() is not async-signal-safe, and calling it from here also meant the
+ * teardown code after the wait loop in main() could never run. The signal
+ * interrupts the pending sleep(), so the loop sees the flag promptly.
+ */
+static void int_exit(int sig)
+{
+	(void)sig;
+	running = 0;
+}
+
+/*
+ * Print the command synopsis and a short description to stderr.
+ * @prog: program name to show in the synopsis.
+ */
+static void usage(const char *prog)
+{
+	static const char description[] =
+		"This program will upload and attach a HID-BPF program to the given device.\n"
+		"On the Etekcity Scroll 6E, the X and Y axis will be inverted, but on any other\n"
+		"device, chances are high that the device will not be working anymore\n\n"
+		"consider this as a demo and adapt the eBPF program to your needs\n"
+		"Hit Ctrl-C to unbind the program and reset the device\n";
+
+	/* __func__ is "usage", which doubles as the label of the synopsis line */
+	fprintf(stderr,
+		"%s: %s /sys/bus/hid/devices/0BUS:0VID:0PID:00ID\n\n",
+		__func__, prog);
+	fputs(description, stderr);
+}
+
+/*
+ * Extract the numeric HID id from a sysfs device path.
+ *
+ * @path: a HID device directory whose last component looks like
+ *        BBBB:VVVV:PPPP.IIII; the id is the hexadecimal field after the '.'.
+ *
+ * Returns the id (>= 0) on success, -ENOENT when <path>/uevent cannot be
+ * opened, or -EINVAL (with errno set to EINVAL) when the directory name does
+ * not carry a valid id.
+ */
+static int get_hid_id(const char *path)
+{
+	/* length of the "BUS:VID:PID." prefix; sizeof counts the NUL, so drop it */
+	const size_t prefix_len = sizeof("0003:0001:0A37.") - 1;
+	const char *str_id, *dir;
+	char uevent[1024];
+	char *end;
+	long id;
+	int fd;
+
+	memset(uevent, 0, sizeof(uevent));
+	snprintf(uevent, sizeof(uevent) - 1, "%s/uevent", path);
+
+	/* the file is only opened to check that the device directory exists */
+	fd = open(uevent, O_RDONLY | O_NONBLOCK);
+	if (fd < 0)
+		return -ENOENT;
+
+	close(fd);
+
+	dir = basename((char *)path);
+
+	/* never step past the end of a name shorter than the prefix */
+	if (strlen(dir) <= prefix_len)
+		goto invalid;
+
+	str_id = dir + prefix_len;
+	id = strtol(str_id, &end, 16);
+	/* reject empty/garbled fields and values that do not fit an int */
+	if (end == str_id || *end != '\0' || id < 0 || id != (int)id)
+		goto invalid;
+
+	return (int)id;
+
+invalid:
+	errno = EINVAL;
+	return -EINVAL;
+}
+
+/*
+ * Open and load the hid_mouse skeleton, bind its struct_ops map to the HID
+ * device named by the sysfs path argument, and keep it attached until the
+ * wait loop is told to stop.
+ *
+ * Returns 0 on a clean shutdown and non-zero on usage or setup errors.
+ */
+int main(int argc, char **argv)
+{
+	struct hid_mouse *skel;
+	struct bpf_link *link = NULL;
+	const char *optstr = "";
+	const char *sysfs_path;
+	int opt, hid_id;
+	int ret = 1;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (optind == argc) {
+		usage(basename(argv[0]));
+		return 1;
+	}
+
+	/* optind < argc here, so argv[optind] is a valid argument string */
+	sysfs_path = argv[optind];
+
+	skel = hid_mouse__open();
+	if (!skel) {
+		fprintf(stderr, "%s %s:%d\n", __func__, __FILE__, __LINE__);
+		return -1;
+	}
+
+	hid_id = get_hid_id(sysfs_path);
+	if (hid_id < 0) {
+		fprintf(stderr, "can not open HID device: %m\n");
+		goto out;
+	}
+	skel->struct_ops.mouse_invert->hid_id = hid_id;
+
+	if (hid_mouse__load(skel) < 0) {
+		fprintf(stderr, "can not load HID-BPF program: %m\n");
+		goto out;
+	}
+
+	link = bpf_map__attach_struct_ops(skel->maps.mouse_invert);
+	if (!link) {
+		fprintf(stderr, "can not attach HID-BPF program: %m\n");
+		goto out;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	while (running)
+		sleep(1);
+
+	ret = 0;
+
+out:
+	/* single exit path so the link and skeleton are released on errors too */
+	if (link)
+		bpf_link__destroy(link);
+	hid_mouse__destroy(skel);
+
+	return ret;
+}
diff --git a/samples/hid/hid_surface_dial.bpf.c b/samples/hid/hid_surface_dial.bpf.c
new file mode 100644
index 000000000000..527d584812ab
--- /dev/null
+++ b/samples/hid/hid_surface_dial.bpf.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Benjamin Tissoires
+ */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "hid_bpf_helpers.h"
+
+#define HID_UP_BUTTON 0x0009
+#define HID_GD_WHEEL 0x0038
+
+/*
+ * Device-event callback (section "struct_ops/hid_device_event"): clear bit 1
+ * of byte 1 and zero bytes 4-7 of the event; the inline comments label these
+ * fields Touch, X and Y.
+ *
+ * Always returns 0, including when the event data cannot be obtained.
+ */
+SEC("struct_ops/hid_device_event")
+int BPF_PROG(hid_event, struct hid_bpf_ctx *hctx)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 9 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* Touch */
+ data[1] &= 0xfd;
+
+ /* X */
+ data[4] = 0;
+ data[5] = 0;
+
+ /* Y */
+ data[6] = 0;
+ data[7] = 0;
+
+ return 0;
+}
+
+/*
+ * Values written into the report descriptor by hid_rdesc_fixup().
+ * 72 == 360 / 5 -> 1 report every 5 degrees
+ */
+int resolution = 72;
+int physical = 5;
+
+/*
+ * Context of the set_haptic syscall program: hid is passed to
+ * hid_bpf_allocate_context(), retval receives the result of the last
+ * hid_bpf_hw_request() call.
+ */
+struct haptic_syscall_args {
+ unsigned int hid;
+ int retval;
+};
+
+/* Buffer used for both report requests in set_haptic(); byte 0 is the report ID. */
+static __u8 haptic_data[8];
+
+/*
+ * Syscall program: fetch feature report 1 into haptic_data with a GET_REPORT
+ * request, set byte 4 ("haptic Auto Trigger") to 3 when the 16-bit value at
+ * bytes 1-2 is not 3600 and to 0 otherwise, then send the buffer back with a
+ * SET_REPORT request.
+ *
+ * The result of the SET_REPORT request is stored in args->retval.
+ * Returns 0 once both requests were issued, -1 when no context could be
+ * allocated for args->hid, and -7 from the size check below.
+ */
+SEC("syscall")
+int set_haptic(struct haptic_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = sizeof(haptic_data);
+ u16 *res;
+ int ret;
+
+ /*
+ * size is initialised from sizeof(haptic_data) just above, so this
+ * comparison cannot be true as written.
+ * NOTE(review): possibly kept as an explicit bound for the verifier - confirm.
+ */
+ if (size > sizeof(haptic_data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ haptic_data[0] = 1; /* report ID */
+
+ ret = hid_bpf_hw_request(ctx, haptic_data, size, HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
+
+ /* the text says "probed/remove", but ret is the GET_REPORT result from above */
+ bpf_printk("probed/remove event ret value: %d", ret);
+ bpf_printk("buf: %02x %02x %02x",
+ haptic_data[0],
+ haptic_data[1],
+ haptic_data[2]);
+ bpf_printk(" %02x %02x %02x",
+ haptic_data[3],
+ haptic_data[4],
+ haptic_data[5]);
+ bpf_printk(" %02x %02x",
+ haptic_data[6],
+ haptic_data[7]);
+
+ /* whenever resolution multiplier is not 3600, we have the fixed report descriptor */
+ res = (u16 *)&haptic_data[1];
+ if (*res != 3600) {
+// haptic_data[1] = 72; /* resolution multiplier */
+// haptic_data[2] = 0; /* resolution multiplier */
+// haptic_data[3] = 0; /* Repeat Count */
+ haptic_data[4] = 3; /* haptic Auto Trigger */
+// haptic_data[5] = 5; /* Waveform Cutoff Time */
+// haptic_data[6] = 80; /* Retrigger Period */
+// haptic_data[7] = 0; /* Retrigger Period */
+ } else {
+ haptic_data[4] = 0;
+ }
+
+ ret = hid_bpf_hw_request(ctx, haptic_data, size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+
+ bpf_printk("set haptic ret value: %d -> %d", ret, haptic_data[4]);
+
+ /* hand the SET_REPORT result back to the caller through the context */
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
+/*
+ * Convert REL_DIAL into REL_WHEEL
+ *
+ * Report-descriptor callback (section "struct_ops/hid_rdesc_fixup"): patch
+ * fixed byte offsets of the descriptor in place - bytes 31 and 33 (touch to
+ * button), byte 45 (wheel usage), the 16-bit fields at offsets 61 and 66
+ * (filled from the globals physical and resolution), and bytes 88 and 98.
+ *
+ * Always returns 0, including when the descriptor data cannot be obtained.
+ */
+SEC("struct_ops/hid_rdesc_fixup")
+int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hctx)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 4096 /* size */);
+ __u16 *res, *phys;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* Convert TOUCH into a button */
+ data[31] = HID_UP_BUTTON;
+ data[33] = 2;
+
+ /* Convert REL_DIAL into REL_WHEEL */
+ data[45] = HID_GD_WHEEL;
+
+ /* Change Resolution Multiplier */
+ /* both fields are written as 16-bit values at odd byte offsets */
+ phys = (__u16 *)&data[61];
+ *phys = physical;
+ res = (__u16 *)&data[66];
+ *res = resolution;
+
+ /* Convert X,Y from Abs to Rel */
+ data[88] = 0x06;
+ data[98] = 0x06;
+
+ return 0;
+}
+
+/*
+ * struct_ops map (section ".struct_ops.link") that wires hid_rdesc_fixup and
+ * hid_event into a struct hid_bpf_ops.
+ */
+SEC(".struct_ops.link")
+struct hid_bpf_ops surface_dial = {
+ .hid_rdesc_fixup = (void *)hid_rdesc_fixup,
+ .hid_device_event = (void *)hid_event,
+};
+
+/* License string and version word placed in their dedicated sections. */
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = 1;
diff --git a/samples/hid/hid_surface_dial.c b/samples/hid/hid_surface_dial.c
new file mode 100644
index 000000000000..9dd363845a85
--- /dev/null
+++ b/samples/hid/hid_surface_dial.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Benjamin Tissoires
+ *
+ * This program will morph the Microsoft Surface Dial into a mouse,
+ * and depending on the chosen resolution enable or not the haptic feedback:
+ * - a resolution (-r) of 3600 will report 3600 "ticks" in one full rotation
+ * without haptic feedback
+ * - any other resolution will report N "ticks" in a full rotation with haptic
+ * feedback
+ *
+ * A good default for low resolution haptic scrolling is 72 (1 "tick" every 5
+ * degrees), and set to 3600 for smooth scrolling.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#include <linux/bpf.h>
+#include <linux/errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "hid_surface_dial.skel.h"
+
+/*
+ * Cleared by the signal handler to ask the wait loop in main() to stop.
+ * volatile sig_atomic_t is the object type a signal handler may portably
+ * write to.
+ */
+static volatile sig_atomic_t running = 1;
+
+/* Context handed to the "set_haptic" syscall BPF program. */
+struct haptic_syscall_args {
+	unsigned int hid;
+	int retval;
+};
+
+/*
+ * SIGINT/SIGTERM handler: only request shutdown.
+ *
+ * exit() is not async-signal-safe, and calling it from here also meant the
+ * teardown code after the wait loop in main() could never run. The signal
+ * interrupts the pending sleep(), so the loop sees the flag promptly.
+ */
+static void int_exit(int sig)
+{
+	(void)sig;
+	running = 0;
+}
+
+/*
+ * Print the command synopsis, the option list and a short description to
+ * stderr.
+ * @prog: program name to show in the synopsis.
+ */
+static void usage(const char *prog)
+{
+	static const char description[] =
+		"This program will morph the Microsoft Surface Dial into a mouse,\n"
+		"and depending on the chosen resolution enable or not the haptic feedback:\n"
+		"- a resolution (-r) of 3600 will report 3600 'ticks' in one full rotation\n"
+		" without haptic feedback\n"
+		"- any other resolution will report N 'ticks' in a full rotation with haptic\n"
+		" feedback\n"
+		"\n"
+		"A good default for low resolution haptic scrolling is 72 (1 'tick' every 5\n"
+		"degrees), and set to 3600 for smooth scrolling.\n";
+
+	/* __func__ is "usage", which doubles as the label of the synopsis line */
+	fprintf(stderr,
+		"%s: %s [OPTIONS] /sys/bus/hid/devices/0BUS:0VID:0PID:00ID\n\n"
+		" OPTIONS:\n"
+		" -r N\t set the given resolution to the device (number of ticks per 360°)\n\n",
+		__func__, prog);
+	fputs(description, stderr);
+}
+
+/*
+ * Extract the numeric HID id from a sysfs device path.
+ *
+ * @path: a HID device directory whose last component looks like
+ *        BBBB:VVVV:PPPP.IIII; the id is the hexadecimal field after the '.'.
+ *
+ * Returns the id (>= 0) on success, -ENOENT when <path>/uevent cannot be
+ * opened, or -EINVAL (with errno set to EINVAL) when the directory name does
+ * not carry a valid id.
+ */
+static int get_hid_id(const char *path)
+{
+	/* length of the "BUS:VID:PID." prefix; sizeof counts the NUL, so drop it */
+	const size_t prefix_len = sizeof("0003:0001:0A37.") - 1;
+	const char *str_id, *dir;
+	char uevent[1024];
+	char *end;
+	long id;
+	int fd;
+
+	memset(uevent, 0, sizeof(uevent));
+	snprintf(uevent, sizeof(uevent) - 1, "%s/uevent", path);
+
+	/* the file is only opened to check that the device directory exists */
+	fd = open(uevent, O_RDONLY | O_NONBLOCK);
+	if (fd < 0)
+		return -ENOENT;
+
+	close(fd);
+
+	dir = basename((char *)path);
+
+	/* never step past the end of a name shorter than the prefix */
+	if (strlen(dir) <= prefix_len)
+		goto invalid;
+
+	str_id = dir + prefix_len;
+	id = strtol(str_id, &end, 16);
+	/* reject empty/garbled fields and values that do not fit an int */
+	if (end == str_id || *end != '\0' || id < 0 || id != (int)id)
+		goto invalid;
+
+	return (int)id;
+
+invalid:
+	errno = EINVAL;
+	return -EINVAL;
+}
+
+/*
+ * Run the "set_haptic" syscall BPF program for the given HID device.
+ *
+ * @skel:   loaded skeleton providing the set_haptic program.
+ * @hid_id: HID id passed to the program through its context.
+ *
+ * Returns 0 on success and 1 on failure, after printing a message to stderr.
+ * Failure covers the test run itself, a negative return value from the
+ * program, and a negative result reported back in the context.
+ */
+static int set_haptic(struct hid_surface_dial *skel, int hid_id)
+{
+	struct haptic_syscall_args args = {
+		.hid = hid_id,
+		.retval = -1,
+	};
+	int haptic_fd, err;
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr,
+			    .ctx_in = &args,
+			    .ctx_size_in = sizeof(args),
+	);
+
+	haptic_fd = bpf_program__fd(skel->progs.set_haptic);
+	if (haptic_fd < 0) {
+		fprintf(stderr, "can't locate haptic prog: %m\n");
+		return 1;
+	}
+
+	err = bpf_prog_test_run_opts(haptic_fd, &tattr);
+	if (err) {
+		fprintf(stderr, "can't set haptic configuration to hid device %d: %m (err: %d)\n",
+			hid_id, err);
+		return 1;
+	}
+
+	/* a successful run only means the program executed; check its verdict */
+	if ((int)tattr.retval < 0) {
+		fprintf(stderr, "haptic program failed for hid device %d (ret: %d)\n",
+			hid_id, (int)tattr.retval);
+		return 1;
+	}
+
+	/* the program stores the result of its last hardware request here */
+	if (args.retval < 0) {
+		fprintf(stderr, "haptic request failed for hid device %d (ret: %d)\n",
+			hid_id, args.retval);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Open and load the hid_surface_dial skeleton, bind its struct_ops map to
+ * the HID device named by the sysfs path argument, configure the haptic
+ * feedback and keep everything attached until the wait loop is told to stop.
+ *
+ * Options: -r N sets the resolution (non-negative decimal, default 72).
+ *
+ * Returns 0 on a clean shutdown and non-zero on usage or setup errors.
+ */
+int main(int argc, char **argv)
+{
+	struct hid_surface_dial *skel;
+	const char *optstr = "r:";
+	struct bpf_link *link = NULL;
+	const char *sysfs_path;
+	int opt, hid_id, resolution = 72;
+	int ret = 1;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'r':
+		{
+			char *endp = NULL;
+			long l = -1;
+
+			if (optarg) {
+				errno = 0;
+				l = strtol(optarg, &endp, 10);
+				/*
+				 * reject empty strings, trailing junk and values
+				 * that overflow or do not fit an int
+				 */
+				if (endp == optarg || *endp ||
+				    errno == ERANGE || l != (int)l)
+					l = -1;
+			}
+
+			if (l < 0) {
+				fprintf(stderr,
+					"invalid r option %s - expecting a number\n",
+					optarg ? optarg : "");
+				exit(EXIT_FAILURE);
+			}
+
+			resolution = (int)l;
+			break;
+		}
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (optind == argc) {
+		usage(basename(argv[0]));
+		return 1;
+	}
+
+	/* optind < argc here, so argv[optind] is a valid argument string */
+	sysfs_path = argv[optind];
+
+	skel = hid_surface_dial__open();
+	if (!skel) {
+		fprintf(stderr, "%s %s:%d\n", __func__, __FILE__, __LINE__);
+		return -1;
+	}
+
+	hid_id = get_hid_id(sysfs_path);
+	if (hid_id < 0) {
+		fprintf(stderr, "can not open HID device: %m\n");
+		goto out;
+	}
+
+	skel->struct_ops.surface_dial->hid_id = hid_id;
+
+	if (hid_surface_dial__load(skel) < 0) {
+		fprintf(stderr, "can not load HID-BPF program: %m\n");
+		goto out;
+	}
+
+	/* hand the chosen values to the BPF side before attaching */
+	skel->data->resolution = resolution;
+	skel->data->physical = (int)(resolution / 72);
+
+	link = bpf_map__attach_struct_ops(skel->maps.surface_dial);
+	if (!link) {
+		fprintf(stderr, "can not attach HID-BPF program: %m\n");
+		goto out;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	/*
+	 * set_haptic() reports its own failures on stderr; the attached
+	 * programs stay in place either way, so the result is not fatal.
+	 */
+	set_haptic(skel, hid_id);
+
+	while (running)
+		sleep(1);
+
+	ret = 0;
+
+out:
+	/* single exit path so the link and skeleton are released on errors too */
+	if (link)
+		bpf_link__destroy(link);
+	hid_surface_dial__destroy(skel);
+
+	return ret;
+}
diff --git a/samples/hung_task/Makefile b/samples/hung_task/Makefile
new file mode 100644
index 000000000000..86036f1a204d
--- /dev/null
+++ b/samples/hung_task/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task_tests.o
diff --git a/samples/hung_task/hung_task_tests.c b/samples/hung_task/hung_task_tests.c
new file mode 100644
index 000000000000..0360ec916890
--- /dev/null
+++ b/samples/hung_task/hung_task_tests.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * hung_task_tests.c - Sample code for testing hung tasks with mutex,
+ * semaphore, etc.
+ *
+ * Usage: Load this module and read `<debugfs>/hung_task/mutex`,
+ * `<debugfs>/hung_task/semaphore`, `<debugfs>/hung_task/rw_semaphore_read`,
+ * `<debugfs>/hung_task/rw_semaphore_write`, etc., with 2 or more processes.
+ *
+ * This is for testing kernel hung_task error messages with various locking
+ * mechanisms (e.g., mutex, semaphore, rw_semaphore_read, rw_semaphore_write, etc.).
+ * Note that this may freeze your system or cause a panic. Use only for testing purposes.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/semaphore.h>
+#include <linux/rwsem.h>
+
+/* Names of the debugfs directory and of the test files created inside it */
+#define HUNG_TASK_DIR "hung_task"
+#define HUNG_TASK_MUTEX_FILE "mutex"
+#define HUNG_TASK_SEM_FILE "semaphore"
+#define HUNG_TASK_RWSEM_READ_FILE "rw_semaphore_read"
+#define HUNG_TASK_RWSEM_WRITE_FILE "rw_semaphore_write"
+/* How long a lock holder sleeps, in seconds (multiplied by 1000 at the call sites) */
+#define SLEEP_SECOND 256
+
+/* Payload returned to readers once the sleep is over */
+static const char dummy_string[] = "This is a dummy string.";
+/* Locks the readers contend on; both rw_semaphore files share dummy_rwsem */
+static DEFINE_MUTEX(dummy_mutex);
+static DEFINE_SEMAPHORE(dummy_sem, 1);
+static DECLARE_RWSEM(dummy_rwsem);
+/* debugfs directory created at init and removed at exit */
+static struct dentry *hung_task_dir;
+
+/*
+ * read() handler of the mutex file: sleep while holding dummy_mutex, then
+ * copy dummy_string to user space. Returns 0 once *ppos is past the string.
+ */
+static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	/* Only take the lock while there is still data left to hand out */
+	if (*ppos < sizeof(dummy_string)) {
+		/* A second reader blocks here, in uninterruptible sleep */
+		guard(mutex)(&dummy_mutex);
+
+		/* The lock holder sleeps here, interruptibly */
+		msleep_interruptible(SLEEP_SECOND * 1000);
+
+		return simple_read_from_buffer(user_buf, count, ppos,
+					       dummy_string,
+					       sizeof(dummy_string));
+	}
+
+	return 0;
+}
+
+/*
+ * read() handler of the semaphore file: sleep while holding dummy_sem,
+ * release it, then copy dummy_string to user space. Returns 0 once *ppos is
+ * past the string.
+ */
+static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf,
+				    size_t count, loff_t *ppos)
+{
+	/* Only take the semaphore while there is still data left to hand out */
+	if (*ppos < sizeof(dummy_string)) {
+		/* A second reader blocks here, in uninterruptible sleep */
+		down(&dummy_sem);
+
+		/* The holder sleeps here, interruptibly */
+		msleep_interruptible(SLEEP_SECOND * 1000);
+
+		up(&dummy_sem);
+
+		return simple_read_from_buffer(user_buf, count, ppos,
+					       dummy_string,
+					       sizeof(dummy_string));
+	}
+
+	return 0;
+}
+
+/*
+ * read() handler of the rw_semaphore_read file: sleep while holding
+ * dummy_rwsem for reading, release it, then copy dummy_string to user space.
+ * Returns 0 once *ppos is past the string.
+ */
+static ssize_t read_dummy_rwsem_read(struct file *file, char __user *user_buf,
+				     size_t count, loff_t *ppos)
+{
+	/* Only take the lock while there is still data left to hand out */
+	if (*ppos < sizeof(dummy_string)) {
+		/* Shared with other readers; blocks while a writer holds the lock */
+		down_read(&dummy_rwsem);
+
+		/* Long sleep under the lock, so that waiters may be flagged as hung */
+		msleep_interruptible(SLEEP_SECOND * 1000);
+
+		up_read(&dummy_rwsem);
+
+		return simple_read_from_buffer(user_buf, count, ppos,
+					       dummy_string,
+					       sizeof(dummy_string));
+	}
+
+	return 0;
+}
+
+/*
+ * read() handler of the rw_semaphore_write file: sleep while holding
+ * dummy_rwsem for writing, release it, then copy dummy_string to user space.
+ * Returns 0 once *ppos is past the string.
+ */
+static ssize_t read_dummy_rwsem_write(struct file *file, char __user *user_buf,
+				      size_t count, loff_t *ppos)
+{
+	/* Only take the lock while there is still data left to hand out */
+	if (*ppos < sizeof(dummy_string)) {
+		/* Exclusive: keeps out every other reader and writer */
+		down_write(&dummy_rwsem);
+
+		/* Long sleep under the lock, so that waiters may be flagged as hung */
+		msleep_interruptible(SLEEP_SECOND * 1000);
+
+		up_write(&dummy_rwsem);
+
+		return simple_read_from_buffer(user_buf, count, ppos,
+					       dummy_string,
+					       sizeof(dummy_string));
+	}
+
+	return 0;
+}
+
+/*
+ * One file_operations table per debugfs test file. Each provides only a
+ * .read handler.
+ */
+
+/* File operations for mutex */
+static const struct file_operations hung_task_mutex_fops = {
+ .read = read_dummy_mutex,
+};
+
+/* File operations for semaphore */
+static const struct file_operations hung_task_sem_fops = {
+ .read = read_dummy_semaphore,
+};
+
+/* File operations for rw_semaphore read */
+static const struct file_operations hung_task_rwsem_read_fops = {
+ .read = read_dummy_rwsem_read,
+};
+
+/* File operations for rw_semaphore write */
+static const struct file_operations hung_task_rwsem_write_fops = {
+ .read = read_dummy_rwsem_write,
+};
+
+/*
+ * Module init: create the debugfs directory and the four read-only test
+ * files inside it. Returns 0, or the error encoded in the directory pointer
+ * when the directory cannot be created.
+ */
+static int __init hung_task_tests_init(void)
+{
+	struct dentry *dir = debugfs_create_dir(HUNG_TASK_DIR, NULL);
+
+	/* remembered for the exit handler */
+	hung_task_dir = dir;
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	/* One file per locking primitive under test */
+	debugfs_create_file(HUNG_TASK_MUTEX_FILE, 0400, dir, NULL,
+			    &hung_task_mutex_fops);
+	debugfs_create_file(HUNG_TASK_SEM_FILE, 0400, dir, NULL,
+			    &hung_task_sem_fops);
+	debugfs_create_file(HUNG_TASK_RWSEM_READ_FILE, 0400, dir, NULL,
+			    &hung_task_rwsem_read_fops);
+	debugfs_create_file(HUNG_TASK_RWSEM_WRITE_FILE, 0400, dir, NULL,
+			    &hung_task_rwsem_write_fops);
+
+	return 0;
+}
+
+/* Module exit: remove the debugfs directory created at init, recursively. */
+static void __exit hung_task_tests_exit(void)
+{
+ debugfs_remove_recursive(hung_task_dir);
+}
+
+/* Register the init/exit handlers and the module metadata. */
+module_init(hung_task_tests_init);
+module_exit(hung_task_tests_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Masami Hiramatsu <mhiramat@kernel.org>");
+MODULE_AUTHOR("Zi Li <amaindex@outlook.com>");
+MODULE_DESCRIPTION("Simple sleep under lock files for testing hung task");
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index 418c46fe5ffc..fbb03b66dcbd 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -21,7 +21,7 @@
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
-struct perf_event * __percpu *sample_hbp;
+static struct perf_event * __percpu *sample_hbp;
static char ksym_name[KSYM_NAME_LEN] = "jiffies";
module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
@@ -52,8 +52,8 @@ static int __init hw_break_module_init(void)
attr.bp_type = HW_BREAKPOINT_W;
sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
- if (IS_ERR((void __force *)sample_hbp)) {
- ret = PTR_ERR((void __force *)sample_hbp);
+ if (IS_ERR_PCPU(sample_hbp)) {
+ ret = PTR_ERR_PCPU(sample_hbp);
goto fail;
}
@@ -70,7 +70,9 @@ fail:
static void __exit hw_break_module_exit(void)
{
unregister_wide_hw_breakpoint(sample_hbp);
- symbol_put(ksym_name);
+#ifdef CONFIG_MODULE_UNLOAD
+ __symbol_put(ksym_name);
+#endif
printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
}
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
index 642d0748c169..4ae29a12cc8a 100644
--- a/samples/kfifo/bytestream-example.c
+++ b/samples/kfifo/bytestream-example.c
@@ -191,5 +191,6 @@ static void __exit example_exit(void)
module_init(example_init);
module_exit(example_exit);
+MODULE_DESCRIPTION("Sample kfifo byte stream implementation");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index 0cf27483cb36..8076ac410161 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c
@@ -6,8 +6,10 @@
*/
#include <linux/init.h>
-#include <linux/module.h>
#include <linux/kfifo.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
/*
* This module shows how to handle fifo dma operations.
@@ -137,5 +139,6 @@ static void __exit example_exit(void)
module_init(example_init);
module_exit(example_exit);
+MODULE_DESCRIPTION("Sample fifo dma implementation");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
index c61482ba94f4..e4f93317c5d0 100644
--- a/samples/kfifo/inttype-example.c
+++ b/samples/kfifo/inttype-example.c
@@ -182,5 +182,6 @@ static void __exit example_exit(void)
module_init(example_init);
module_exit(example_exit);
+MODULE_DESCRIPTION("Sample kfifo int type implementation");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
index e4087b2d3fc4..e4d1a2d7983c 100644
--- a/samples/kfifo/record-example.c
+++ b/samples/kfifo/record-example.c
@@ -198,5 +198,6 @@ static void __exit example_exit(void)
module_init(example_init);
module_exit(example_exit);
+MODULE_DESCRIPTION("Sample dynamic sized record fifo implementation");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kmemleak/Makefile b/samples/kmemleak/Makefile
index 16b6132c540c..8a999ab43b6d 100644
--- a/samples/kmemleak/Makefile
+++ b/samples/kmemleak/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
+obj-$(CONFIG_SAMPLE_KMEMLEAK) += kmemleak-test.o
diff --git a/samples/kmemleak/kmemleak-test.c b/samples/kmemleak/kmemleak-test.c
index 7b476eb8285f..8609812a37eb 100644
--- a/samples/kmemleak/kmemleak-test.c
+++ b/samples/kmemleak/kmemleak-test.c
@@ -32,7 +32,7 @@ static DEFINE_PER_CPU(void *, kmemleak_test_pointer);
* Some very simple testing. This function needs to be extended for
* proper testing.
*/
-static int __init kmemleak_test_init(void)
+static int kmemleak_test_init(void)
{
struct test_node *elem;
int i;
@@ -40,25 +40,25 @@ static int __init kmemleak_test_init(void)
pr_info("Kmemleak testing\n");
/* make some orphan objects */
- pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
- pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
- pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
- pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
- pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
- pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
- pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
- pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+ pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL));
+ pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL));
#ifndef CONFIG_MODULES
- pr_info("kmem_cache_alloc(files_cachep) = %p\n",
+ pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n",
kmem_cache_alloc(files_cachep, GFP_KERNEL));
- pr_info("kmem_cache_alloc(files_cachep) = %p\n",
+ pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n",
kmem_cache_alloc(files_cachep, GFP_KERNEL));
#endif
- pr_info("vmalloc(64) = %p\n", vmalloc(64));
- pr_info("vmalloc(64) = %p\n", vmalloc(64));
- pr_info("vmalloc(64) = %p\n", vmalloc(64));
- pr_info("vmalloc(64) = %p\n", vmalloc(64));
- pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = 0x%px\n", vmalloc(64));
+ pr_info("vmalloc(64) = 0x%px\n", vmalloc(64));
+ pr_info("vmalloc(64) = 0x%px\n", vmalloc(64));
+ pr_info("vmalloc(64) = 0x%px\n", vmalloc(64));
+ pr_info("vmalloc(64) = 0x%px\n", vmalloc(64));
/*
* Add elements to a list. They should only appear as orphan
@@ -66,7 +66,7 @@ static int __init kmemleak_test_init(void)
*/
for (i = 0; i < 10; i++) {
elem = kzalloc(sizeof(*elem), GFP_KERNEL);
- pr_info("kzalloc(sizeof(*elem)) = %p\n", elem);
+ pr_info("kzalloc(sizeof(*elem)) = 0x%px\n", elem);
if (!elem)
return -ENOMEM;
INIT_LIST_HEAD(&elem->list);
@@ -75,10 +75,12 @@ static int __init kmemleak_test_init(void)
for_each_possible_cpu(i) {
per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
- pr_info("kmalloc(129) = %p\n",
+ pr_info("kmalloc(129) = 0x%px\n",
per_cpu(kmemleak_test_pointer, i));
}
+ pr_info("__alloc_percpu(64, 4) = 0x%px\n", __alloc_percpu(64, 4));
+
return 0;
}
module_init(kmemleak_test_init);
@@ -96,4 +98,5 @@ static void __exit kmemleak_test_exit(void)
}
module_exit(kmemleak_test_exit);
+MODULE_DESCRIPTION("Sample module to leak memory for kmemleak testing");
MODULE_LICENSE("GPL");
diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c
index 96678ed73216..36d87ca0bee2 100644
--- a/samples/kobject/kobject-example.c
+++ b/samples/kobject/kobject-example.c
@@ -13,7 +13,7 @@
/*
* This module shows how to create a simple subdirectory in sysfs called
- * /sys/kernel/kobject-example In that directory, 3 files are created:
+ * /sys/kernel/kobject_example In that directory, 3 files are created:
* "foo", "baz", and "bar". If an integer is written to these files, it can be
* later read out of it.
*/
@@ -102,7 +102,7 @@ static struct attribute *attrs[] = {
* created for the attributes with the directory being the name of the
* attribute group.
*/
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
.attrs = attrs,
};
@@ -140,5 +140,6 @@ static void __exit example_exit(void)
module_init(example_init);
module_exit(example_exit);
+MODULE_DESCRIPTION("Sample kobject implementation");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>");
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index 52f1acabd479..d0103904e5dd 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -14,8 +14,8 @@
/*
* This module shows how to create a kset in sysfs called
- * /sys/kernel/kset-example
- * Then tree kobjects are created and assigned to this kset, "foo", "baz",
+ * /sys/kernel/kset_example
+ * Then three kobjects are created and assigned to this kset, "foo", "baz",
* and "bar". In those kobjects, attributes of the same name are also
* created and if an integer is written to these files, it can be later
* read out of it.
@@ -37,10 +37,11 @@ struct foo_obj {
/* a custom attribute that works just for a struct foo_obj. */
struct foo_attribute {
struct attribute attr;
- ssize_t (*show)(struct foo_obj *foo, struct foo_attribute *attr, char *buf);
- ssize_t (*store)(struct foo_obj *foo, struct foo_attribute *attr, const char *buf, size_t count);
+ ssize_t (*show)(struct foo_obj *foo, const struct foo_attribute *attr, char *buf);
+ ssize_t (*store)(struct foo_obj *foo, const struct foo_attribute *attr,
+ const char *buf, size_t count);
};
-#define to_foo_attr(x) container_of(x, struct foo_attribute, attr)
+#define to_foo_attr(x) container_of_const(x, struct foo_attribute, attr)
/*
* The default show function that must be passed to sysfs. This will be
@@ -53,7 +54,7 @@ static ssize_t foo_attr_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
{
- struct foo_attribute *attribute;
+ const struct foo_attribute *attribute;
struct foo_obj *foo;
attribute = to_foo_attr(attr);
@@ -73,7 +74,7 @@ static ssize_t foo_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
- struct foo_attribute *attribute;
+ const struct foo_attribute *attribute;
struct foo_obj *foo;
attribute = to_foo_attr(attr);
@@ -109,13 +110,13 @@ static void foo_release(struct kobject *kobj)
/*
* The "foo" file where the .foo variable is read from and written to.
*/
-static ssize_t foo_show(struct foo_obj *foo_obj, struct foo_attribute *attr,
+static ssize_t foo_show(struct foo_obj *foo_obj, const struct foo_attribute *attr,
char *buf)
{
return sysfs_emit(buf, "%d\n", foo_obj->foo);
}
-static ssize_t foo_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
+static ssize_t foo_store(struct foo_obj *foo_obj, const struct foo_attribute *attr,
const char *buf, size_t count)
{
int ret;
@@ -128,14 +129,14 @@ static ssize_t foo_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
}
/* Sysfs attributes cannot be world-writable. */
-static struct foo_attribute foo_attribute =
+static const struct foo_attribute foo_attribute =
__ATTR(foo, 0664, foo_show, foo_store);
/*
* More complex function where we determine which variable is being accessed by
* looking at the attribute for the "baz" and "bar" files.
*/
-static ssize_t b_show(struct foo_obj *foo_obj, struct foo_attribute *attr,
+static ssize_t b_show(struct foo_obj *foo_obj, const struct foo_attribute *attr,
char *buf)
{
int var;
@@ -147,7 +148,7 @@ static ssize_t b_show(struct foo_obj *foo_obj, struct foo_attribute *attr,
return sysfs_emit(buf, "%d\n", var);
}
-static ssize_t b_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
+static ssize_t b_store(struct foo_obj *foo_obj, const struct foo_attribute *attr,
const char *buf, size_t count)
{
int var, ret;
@@ -163,29 +164,44 @@ static ssize_t b_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
return count;
}
-static struct foo_attribute baz_attribute =
+static const struct foo_attribute baz_attribute =
__ATTR(baz, 0664, b_show, b_store);
-static struct foo_attribute bar_attribute =
+static const struct foo_attribute bar_attribute =
__ATTR(bar, 0664, b_show, b_store);
/*
* Create a group of attributes so that we can create and destroy them all
* at once.
*/
-static struct attribute *foo_default_attrs[] = {
+static const struct attribute *const foo_default_attrs[] = {
&foo_attribute.attr,
&baz_attribute.attr,
&bar_attribute.attr,
NULL, /* need to NULL terminate the list of attributes */
};
-ATTRIBUTE_GROUPS(foo_default);
+
+static umode_t foo_default_attrs_is_visible(struct kobject *kobj,
+ const struct attribute *attr,
+ int n)
+{
+ /* Hide attributes with the same name as the kobject. */
+ if (strcmp(kobject_name(kobj), attr->name) == 0)
+ return 0;
+ return attr->mode;
+}
+
+static const struct attribute_group foo_default_group = {
+ .attrs_const = foo_default_attrs,
+ .is_visible_const = foo_default_attrs_is_visible,
+};
+__ATTRIBUTE_GROUPS(foo_default);
/*
* Our own ktype for our kobjects. Here we specify our sysfs ops, the
* release function, and the set of default attributes we want created
* whenever a kobject of this type is registered with the kernel.
*/
-static struct kobj_type foo_ktype = {
+static const struct kobj_type foo_ktype = {
.sysfs_ops = &foo_sysfs_ops,
.release = foo_release,
.default_groups = foo_default_groups,
@@ -284,5 +300,6 @@ static void __exit example_exit(void)
module_init(example_init);
module_exit(example_exit);
+MODULE_DESCRIPTION("Sample kset and ktype implementation");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>");
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index fd346f58ddba..53ec6c8b8c40 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -55,6 +55,10 @@ static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
pr_info("<%s> p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->psw.addr, regs->flags);
#endif
+#ifdef CONFIG_LOONGARCH
+ pr_info("<%s> p->addr = 0x%p, era = 0x%lx, estat = 0x%lx\n",
+ p->symbol_name, p->addr, regs->csr_era, regs->csr_estat);
+#endif
/* A dump_stack() here will give a stack backtrace */
return 0;
@@ -92,6 +96,10 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
pr_info("<%s> p->addr, 0x%p, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->flags);
#endif
+#ifdef CONFIG_LOONGARCH
+ pr_info("<%s> p->addr = 0x%p, estat = 0x%lx\n",
+ p->symbol_name, p->addr, regs->csr_estat);
+#endif
}
static int __init kprobe_init(void)
@@ -117,4 +125,5 @@ static void __exit kprobe_exit(void)
module_init(kprobe_init)
module_exit(kprobe_exit)
+MODULE_DESCRIPTION("sample kernel module showing the use of kprobes");
MODULE_LICENSE("GPL");
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
index cbf16542d84e..65d6dcafd742 100644
--- a/samples/kprobes/kretprobe_example.c
+++ b/samples/kprobes/kretprobe_example.c
@@ -35,7 +35,7 @@ struct my_data {
ktime_t entry_stamp;
};
-/* Here we use the entry_hanlder to timestamp function entry */
+/* Here we use the entry_handler to timestamp function entry */
static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
struct my_data *data;
@@ -104,4 +104,5 @@ static void __exit kretprobe_exit(void)
module_init(kretprobe_init)
module_exit(kretprobe_exit)
+MODULE_DESCRIPTION("sample kernel module showing the use of return probes");
MODULE_LICENSE("GPL");
diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
index e2056c8b902c..e7af02f98208 100644
--- a/samples/landlock/sandboxer.c
+++ b/samples/landlock/sandboxer.c
@@ -1,17 +1,19 @@
// SPDX-License-Identifier: BSD-3-Clause
/*
- * Simple Landlock sandbox manager able to launch a process restricted by a
- * user-defined filesystem access control policy.
+ * Simple Landlock sandbox manager able to execute a process restricted by
+ * user-defined file system and network access control policies.
*
* Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
* Copyright © 2020 ANSSI
*/
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/landlock.h>
-#include <linux/prctl.h>
+#include <linux/socket.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
@@ -20,6 +22,11 @@
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
+#include <stdbool.h>
+
+#if defined(__GLIBC__)
+#include <linux/prctl.h>
+#endif
#ifndef landlock_create_ruleset
static inline int
@@ -51,7 +58,30 @@ static inline int landlock_restrict_self(const int ruleset_fd,
#define ENV_FS_RO_NAME "LL_FS_RO"
#define ENV_FS_RW_NAME "LL_FS_RW"
-#define ENV_PATH_TOKEN ":"
+#define ENV_TCP_BIND_NAME "LL_TCP_BIND"
+#define ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT"
+#define ENV_SCOPED_NAME "LL_SCOPED"
+#define ENV_FORCE_LOG_NAME "LL_FORCE_LOG"
+#define ENV_DELIMITER ":"
+
+static int str2num(const char *numstr, __u64 *num_dst)
+{
+ char *endptr = NULL;
+ int err = 0;
+ __u64 num;
+
+ errno = 0;
+ num = strtoull(numstr, &endptr, 10);
+ if (errno != 0)
+ err = errno;
+ /* Was the string empty, or not entirely parsed successfully? */
+ else if ((*numstr == '\0') || (*endptr != '\0'))
+ err = EINVAL;
+ else
+ *num_dst = num;
+
+ return err;
+}
static int parse_path(char *env_path, const char ***const path_list)
{
@@ -60,13 +90,16 @@ static int parse_path(char *env_path, const char ***const path_list)
if (env_path) {
num_paths++;
for (i = 0; env_path[i]; i++) {
- if (env_path[i] == ENV_PATH_TOKEN[0])
+ if (env_path[i] == ENV_DELIMITER[0])
num_paths++;
}
}
*path_list = malloc(num_paths * sizeof(**path_list));
+ if (!*path_list)
+ return -1;
+
for (i = 0; i < num_paths; i++)
- (*path_list)[i] = strsep(&env_path, ENV_PATH_TOKEN);
+ (*path_list)[i] = strsep(&env_path, ENV_DELIMITER);
return num_paths;
}
@@ -77,12 +110,13 @@ static int parse_path(char *env_path, const char ***const path_list)
LANDLOCK_ACCESS_FS_EXECUTE | \
LANDLOCK_ACCESS_FS_WRITE_FILE | \
LANDLOCK_ACCESS_FS_READ_FILE | \
- LANDLOCK_ACCESS_FS_TRUNCATE)
+ LANDLOCK_ACCESS_FS_TRUNCATE | \
+ LANDLOCK_ACCESS_FS_IOCTL_DEV)
/* clang-format on */
-static int populate_ruleset(const char *const env_var, const int ruleset_fd,
- const __u64 allowed_access)
+static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd,
+ const __u64 allowed_access)
{
int num_paths, i, ret = 1;
char *env_path_name;
@@ -100,6 +134,10 @@ static int populate_ruleset(const char *const env_var, const int ruleset_fd,
env_path_name = strdup(env_path_name);
unsetenv(env_var);
num_paths = parse_path(env_path_name, &path_list);
+ if (num_paths < 0) {
+ fprintf(stderr, "Failed to allocate memory\n");
+ goto out_free_name;
+ }
if (num_paths == 1 && path_list[0][0] == '\0') {
/*
* Allows to not use all possible restrictions (e.g. use
@@ -116,9 +154,11 @@ static int populate_ruleset(const char *const env_var, const int ruleset_fd,
if (path_beneath.parent_fd < 0) {
fprintf(stderr, "Failed to open \"%s\": %s\n",
path_list[i], strerror(errno));
- goto out_free_name;
+ continue;
}
if (fstat(path_beneath.parent_fd, &statbuf)) {
+ fprintf(stderr, "Failed to stat \"%s\": %s\n",
+ path_list[i], strerror(errno));
close(path_beneath.parent_fd);
goto out_free_name;
}
@@ -143,6 +183,98 @@ out_free_name:
return ret;
}
+static int populate_ruleset_net(const char *const env_var, const int ruleset_fd,
+ const __u64 allowed_access)
+{
+ int ret = 1;
+ char *env_port_name, *env_port_name_next, *strport;
+ struct landlock_net_port_attr net_port = {
+ .allowed_access = allowed_access,
+ };
+
+ env_port_name = getenv(env_var);
+ if (!env_port_name)
+ return 0;
+ env_port_name = strdup(env_port_name);
+ unsetenv(env_var);
+
+ env_port_name_next = env_port_name;
+ while ((strport = strsep(&env_port_name_next, ENV_DELIMITER))) {
+ __u64 port;
+
+ if (strcmp(strport, "") == 0)
+ continue;
+
+ if (str2num(strport, &port)) {
+ fprintf(stderr, "Failed to parse port at \"%s\"\n",
+ strport);
+ goto out_free_name;
+ }
+ net_port.port = port;
+ if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0)) {
+ fprintf(stderr,
+ "Failed to update the ruleset with port \"%llu\": %s\n",
+ net_port.port, strerror(errno));
+ goto out_free_name;
+ }
+ }
+ ret = 0;
+
+out_free_name:
+ free(env_port_name);
+ return ret;
+}
+
+/* Returns true on error, false otherwise. */
+static bool check_ruleset_scope(const char *const env_var,
+ struct landlock_ruleset_attr *ruleset_attr)
+{
+ char *env_type_scope, *env_type_scope_next, *ipc_scoping_name;
+ bool error = false;
+ bool abstract_scoping = false;
+ bool signal_scoping = false;
+
+ /* Scoping is not supported by Landlock ABI */
+ if (!(ruleset_attr->scoped &
+ (LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL)))
+ goto out_unset;
+
+ env_type_scope = getenv(env_var);
+ /* Scoping is not requested by the user */
+ if (!env_type_scope || strcmp("", env_type_scope) == 0)
+ goto out_unset;
+
+ env_type_scope = strdup(env_type_scope);
+ env_type_scope_next = env_type_scope;
+ while ((ipc_scoping_name =
+ strsep(&env_type_scope_next, ENV_DELIMITER))) {
+ if (strcmp("a", ipc_scoping_name) == 0 && !abstract_scoping) {
+ abstract_scoping = true;
+ } else if (strcmp("s", ipc_scoping_name) == 0 &&
+ !signal_scoping) {
+ signal_scoping = true;
+ } else {
+ fprintf(stderr, "Unknown or duplicate scope \"%s\"\n",
+ ipc_scoping_name);
+ error = true;
+ goto out_free_name;
+ }
+ }
+
+out_free_name:
+ free(env_type_scope);
+
+out_unset:
+ if (!abstract_scoping)
+ ruleset_attr->scoped &= ~LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET;
+ if (!signal_scoping)
+ ruleset_attr->scoped &= ~LANDLOCK_SCOPE_SIGNAL;
+
+ unsetenv(env_var);
+ return error;
+}
+
/* clang-format off */
#define ACCESS_FS_ROUGHLY_READ ( \
@@ -162,47 +294,75 @@ out_free_name:
LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
LANDLOCK_ACCESS_FS_MAKE_SYM | \
LANDLOCK_ACCESS_FS_REFER | \
- LANDLOCK_ACCESS_FS_TRUNCATE)
+ LANDLOCK_ACCESS_FS_TRUNCATE | \
+ LANDLOCK_ACCESS_FS_IOCTL_DEV)
/* clang-format on */
-#define LANDLOCK_ABI_LAST 3
+#define LANDLOCK_ABI_LAST 7
+
+#define XSTR(s) #s
+#define STR(s) XSTR(s)
+
+/* clang-format off */
+
+static const char help[] =
+ "usage: " ENV_FS_RO_NAME "=\"...\" " ENV_FS_RW_NAME "=\"...\" "
+ "[other environment variables] %1$s <cmd> [args]...\n"
+ "\n"
+ "Execute the given command in a restricted environment.\n"
+ "Multi-valued settings (lists of ports, paths, scopes) are colon-delimited.\n"
+ "\n"
+ "Mandatory settings:\n"
+ "* " ENV_FS_RO_NAME ": paths allowed to be used in a read-only way\n"
+ "* " ENV_FS_RW_NAME ": paths allowed to be used in a read-write way\n"
+ "\n"
+ "Optional settings (when not set, their associated access check "
+ "is always allowed, which is different from an empty string which "
+ "means an empty list):\n"
+ "* " ENV_TCP_BIND_NAME ": ports allowed to bind (server)\n"
+ "* " ENV_TCP_CONNECT_NAME ": ports allowed to connect (client)\n"
+ "* " ENV_SCOPED_NAME ": actions denied on the outside of the landlock domain\n"
+ " - \"a\" to restrict opening abstract unix sockets\n"
+ " - \"s\" to restrict sending signals\n"
+ "\n"
+ "A sandboxer should not log denied access requests to avoid spamming logs, "
+ "but to test audit we can set " ENV_FORCE_LOG_NAME "=1\n"
+ "\n"
+ "Example:\n"
+ ENV_FS_RO_NAME "=\"${PATH}:/lib:/usr:/proc:/etc:/dev/urandom\" "
+ ENV_FS_RW_NAME "=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
+ ENV_TCP_BIND_NAME "=\"9418\" "
+ ENV_TCP_CONNECT_NAME "=\"80:443\" "
+ ENV_SCOPED_NAME "=\"a:s\" "
+ "%1$s bash -i\n"
+ "\n"
+ "This sandboxer can use Landlock features up to ABI version "
+ STR(LANDLOCK_ABI_LAST) ".\n";
+
+/* clang-format on */
int main(const int argc, char *const argv[], char *const *const envp)
{
const char *cmd_path;
char *const *cmd_argv;
int ruleset_fd, abi;
+ char *env_port_name, *env_force_log;
__u64 access_fs_ro = ACCESS_FS_ROUGHLY_READ,
access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE;
+
struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = access_fs_rw,
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
+ LANDLOCK_SCOPE_SIGNAL,
};
+ int supported_restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
+ int set_restrict_flags = 0;
if (argc < 2) {
- fprintf(stderr,
- "usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n",
- ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
- fprintf(stderr,
- "Launch a command in a restricted environment.\n\n");
- fprintf(stderr, "Environment variables containing paths, "
- "each separated by a colon:\n");
- fprintf(stderr,
- "* %s: list of paths allowed to be used in a read-only way.\n",
- ENV_FS_RO_NAME);
- fprintf(stderr,
- "* %s: list of paths allowed to be used in a read-write way.\n",
- ENV_FS_RW_NAME);
- fprintf(stderr,
- "\nexample:\n"
- "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" "
- "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
- "%s bash -i\n\n",
- ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
- fprintf(stderr,
- "This sandboxer can use Landlock features "
- "up to ABI version %d.\n",
- LANDLOCK_ABI_LAST);
+ fprintf(stderr, help, argv[0]);
return 1;
}
@@ -255,7 +415,29 @@ int main(const int argc, char *const argv[], char *const *const envp)
case 2:
/* Removes LANDLOCK_ACCESS_FS_TRUNCATE for ABI < 3 */
ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_TRUNCATE;
+ __attribute__((fallthrough));
+ case 3:
+ /* Removes network support for ABI < 4 */
+ ruleset_attr.handled_access_net &=
+ ~(LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP);
+ __attribute__((fallthrough));
+ case 4:
+ /* Removes LANDLOCK_ACCESS_FS_IOCTL_DEV for ABI < 5 */
+ ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ __attribute__((fallthrough));
+ case 5:
+ /* Removes LANDLOCK_SCOPE_* for ABI < 6 */
+ ruleset_attr.scoped &= ~(LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
+ LANDLOCK_SCOPE_SIGNAL);
+ __attribute__((fallthrough));
+ case 6:
+ /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */
+ supported_restrict_flags &=
+ ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
+
+ /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
fprintf(stderr,
"Hint: You should update the running kernel "
"to leverage Landlock features "
@@ -274,23 +456,68 @@ int main(const int argc, char *const argv[], char *const *const envp)
access_fs_ro &= ruleset_attr.handled_access_fs;
access_fs_rw &= ruleset_attr.handled_access_fs;
+ /* Removes bind access attribute if not requested by the user. */
+ env_port_name = getenv(ENV_TCP_BIND_NAME);
+ if (!env_port_name) {
+ ruleset_attr.handled_access_net &=
+ ~LANDLOCK_ACCESS_NET_BIND_TCP;
+ }
+ /* Removes connect access attribute if not requested by the user. */
+ env_port_name = getenv(ENV_TCP_CONNECT_NAME);
+ if (!env_port_name) {
+ ruleset_attr.handled_access_net &=
+ ~LANDLOCK_ACCESS_NET_CONNECT_TCP;
+ }
+
+ if (check_ruleset_scope(ENV_SCOPED_NAME, &ruleset_attr))
+ return 1;
+
+ /* Enables optional logs. */
+ env_force_log = getenv(ENV_FORCE_LOG_NAME);
+ if (env_force_log) {
+ if (strcmp(env_force_log, "1") != 0) {
+ fprintf(stderr, "Unknown value for " ENV_FORCE_LOG_NAME
+ " (only \"1\" is handled)\n");
+ return 1;
+ }
+ if (!(supported_restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON)) {
+ fprintf(stderr,
+ "Audit logs not supported by current kernel\n");
+ return 1;
+ }
+ set_restrict_flags |= LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
+ unsetenv(ENV_FORCE_LOG_NAME);
+ }
+
ruleset_fd =
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
if (ruleset_fd < 0) {
perror("Failed to create a ruleset");
return 1;
}
- if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) {
+
+ if (populate_ruleset_fs(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) {
+ goto err_close_ruleset;
+ }
+ if (populate_ruleset_fs(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) {
goto err_close_ruleset;
}
- if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) {
+
+ if (populate_ruleset_net(ENV_TCP_BIND_NAME, ruleset_fd,
+ LANDLOCK_ACCESS_NET_BIND_TCP)) {
goto err_close_ruleset;
}
+ if (populate_ruleset_net(ENV_TCP_CONNECT_NAME, ruleset_fd,
+ LANDLOCK_ACCESS_NET_CONNECT_TCP)) {
+ goto err_close_ruleset;
+ }
+
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
perror("Failed to restrict privileges");
goto err_close_ruleset;
}
- if (landlock_restrict_self(ruleset_fd, 0)) {
+ if (landlock_restrict_self(ruleset_fd, set_restrict_flags)) {
perror("Failed to enforce ruleset");
goto err_close_ruleset;
}
@@ -298,6 +525,7 @@ int main(const int argc, char *const argv[], char *const *const envp)
cmd_path = argv[1];
cmd_argv = argv + 1;
+ fprintf(stderr, "Executing the sandboxed command...\n");
execvpe(cmd_path, cmd_argv, envp);
fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path,
strerror(errno));
diff --git a/samples/livepatch/livepatch-callbacks-busymod.c b/samples/livepatch/livepatch-callbacks-busymod.c
index 378e2d40271a..fadc2a85cb35 100644
--- a/samples/livepatch/livepatch-callbacks-busymod.c
+++ b/samples/livepatch/livepatch-callbacks-busymod.c
@@ -44,8 +44,7 @@ static void busymod_work_func(struct work_struct *work)
static int livepatch_callbacks_mod_init(void)
{
pr_info("%s\n", __func__);
- schedule_delayed_work(&work,
- msecs_to_jiffies(1000 * 0));
+ schedule_delayed_work(&work, 0);
return 0;
}
@@ -57,4 +56,5 @@ static void livepatch_callbacks_mod_exit(void)
module_init(livepatch_callbacks_mod_init);
module_exit(livepatch_callbacks_mod_exit);
+MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module");
MODULE_LICENSE("GPL");
diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c
index 11c3f4357812..9e69d9caed25 100644
--- a/samples/livepatch/livepatch-callbacks-demo.c
+++ b/samples/livepatch/livepatch-callbacks-demo.c
@@ -192,5 +192,6 @@ static void livepatch_callbacks_demo_exit(void)
module_init(livepatch_callbacks_demo_init);
module_exit(livepatch_callbacks_demo_exit);
+MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks");
MODULE_LICENSE("GPL");
MODULE_INFO(livepatch, "Y");
diff --git a/samples/livepatch/livepatch-callbacks-mod.c b/samples/livepatch/livepatch-callbacks-mod.c
index 2a074f422a51..d1851b471ad9 100644
--- a/samples/livepatch/livepatch-callbacks-mod.c
+++ b/samples/livepatch/livepatch-callbacks-mod.c
@@ -38,4 +38,5 @@ static void livepatch_callbacks_mod_exit(void)
module_init(livepatch_callbacks_mod_init);
module_exit(livepatch_callbacks_mod_exit);
+MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module");
MODULE_LICENSE("GPL");
diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c
index cd76d7ebe598..5263a2f31c48 100644
--- a/samples/livepatch/livepatch-sample.c
+++ b/samples/livepatch/livepatch-sample.c
@@ -66,5 +66,6 @@ static void livepatch_exit(void)
module_init(livepatch_init);
module_exit(livepatch_exit);
+MODULE_DESCRIPTION("Kernel Live Patching Sample Module");
MODULE_LICENSE("GPL");
MODULE_INFO(livepatch, "Y");
diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c
index 6701641bf12d..cbf68ca40097 100644
--- a/samples/livepatch/livepatch-shadow-fix1.c
+++ b/samples/livepatch/livepatch-shadow-fix1.c
@@ -72,8 +72,7 @@ static struct dummy *livepatch_fix1_dummy_alloc(void)
if (!d)
return NULL;
- d->jiffies_expire = jiffies +
- msecs_to_jiffies(1000 * EXPIRE_PERIOD);
+ d->jiffies_expire = jiffies + secs_to_jiffies(EXPIRE_PERIOD);
/*
* Patch: save the extra memory location into a SV_LEAK shadow
@@ -169,5 +168,6 @@ static void livepatch_shadow_fix1_exit(void)
module_init(livepatch_shadow_fix1_init);
module_exit(livepatch_shadow_fix1_exit);
+MODULE_DESCRIPTION("Live patching demo for shadow variables");
MODULE_LICENSE("GPL");
MODULE_INFO(livepatch, "Y");
diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c
index 361046a4f10c..b99122cb221f 100644
--- a/samples/livepatch/livepatch-shadow-fix2.c
+++ b/samples/livepatch/livepatch-shadow-fix2.c
@@ -128,5 +128,6 @@ static void livepatch_shadow_fix2_exit(void)
module_init(livepatch_shadow_fix2_init);
module_exit(livepatch_shadow_fix2_exit);
+MODULE_DESCRIPTION("Live patching demo for shadow variables");
MODULE_LICENSE("GPL");
MODULE_INFO(livepatch, "Y");
diff --git a/samples/livepatch/livepatch-shadow-mod.c b/samples/livepatch/livepatch-shadow-mod.c
index 7e753b0d2fa6..5d83ad5a8118 100644
--- a/samples/livepatch/livepatch-shadow-mod.c
+++ b/samples/livepatch/livepatch-shadow-mod.c
@@ -101,8 +101,7 @@ static __used noinline struct dummy *dummy_alloc(void)
if (!d)
return NULL;
- d->jiffies_expire = jiffies +
- msecs_to_jiffies(1000 * EXPIRE_PERIOD);
+ d->jiffies_expire = jiffies + secs_to_jiffies(EXPIRE_PERIOD);
/* Oops, forgot to save leak! */
leak = kzalloc(sizeof(*leak), GFP_KERNEL);
@@ -152,8 +151,7 @@ static void alloc_work_func(struct work_struct *work)
list_add(&d->list, &dummy_list);
mutex_unlock(&dummy_list_mutex);
- schedule_delayed_work(&alloc_dwork,
- msecs_to_jiffies(1000 * ALLOC_PERIOD));
+ schedule_delayed_work(&alloc_dwork, secs_to_jiffies(ALLOC_PERIOD));
}
/*
@@ -184,16 +182,13 @@ static void cleanup_work_func(struct work_struct *work)
}
mutex_unlock(&dummy_list_mutex);
- schedule_delayed_work(&cleanup_dwork,
- msecs_to_jiffies(1000 * CLEANUP_PERIOD));
+ schedule_delayed_work(&cleanup_dwork, secs_to_jiffies(CLEANUP_PERIOD));
}
static int livepatch_shadow_mod_init(void)
{
- schedule_delayed_work(&alloc_dwork,
- msecs_to_jiffies(1000 * ALLOC_PERIOD));
- schedule_delayed_work(&cleanup_dwork,
- msecs_to_jiffies(1000 * CLEANUP_PERIOD));
+ schedule_delayed_work(&alloc_dwork, secs_to_jiffies(ALLOC_PERIOD));
+ schedule_delayed_work(&cleanup_dwork, secs_to_jiffies(CLEANUP_PERIOD));
return 0;
}
diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c
index 867debd3b912..1d7254bcb44c 100644
--- a/samples/mei/mei-amt-version.c
+++ b/samples/mei/mei-amt-version.c
@@ -69,11 +69,11 @@
#include <string.h>
#include <fcntl.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
#include <unistd.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>
-#include <bits/wordsize.h>
#include <linux/mei.h>
/*****************************************************************************
diff --git a/samples/pfsm/.gitignore b/samples/pfsm/.gitignore
new file mode 100644
index 000000000000..f350a030a060
--- /dev/null
+++ b/samples/pfsm/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+/pfsm-wakeup
diff --git a/samples/pfsm/Makefile b/samples/pfsm/Makefile
new file mode 100644
index 000000000000..213e8d9f5dbc
--- /dev/null
+++ b/samples/pfsm/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+userprogs-always-y += pfsm-wakeup
+
+userccflags += -I usr/include
diff --git a/samples/pfsm/pfsm-wakeup.c b/samples/pfsm/pfsm-wakeup.c
new file mode 100644
index 000000000000..299dd9e1f607
--- /dev/null
+++ b/samples/pfsm/pfsm-wakeup.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * TPS6594 PFSM userspace example
+ *
+ * Copyright (C) 2023 BayLibre Incorporated - https://www.baylibre.com/
+ *
+ * This example shows how to use PFSMs from a userspace application,
+ * on the TI j721s2 platform. The PMIC is armed to be triggered by an RTC
+ * alarm to execute a state transition (RETENTION to ACTIVE).
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <linux/rtc.h>
+#include <linux/tps6594_pfsm.h>
+
+#define ALARM_DELTA_SEC 30
+
+#define RTC_A "/dev/rtc0"
+
+#define PMIC_NB 3
+#define PMIC_A "/dev/pfsm-0-0x48"
+#define PMIC_B "/dev/pfsm-0-0x4c"
+#define PMIC_C "/dev/pfsm-2-0x58"
+
+static const char * const dev_pfsm[] = {PMIC_A, PMIC_B, PMIC_C};
+
+int main(int argc, char *argv[])
+{
+ int i, ret, fd_rtc, fd_pfsm[PMIC_NB] = { 0 };
+ struct rtc_time rtc_tm;
+ struct pmic_state_opt pmic_opt = { 0 };
+ unsigned long data;
+
+ fd_rtc = open(RTC_A, O_RDONLY);
+ if (fd_rtc < 0) {
+ perror("Failed to open RTC device.");
+ goto out;
+ }
+
+ for (i = 0 ; i < PMIC_NB ; i++) {
+ fd_pfsm[i] = open(dev_pfsm[i], O_RDWR);
+ if (fd_pfsm[i] < 0) {
+ perror("Failed to open PFSM device.");
+ goto out;
+ }
+ }
+
+ /* Read RTC date/time */
+ ret = ioctl(fd_rtc, RTC_RD_TIME, &rtc_tm);
+ if (ret < 0) {
+ perror("Failed to read RTC date/time.");
+ goto out;
+ }
+ printf("Current RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+ rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+ rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+ /* Set RTC alarm to ALARM_DELTA_SEC sec in the future, and check for rollover */
+ rtc_tm.tm_sec += ALARM_DELTA_SEC;
+ if (rtc_tm.tm_sec >= 60) {
+ rtc_tm.tm_sec %= 60;
+ rtc_tm.tm_min++;
+ }
+ if (rtc_tm.tm_min == 60) {
+ rtc_tm.tm_min = 0;
+ rtc_tm.tm_hour++;
+ }
+ if (rtc_tm.tm_hour == 24)
+ rtc_tm.tm_hour = 0;
+ ret = ioctl(fd_rtc, RTC_ALM_SET, &rtc_tm);
+ if (ret < 0) {
+ perror("Failed to set RTC alarm.");
+ goto out;
+ }
+
+ /* Enable alarm interrupts */
+ ret = ioctl(fd_rtc, RTC_AIE_ON, 0);
+ if (ret < 0) {
+ perror("Failed to enable alarm interrupts.");
+ goto out;
+ }
+ printf("Waiting %d seconds for alarm...\n", ALARM_DELTA_SEC);
+
+ /*
+ * Set RETENTION state with options for PMIC_C/B/A respectively.
+ * Since PMIC_A is master, it should be the last one to be configured.
+ */
+ pmic_opt.ddr_retention = 1;
+ for (i = PMIC_NB - 1 ; i >= 0 ; i--) {
+ printf("Set RETENTION state for PMIC_%d.\n", i);
+ sleep(1);
+ ret = ioctl(fd_pfsm[i], PMIC_SET_RETENTION_STATE, &pmic_opt);
+ if (ret < 0) {
+ perror("Failed to set RETENTION state.");
+ goto out_reset;
+ }
+ }
+
+ /* This blocks until the alarm ring causes an interrupt */
+ ret = read(fd_rtc, &data, sizeof(unsigned long));
+ if (ret < 0)
+ perror("Failed to get RTC alarm.");
+ else
+ puts("Alarm rang.\n");
+
+out_reset:
+ ioctl(fd_rtc, RTC_AIE_OFF, 0);
+
+ /* Set ACTIVE state for PMIC_A */
+ ioctl(fd_pfsm[0], PMIC_SET_ACTIVE_STATE, 0);
+
+out:
+ for (i = 0 ; i < PMIC_NB ; i++)
+ if (fd_pfsm[i])
+ close(fd_pfsm[i]);
+
+ if (fd_rtc)
+ close(fd_rtc);
+
+ return 0;
+}
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
index dd4e53ae9b73..c08cefb8eb1f 100644
--- a/samples/pktgen/functions.sh
+++ b/samples/pktgen/functions.sh
@@ -108,12 +108,13 @@ function pgset() {
fi
}
-if [[ -z "$APPEND" ]]; then
- if [[ $EUID -eq 0 ]]; then
- # Cleanup pktgen setup on exit if thats not "append mode"
- trap 'pg_ctrl "reset"' EXIT
- fi
-fi
+function trap_exit()
+{
+ # Cleanup pktgen setup on exit if that's not "append mode"
+ if [[ -z "$APPEND" ]] && [[ $EUID -eq 0 ]]; then
+ trap 'pg_ctrl "reset"' EXIT
+ fi
+}
## -- General shell tricks --
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index 99ec0688b044..b4328db4a164 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -33,6 +33,10 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
+
+# Trap EXIT first
+trap_exit
+
# Using invalid DST_MAC will cause the packets to get dropped in
# ip_rcv() which is part of the test
if [ -z "$DEST_IP" ]; then
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index 04b0dd0c36d6..f2beb512c5cd 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -14,6 +14,10 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
+
+# Trap EXIT first
+trap_exit
+
if [ -z "$DEST_IP" ]; then
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
fi
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index 09a92ea963f9..66cb707479e6 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -13,6 +13,10 @@ root_check_run_with_sudo "$@"
# - go look in parameters.sh to see which setting are avail
# - required param is the interface "-i" stored in $DEV
source ${basedir}/parameters.sh
+
+# Trap EXIT first
+trap_exit
+
#
# Set some default params, if they didn't get set
if [ -z "$DEST_IP" ]; then
@@ -72,7 +76,7 @@ if [ -n "$DST_PORT" ]; then
pg_set $DEV "udp_dst_max $UDP_DST_MAX"
fi
-[ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+[ ! -z "$UDP_CSUM" ] && pg_set $DEV "flag UDPCSUM"
# Setup random UDP port src range
pg_set $DEV "flag UDPSRC_RND"
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index 7fa41c84c32f..93f33d7d0a81 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -14,6 +14,9 @@ root_check_run_with_sudo "$@"
# Required param: -i dev in $DEV
source ${basedir}/parameters.sh
+# Trap EXIT first
+trap_exit
+
[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely
# Base Config
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index 8bf2fdffba16..8f8ed1ac46a0 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -25,6 +25,10 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
+
+# Trap EXIT first
+trap_exit
+
# Set some default params, if they didn't get set
if [ -z "$DEST_IP" ]; then
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index cff51f861506..65ed486ce4f1 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -12,6 +12,10 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
+
+# Trap EXIT first
+trap_exit
+
# Set some default params, if they didn't get set
if [ -z "$DEST_IP" ]; then
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 3578d0aa4ac5..bcbc386b2284 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -16,6 +16,10 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
+
+# Trap EXIT first
+trap_exit
+
# Set some default params, if they didn't get set
if [ -z "$DEST_IP" ]; then
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 264cc5db9c49..0c5409cb5bab 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -14,6 +14,9 @@ root_check_run_with_sudo "$@"
# Required param: -i dev in $DEV
source ${basedir}/parameters.sh
+# Trap EXIT first
+trap_exit
+
# Base Config
[ -z "$COUNT" ] && COUNT="20000000" # Zero means indefinitely
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c
index c045e3d24326..d1814582319b 100644
--- a/samples/qmi/qmi_sample_client.c
+++ b/samples/qmi/qmi_sample_client.c
@@ -468,7 +468,7 @@ static int qmi_sample_probe(struct platform_device *pdev)
return ret;
sq = dev_get_platdata(&pdev->dev);
- ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq,
+ ret = kernel_connect(sample->qmi.sock, (struct sockaddr_unsized *)sq,
sizeof(*sq), 0);
if (ret < 0) {
pr_err("failed to connect to remote service port\n");
@@ -511,7 +511,7 @@ err_release_qmi_handle:
return ret;
}
-static int qmi_sample_remove(struct platform_device *pdev)
+static void qmi_sample_remove(struct platform_device *pdev)
{
struct qmi_sample *sample = platform_get_drvdata(pdev);
@@ -520,8 +520,6 @@ static int qmi_sample_remove(struct platform_device *pdev)
debugfs_remove(sample->de_dir);
qmi_handle_release(&sample->qmi);
-
- return 0;
}
static struct platform_driver qmi_sample_driver = {
diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig
index b0f74a81c8f9..3efa51bfc8ef 100644
--- a/samples/rust/Kconfig
+++ b/samples/rust/Kconfig
@@ -10,6 +10,17 @@ menuconfig SAMPLES_RUST
if SAMPLES_RUST
+config SAMPLE_RUST_CONFIGFS
+ tristate "Configfs sample"
+ depends on CONFIGFS_FS
+ help
+ This option builds the Rust configfs sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_configfs.
+
+ If unsure, say N.
+
config SAMPLE_RUST_MINIMAL
tristate "Minimal"
help
@@ -20,6 +31,16 @@ config SAMPLE_RUST_MINIMAL
If unsure, say N.
+config SAMPLE_RUST_MISC_DEVICE
+ tristate "Misc device"
+ help
+ This option builds the Rust misc device sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_misc_device.
+
+ If unsure, say N.
+
config SAMPLE_RUST_PRINT
tristate "Printing macros"
help
@@ -30,6 +51,116 @@ config SAMPLE_RUST_PRINT
If unsure, say N.
+config SAMPLE_RUST_DMA
+ tristate "DMA Test Driver"
+ depends on PCI
+ help
+ This option builds the Rust DMA Test driver sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_dma.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DEBUGFS
+ tristate "DebugFS Test Module"
+ depends on DEBUG_FS
+ help
+ This option builds the Rust DebugFS Test module sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_debugfs.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DEBUGFS_SCOPED
+ tristate "Scoped DebugFS Test Module"
+ depends on DEBUG_FS
+ help
+ This option builds the Rust Scoped DebugFS Test module sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_debugfs_scoped.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DRIVER_I2C
+ tristate "I2C Driver"
+ depends on I2C=y
+ help
+ This option builds the Rust I2C driver sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_driver_i2c.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_I2C_CLIENT
+ tristate "I2C Client Registration"
+ depends on I2C=y
+ help
+ This option builds the Rust I2C client manual creation
+ sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_i2c_client.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DRIVER_PCI
+ tristate "PCI Driver"
+ depends on PCI
+ help
+ This option builds the Rust PCI driver sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_driver_pci.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DRIVER_PLATFORM
+ tristate "Platform Driver"
+ help
+ This option builds the Rust Platform driver sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_driver_platform.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DRIVER_USB
+ tristate "USB Driver"
+ depends on USB = y
+ help
+ This option builds the Rust USB driver sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_driver_usb.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DRIVER_FAUX
+ tristate "Faux Driver"
+ help
+ This option builds the Rust Faux driver sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_driver_faux.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_DRIVER_AUXILIARY
+ tristate "Auxiliary Driver"
+ depends on PCI
+ select AUXILIARY_BUS
+ help
+ This option builds the Rust auxiliary driver sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_driver_auxiliary.
+
+ If unsure, say N.
+
config SAMPLE_RUST_HOSTPROGS
bool "Host programs"
help
diff --git a/samples/rust/Makefile b/samples/rust/Makefile
index 03086dabbea4..f65885d1d62b 100644
--- a/samples/rust/Makefile
+++ b/samples/rust/Makefile
@@ -1,6 +1,21 @@
# SPDX-License-Identifier: GPL-2.0
+ccflags-y += -I$(src) # needed for trace events
obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o
+obj-$(CONFIG_SAMPLE_RUST_MISC_DEVICE) += rust_misc_device.o
obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o
+obj-$(CONFIG_SAMPLE_RUST_DEBUGFS) += rust_debugfs.o
+obj-$(CONFIG_SAMPLE_RUST_DEBUGFS_SCOPED) += rust_debugfs_scoped.o
+obj-$(CONFIG_SAMPLE_RUST_DMA) += rust_dma.o
+obj-$(CONFIG_SAMPLE_RUST_DRIVER_I2C) += rust_driver_i2c.o
+obj-$(CONFIG_SAMPLE_RUST_I2C_CLIENT) += rust_i2c_client.o
+obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o
+obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o
+obj-$(CONFIG_SAMPLE_RUST_DRIVER_USB) += rust_driver_usb.o
+obj-$(CONFIG_SAMPLE_RUST_DRIVER_FAUX) += rust_driver_faux.o
+obj-$(CONFIG_SAMPLE_RUST_DRIVER_AUXILIARY) += rust_driver_auxiliary.o
+obj-$(CONFIG_SAMPLE_RUST_CONFIGFS) += rust_configfs.o
+
+rust_print-y := rust_print_main.o rust_print_events.o
subdir-$(CONFIG_SAMPLE_RUST_HOSTPROGS) += hostprogs
diff --git a/samples/rust/rust_configfs.rs b/samples/rust/rust_configfs.rs
new file mode 100644
index 000000000000..0ccc7553ef39
--- /dev/null
+++ b/samples/rust/rust_configfs.rs
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust configfs sample.
+
+use kernel::alloc::flags;
+use kernel::c_str;
+use kernel::configfs;
+use kernel::configfs::configfs_attrs;
+use kernel::new_mutex;
+use kernel::page::PAGE_SIZE;
+use kernel::prelude::*;
+use kernel::sync::Mutex;
+
+module! {
+ type: RustConfigfs,
+ name: "rust_configfs",
+ authors: ["Rust for Linux Contributors"],
+ description: "Rust configfs sample",
+ license: "GPL",
+}
+
+#[pin_data]
+struct RustConfigfs {
+ #[pin]
+ config: configfs::Subsystem<Configuration>,
+}
+
+#[pin_data]
+struct Configuration {
+ message: &'static CStr,
+ #[pin]
+ bar: Mutex<(KBox<[u8; PAGE_SIZE]>, usize)>,
+}
+
+impl Configuration {
+ fn new() -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ message: c_str!("Hello World\n"),
+ bar <- new_mutex!((KBox::new([0; PAGE_SIZE], flags::GFP_KERNEL)?, 0)),
+ })
+ }
+}
+
+impl kernel::InPlaceModule for RustConfigfs {
+ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
+ pr_info!("Rust configfs sample (init)\n");
+
+ // Define a subsystem with the data type `Configuration`, two
+ // attributes, `message` and `bar` and child group type `Child`. `mkdir`
+ // in the directory representing this subsystem will create directories
+ // backed by the `Child` type.
+ let item_type = configfs_attrs! {
+ container: configfs::Subsystem<Configuration>,
+ data: Configuration,
+ child: Child,
+ attributes: [
+ message: 0,
+ bar: 1,
+ ],
+ };
+
+ try_pin_init!(Self {
+ config <- configfs::Subsystem::new(
+ c_str!("rust_configfs"), item_type, Configuration::new()
+ ),
+ })
+ }
+}
+
+#[vtable]
+impl configfs::GroupOperations for Configuration {
+ type Child = Child;
+
+ fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<Child>, Error>> {
+ // Define a group with data type `Child`, one attribute `baz` and child
+ // group type `GrandChild`. `mkdir` in the directory representing this
+ // group will create directories backed by the `GrandChild` type.
+ let tpe = configfs_attrs! {
+ container: configfs::Group<Child>,
+ data: Child,
+ child: GrandChild,
+ attributes: [
+ baz: 0,
+ ],
+ };
+
+ Ok(configfs::Group::new(name.try_into()?, tpe, Child::new()))
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<0> for Configuration {
+ type Data = Configuration;
+
+ fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show message\n");
+ let data = container.message.to_bytes();
+ page[0..data.len()].copy_from_slice(data);
+ Ok(data.len())
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<1> for Configuration {
+ type Data = Configuration;
+
+ fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show bar\n");
+ let guard = container.bar.lock();
+ let data = guard.0.as_slice();
+ let len = guard.1;
+ page[0..len].copy_from_slice(&data[0..len]);
+ Ok(len)
+ }
+
+ fn store(container: &Configuration, page: &[u8]) -> Result {
+ pr_info!("Store bar\n");
+ let mut guard = container.bar.lock();
+ guard.0[0..page.len()].copy_from_slice(page);
+ guard.1 = page.len();
+ Ok(())
+ }
+}
+
+// `pin_data` cannot handle structs without braces.
+#[pin_data]
+struct Child {}
+
+impl Child {
+ fn new() -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {})
+ }
+}
+
+#[vtable]
+impl configfs::GroupOperations for Child {
+ type Child = GrandChild;
+
+ fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<GrandChild>, Error>> {
+ // Define a group with data type `GrandChild`, one attribute `gc`. As no
+ // child type is specified, it will not be possible to create subgroups
+ // in this group, and `mkdir` in the directory representing this group
+ // will return an error.
+ let tpe = configfs_attrs! {
+ container: configfs::Group<GrandChild>,
+ data: GrandChild,
+ attributes: [
+ gc: 0,
+ ],
+ };
+
+ Ok(configfs::Group::new(
+ name.try_into()?,
+ tpe,
+ GrandChild::new(),
+ ))
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<0> for Child {
+ type Data = Child;
+
+ fn show(_container: &Child, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show baz\n");
+ let data = c"Hello Baz\n".to_bytes();
+ page[0..data.len()].copy_from_slice(data);
+ Ok(data.len())
+ }
+}
+
+// `pin_data` cannot handle structs without braces.
+#[pin_data]
+struct GrandChild {}
+
+impl GrandChild {
+ fn new() -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {})
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<0> for GrandChild {
+ type Data = GrandChild;
+
+ fn show(_container: &GrandChild, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ pr_info!("Show grand child\n");
+ let data = c"Hello GC\n".to_bytes();
+ page[0..data.len()].copy_from_slice(data);
+ Ok(data.len())
+ }
+}
diff --git a/samples/rust/rust_debugfs.rs b/samples/rust/rust_debugfs.rs
new file mode 100644
index 000000000000..025e8f9d12de
--- /dev/null
+++ b/samples/rust/rust_debugfs.rs
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Sample DebugFS exporting platform driver
+//!
+//! To successfully probe this driver with ACPI, use an ssdt that looks like
+//!
+//! ```dsl
+//! DefinitionBlock ("", "SSDT", 2, "TEST", "VIRTACPI", 0x00000001)
+//! {
+//! Scope (\_SB)
+//! {
+//! Device (T432)
+//! {
+//! Name (_HID, "LNUXBEEF") // ACPI hardware ID to match
+//! Name (_UID, 1)
+//! Name (_STA, 0x0F) // Device present, enabled
+//! Name (_DSD, Package () { // Sample attribute
+//! ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+//! Package() {
+//! Package(2) {"compatible", "sample-debugfs"}
+//! }
+//! })
+//! Name (_CRS, ResourceTemplate ()
+//! {
+//! Memory32Fixed (ReadWrite, 0xFED00000, 0x1000)
+//! })
+//! }
+//! }
+//! }
+//! ```
+
+use core::str::FromStr;
+use kernel::c_str;
+use kernel::debugfs::{Dir, File};
+use kernel::new_mutex;
+use kernel::prelude::*;
+use kernel::sizes::*;
+use kernel::sync::atomic::{Atomic, Relaxed};
+use kernel::sync::Mutex;
+use kernel::{acpi, device::Core, of, platform, str::CString, types::ARef};
+
+kernel::module_platform_driver! {
+ type: RustDebugFs,
+ name: "rust_debugfs",
+ authors: ["Matthew Maurer"],
+ description: "Rust DebugFS usage sample",
+ license: "GPL",
+}
+
+#[pin_data]
+struct RustDebugFs {
+ pdev: ARef<platform::Device>,
+ // As we only hold these for drop effect (to remove the directory/files) we have a leading
+ // underscore to indicate to the compiler that we don't expect to use this field directly.
+ _debugfs: Dir,
+ #[pin]
+ _compatible: File<CString>,
+ #[pin]
+ counter: File<Atomic<usize>>,
+ #[pin]
+ inner: File<Mutex<Inner>>,
+ #[pin]
+ array_blob: File<Mutex<[u8; 4]>>,
+ #[pin]
+ vector_blob: File<Mutex<KVec<u8>>>,
+}
+
+#[derive(Debug)]
+struct Inner {
+ x: u32,
+ y: u32,
+}
+
+impl FromStr for Inner {
+    type Err = Error;
+
+    /// Parses exactly two whitespace-separated `u32` values as `x` and `y`; anything else
+    /// yields `EINVAL`.
+    fn from_str(s: &str) -> Result<Self> {
+        let mut fields = s.split_whitespace();
+        // Pulls the next token and parses it, mapping absence or bad digits to `EINVAL`.
+        let mut next_u32 = || -> Result<u32> {
+            let token = fields.next().ok_or(EINVAL)?;
+            token.parse::<u32>().map_err(|_| EINVAL)
+        };
+        let (x, y) = (next_u32()?, next_u32()?);
+        // A third token means the input is malformed.
+        match fields.next() {
+            Some(_) => Err(EINVAL),
+            None => Ok(Inner { x, y }),
+        }
+    }
+}
+
+kernel::acpi_device_table!(
+ ACPI_TABLE,
+ MODULE_ACPI_TABLE,
+ <RustDebugFs as platform::Driver>::IdInfo,
+ [(acpi::DeviceId::new(c_str!("LNUXBEEF")), ())]
+);
+
+impl platform::Driver for RustDebugFs {
+ type IdInfo = ();
+ const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None;
+ const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
+
+ fn probe(
+ pdev: &platform::Device<Core>,
+ _info: Option<&Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> {
+ RustDebugFs::new(pdev).pin_chain(|this| {
+ this.counter.store(91, Relaxed);
+ {
+ let mut guard = this.inner.lock();
+ guard.x = guard.y;
+ guard.y = 42;
+ }
+
+ Ok(())
+ })
+ }
+}
+
+impl RustDebugFs {
+ fn build_counter(dir: &Dir) -> impl PinInit<File<Atomic<usize>>> + '_ {
+ dir.read_write_file(c_str!("counter"), Atomic::<usize>::new(0))
+ }
+
+ fn build_inner(dir: &Dir) -> impl PinInit<File<Mutex<Inner>>> + '_ {
+ dir.read_write_file(c_str!("pair"), new_mutex!(Inner { x: 3, y: 10 }))
+ }
+
+ fn new(pdev: &platform::Device<Core>) -> impl PinInit<Self, Error> + '_ {
+ let debugfs = Dir::new(c_str!("sample_debugfs"));
+ let dev = pdev.as_ref();
+
+ try_pin_init! {
+ Self {
+ _compatible <- debugfs.read_only_file(
+ c_str!("compatible"),
+ dev.fwnode()
+ .ok_or(ENOENT)?
+ .property_read::<CString>(c_str!("compatible"))
+ .required_by(dev)?,
+ ),
+ counter <- Self::build_counter(&debugfs),
+ inner <- Self::build_inner(&debugfs),
+ array_blob <- debugfs.read_write_binary_file(
+ c_str!("array_blob"),
+ new_mutex!([0x62, 0x6c, 0x6f, 0x62]),
+ ),
+ vector_blob <- debugfs.read_write_binary_file(
+ c_str!("vector_blob"),
+ new_mutex!(kernel::kvec!(0x42; SZ_4K)?),
+ ),
+ _debugfs: debugfs,
+ pdev: pdev.into(),
+ }
+ }
+ }
+}
diff --git a/samples/rust/rust_debugfs_scoped.rs b/samples/rust/rust_debugfs_scoped.rs
new file mode 100644
index 000000000000..702a6546d3fb
--- /dev/null
+++ b/samples/rust/rust_debugfs_scoped.rs
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Sample DebugFS exporting platform driver that demonstrates the use of
+//! `Scope::dir` to create a variety of files without the need to separately
+//! track them all.
+
+use kernel::debugfs::{Dir, Scope};
+use kernel::prelude::*;
+use kernel::sizes::*;
+use kernel::sync::atomic::Atomic;
+use kernel::sync::Mutex;
+use kernel::{c_str, new_mutex, str::CString};
+
+module! {
+ type: RustScopedDebugFs,
+ name: "rust_debugfs_scoped",
+ authors: ["Matthew Maurer"],
+ description: "Rust Scoped DebugFS usage sample",
+ license: "GPL",
+}
+
+/// Handles writes to `remove`: drops each device whose name equals the trimmed input.
+fn remove_file_write(
+    mod_data: &ModuleData,
+    reader: &mut kernel::uaccess::UserSliceReader,
+) -> Result {
+    let mut buf = [0u8; 128];
+    if reader.len() >= buf.len() {
+        return Err(EINVAL);
+    }
+    let n = reader.len();
+    reader.read_slice(&mut buf[..n])?;
+
+    // Match on the trimmed slice itself; its length is not a valid offset into `buf` once
+    // `trim()` has stripped leading whitespace. Embedded NUL bytes are still rejected.
+    let to_remove = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?.trim().as_bytes();
+    if to_remove.contains(&0) {
+        return Err(EINVAL);
+    }
+    let mut devices = mod_data.devices.lock();
+    devices.retain(|device| device.name.to_bytes() != to_remove);
+    Ok(())
+}
+
+fn create_file_write(
+ mod_data: &ModuleData,
+ reader: &mut kernel::uaccess::UserSliceReader,
+) -> Result {
+ let mut buf = [0u8; 128];
+ if reader.len() > buf.len() {
+ return Err(EINVAL);
+ }
+ let n = reader.len();
+ reader.read_slice(&mut buf[..n])?;
+
+ let mut nums = KVec::new();
+
+ let s = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?.trim();
+ let mut items = s.split_whitespace();
+ let name_str = items.next().ok_or(EINVAL)?;
+ let name = CString::try_from_fmt(fmt!("{name_str}"))?;
+ let file_name = CString::try_from_fmt(fmt!("{name_str}"))?;
+ for sub in items {
+ nums.push(
+ Atomic::<usize>::new(sub.parse().map_err(|_| EINVAL)?),
+ GFP_KERNEL,
+ )?;
+ }
+ let blob = KBox::pin_init(new_mutex!([0x42; SZ_4K]), GFP_KERNEL)?;
+
+ let scope = KBox::pin_init(
+ mod_data.device_dir.scope(
+ DeviceData { name, nums, blob },
+ &file_name,
+ |dev_data, dir| {
+ for (idx, val) in dev_data.nums.iter().enumerate() {
+ let Ok(name) = CString::try_from_fmt(fmt!("{idx}")) else {
+ return;
+ };
+ dir.read_write_file(&name, val);
+ }
+ dir.read_write_binary_file(c_str!("blob"), &dev_data.blob);
+ },
+ ),
+ GFP_KERNEL,
+ )?;
+ (*mod_data.devices.lock()).push(scope, GFP_KERNEL)?;
+
+ Ok(())
+}
+
+struct RustScopedDebugFs {
+ _data: Pin<KBox<Scope<ModuleData>>>,
+}
+
+#[pin_data]
+struct ModuleData {
+ device_dir: Dir,
+ #[pin]
+ devices: Mutex<KVec<Pin<KBox<Scope<DeviceData>>>>>,
+}
+
+impl ModuleData {
+ fn init(device_dir: Dir) -> impl PinInit<Self> {
+ pin_init! {
+ Self {
+ device_dir: device_dir,
+ devices <- new_mutex!(KVec::new())
+ }
+ }
+ }
+}
+
+struct DeviceData {
+ name: CString,
+ nums: KVec<Atomic<usize>>,
+ blob: Pin<KBox<Mutex<[u8; SZ_4K]>>>,
+}
+
+fn init_control(base_dir: &Dir, dyn_dirs: Dir) -> impl PinInit<Scope<ModuleData>> + '_ {
+ base_dir.scope(
+ ModuleData::init(dyn_dirs),
+ c_str!("control"),
+ |data, dir| {
+ dir.write_only_callback_file(c_str!("create"), data, &create_file_write);
+ dir.write_only_callback_file(c_str!("remove"), data, &remove_file_write);
+ },
+ )
+}
+
+impl kernel::Module for RustScopedDebugFs {
+ fn init(_module: &'static kernel::ThisModule) -> Result<Self> {
+ let base_dir = Dir::new(c_str!("rust_scoped_debugfs"));
+ let dyn_dirs = base_dir.subdir(c_str!("dynamic"));
+ Ok(Self {
+ _data: KBox::pin_init(init_control(&base_dir, dyn_dirs), GFP_KERNEL)?,
+ })
+ }
+}
diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs
new file mode 100644
index 000000000000..f53bce2a73e3
--- /dev/null
+++ b/samples/rust/rust_dma.rs
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust DMA api test (based on QEMU's `pci-testdev`).
+//!
+//! To make this driver probe, QEMU must be run with `-device pci-testdev`.
+
+use kernel::{
+ device::Core,
+ dma::{CoherentAllocation, DataDirection, Device, DmaMask},
+ page, pci,
+ prelude::*,
+ scatterlist::{Owned, SGTable},
+ sync::aref::ARef,
+};
+
+#[pin_data(PinnedDrop)]
+struct DmaSampleDriver {
+ pdev: ARef<pci::Device>,
+ ca: CoherentAllocation<MyStruct>,
+ #[pin]
+ sgt: SGTable<Owned<VVec<u8>>>,
+}
+
+const TEST_VALUES: [(u32, u32); 5] = [
+ (0xa, 0xb),
+ (0xc, 0xd),
+ (0xe, 0xf),
+ (0xab, 0xba),
+ (0xcd, 0xef),
+];
+
+struct MyStruct {
+ h: u32,
+ b: u32,
+}
+
+impl MyStruct {
+ fn new(h: u32, b: u32) -> Self {
+ Self { h, b }
+ }
+}
+// SAFETY: `MyStruct` consists solely of two `u32` fields, so it has no padding or uninitialized bytes.
+unsafe impl kernel::transmute::AsBytes for MyStruct {}
+// SAFETY: `MyStruct` consists solely of `u32` fields, so every bit pattern is a valid value.
+unsafe impl kernel::transmute::FromBytes for MyStruct {}
+
+kernel::pci_device_table!(
+ PCI_TABLE,
+ MODULE_PCI_TABLE,
+ <DmaSampleDriver as pci::Driver>::IdInfo,
+ [(pci::DeviceId::from_id(pci::Vendor::REDHAT, 0x5), ())]
+);
+
+impl pci::Driver for DmaSampleDriver {
+ type IdInfo = ();
+ const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
+
+ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ pin_init::pin_init_scope(move || {
+ dev_info!(pdev.as_ref(), "Probe DMA test driver.\n");
+
+ let mask = DmaMask::new::<64>();
+
+ // SAFETY: There are no concurrent calls to DMA allocation and mapping primitives.
+ unsafe { pdev.dma_set_mask_and_coherent(mask)? };
+
+ let ca: CoherentAllocation<MyStruct> =
+ CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?;
+
+ for (i, value) in TEST_VALUES.into_iter().enumerate() {
+ kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1))?;
+ }
+
+ let size = 4 * page::PAGE_SIZE;
+ let pages = VVec::with_capacity(size, GFP_KERNEL)?;
+
+ let sgt = SGTable::new(pdev.as_ref(), pages, DataDirection::ToDevice, GFP_KERNEL);
+
+ Ok(try_pin_init!(Self {
+ pdev: pdev.into(),
+ ca,
+ sgt <- sgt,
+ }))
+ })
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for DmaSampleDriver {
+ fn drop(self: Pin<&mut Self>) {
+ let dev = self.pdev.as_ref();
+
+ dev_info!(dev, "Unload DMA test driver.\n");
+
+ for (i, value) in TEST_VALUES.into_iter().enumerate() {
+ let val0 = kernel::dma_read!(self.ca[i].h);
+ let val1 = kernel::dma_read!(self.ca[i].b);
+ assert!(val0.is_ok());
+ assert!(val1.is_ok());
+
+ if let Ok(val0) = val0 {
+ assert_eq!(val0, value.0);
+ }
+ if let Ok(val1) = val1 {
+ assert_eq!(val1, value.1);
+ }
+ }
+
+ for (i, entry) in self.sgt.iter().enumerate() {
+ dev_info!(dev, "Entry[{}]: DMA address: {:#x}", i, entry.dma_address());
+ }
+ }
+}
+
+kernel::module_pci_driver! {
+ type: DmaSampleDriver,
+ name: "rust_dma",
+ authors: ["Abdiel Janulgue"],
+ description: "Rust DMA test",
+ license: "GPL v2",
+}
diff --git a/samples/rust/rust_driver_auxiliary.rs b/samples/rust/rust_driver_auxiliary.rs
new file mode 100644
index 000000000000..5761ea314f44
--- /dev/null
+++ b/samples/rust/rust_driver_auxiliary.rs
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust auxiliary driver sample (based on a PCI driver for QEMU's `pci-testdev`).
+//!
+//! To make this driver probe, QEMU must be run with `-device pci-testdev`.
+
+use kernel::{
+ auxiliary, c_str,
+ device::{Bound, Core},
+ devres::Devres,
+ driver,
+ error::Error,
+ pci,
+ prelude::*,
+ InPlaceModule,
+};
+
+use core::any::TypeId;
+use pin_init::PinInit;
+
+const MODULE_NAME: &CStr = <LocalModule as kernel::ModuleMetadata>::NAME;
+const AUXILIARY_NAME: &CStr = c_str!("auxiliary");
+
+struct AuxiliaryDriver;
+
+kernel::auxiliary_device_table!(
+ AUX_TABLE,
+ MODULE_AUX_TABLE,
+ <AuxiliaryDriver as auxiliary::Driver>::IdInfo,
+ [(auxiliary::DeviceId::new(MODULE_NAME, AUXILIARY_NAME), ())]
+);
+
+impl auxiliary::Driver for AuxiliaryDriver {
+ type IdInfo = ();
+
+ const ID_TABLE: auxiliary::IdTable<Self::IdInfo> = &AUX_TABLE;
+
+ fn probe(adev: &auxiliary::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ dev_info!(
+ adev.as_ref(),
+ "Probing auxiliary driver for auxiliary device with id={}\n",
+ adev.id()
+ );
+
+ ParentDriver::connect(adev)?;
+
+ Ok(Self)
+ }
+}
+
+#[pin_data]
+struct ParentDriver {
+ private: TypeId,
+ #[pin]
+ _reg0: Devres<auxiliary::Registration>,
+ #[pin]
+ _reg1: Devres<auxiliary::Registration>,
+}
+
+kernel::pci_device_table!(
+ PCI_TABLE,
+ MODULE_PCI_TABLE,
+ <ParentDriver as pci::Driver>::IdInfo,
+ [(pci::DeviceId::from_id(pci::Vendor::REDHAT, 0x5), ())]
+);
+
+impl pci::Driver for ParentDriver {
+ type IdInfo = ();
+
+ const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
+
+ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ private: TypeId::of::<Self>(),
+ _reg0 <- auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 0, MODULE_NAME),
+ _reg1 <- auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 1, MODULE_NAME),
+ })
+ }
+}
+
+impl ParentDriver {
+ fn connect(adev: &auxiliary::Device<Bound>) -> Result {
+ let dev = adev.parent();
+ let pdev: &pci::Device<Bound> = dev.try_into()?;
+ let drvdata = dev.drvdata::<Self>()?;
+
+ dev_info!(
+ dev,
+ "Connect auxiliary {} with parent: VendorID={}, DeviceID={:#x}\n",
+ adev.id(),
+ pdev.vendor_id(),
+ pdev.device_id()
+ );
+
+ dev_info!(
+ dev,
+ "We have access to the private data of {:?}.\n",
+ drvdata.private
+ );
+
+ Ok(())
+ }
+}
+
+#[pin_data]
+struct SampleModule {
+ #[pin]
+ _pci_driver: driver::Registration<pci::Adapter<ParentDriver>>,
+ #[pin]
+ _aux_driver: driver::Registration<auxiliary::Adapter<AuxiliaryDriver>>,
+}
+
+impl InPlaceModule for SampleModule {
+ fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ _pci_driver <- driver::Registration::new(MODULE_NAME, module),
+ _aux_driver <- driver::Registration::new(MODULE_NAME, module),
+ })
+ }
+}
+
+module! {
+ type: SampleModule,
+ name: "rust_driver_auxiliary",
+ authors: ["Danilo Krummrich"],
+ description: "Rust auxiliary driver",
+ license: "GPL v2",
+}
diff --git a/samples/rust/rust_driver_faux.rs b/samples/rust/rust_driver_faux.rs
new file mode 100644
index 000000000000..ecc9fd378cbd
--- /dev/null
+++ b/samples/rust/rust_driver_faux.rs
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+//! Rust faux device sample.
+
+use kernel::{c_str, faux, prelude::*, Module};
+
+module! {
+    type: SampleModule,
+    name: "rust_driver_faux", // Same name the Kconfig help text and the Makefile object use.
+    authors: ["Lyude Paul"],
+    description: "Rust faux device sample",
+    license: "GPL",
+}
+
+struct SampleModule {
+ _reg: faux::Registration,
+}
+
+impl Module for SampleModule {
+ fn init(_module: &'static ThisModule) -> Result<Self> {
+ pr_info!("Initialising Rust Faux Device Sample\n");
+
+ let reg = faux::Registration::new(c_str!("rust-faux-sample-device"), None)?;
+
+ dev_info!(reg.as_ref(), "Hello from faux device!\n");
+
+ Ok(Self { _reg: reg })
+ }
+}
diff --git a/samples/rust/rust_driver_i2c.rs b/samples/rust/rust_driver_i2c.rs
new file mode 100644
index 000000000000..ecefeca3e22f
--- /dev/null
+++ b/samples/rust/rust_driver_i2c.rs
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust I2C driver sample.
+
+use kernel::{
+ acpi,
+ c_str,
+ device::Core,
+ i2c,
+ of,
+ prelude::*, //
+};
+
+struct SampleDriver;
+
+kernel::acpi_device_table! {
+ ACPI_TABLE,
+ MODULE_ACPI_TABLE,
+ <SampleDriver as i2c::Driver>::IdInfo,
+ [(acpi::DeviceId::new(c_str!("LNUXBEEF")), 0)]
+}
+
+kernel::i2c_device_table! {
+ I2C_TABLE,
+ MODULE_I2C_TABLE,
+ <SampleDriver as i2c::Driver>::IdInfo,
+ [(i2c::DeviceId::new(c_str!("rust_driver_i2c")), 0)]
+}
+
+kernel::of_device_table! {
+ OF_TABLE,
+ MODULE_OF_TABLE,
+ <SampleDriver as i2c::Driver>::IdInfo,
+ [(of::DeviceId::new(c_str!("test,rust_driver_i2c")), 0)]
+}
+
+impl i2c::Driver for SampleDriver {
+ type IdInfo = u32;
+
+ const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
+ const I2C_ID_TABLE: Option<i2c::IdTable<Self::IdInfo>> = Some(&I2C_TABLE);
+ const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
+
+ fn probe(
+ idev: &i2c::I2cClient<Core>,
+ info: Option<&Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> {
+ let dev = idev.as_ref();
+
+ dev_info!(dev, "Probe Rust I2C driver sample.\n");
+
+ if let Some(info) = info {
+ dev_info!(dev, "Probed with info: '{}'.\n", info);
+ }
+
+ Ok(Self)
+ }
+
+ fn shutdown(idev: &i2c::I2cClient<Core>, _this: Pin<&Self>) {
+ dev_info!(idev.as_ref(), "Shutdown Rust I2C driver sample.\n");
+ }
+
+ fn unbind(idev: &i2c::I2cClient<Core>, _this: Pin<&Self>) {
+ dev_info!(idev.as_ref(), "Unbind Rust I2C driver sample.\n");
+ }
+}
+
+kernel::module_i2c_driver! {
+ type: SampleDriver,
+ name: "rust_driver_i2c",
+ authors: ["Igor Korotin"],
+ description: "Rust I2C driver",
+ license: "GPL v2",
+}
diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs
new file mode 100644
index 000000000000..5823787bea8e
--- /dev/null
+++ b/samples/rust/rust_driver_pci.rs
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust PCI driver sample (based on QEMU's `pci-testdev`).
+//!
+//! To make this driver probe, QEMU must be run with `-device pci-testdev`.
+
+use kernel::{c_str, device::Core, devres::Devres, pci, prelude::*, sync::aref::ARef};
+
+struct Regs;
+
+impl Regs {
+ const TEST: usize = 0x0;
+ const OFFSET: usize = 0x4;
+ const DATA: usize = 0x8;
+ const COUNT: usize = 0xC;
+ const END: usize = 0x10;
+}
+
+type Bar0 = pci::Bar<{ Regs::END }>;
+
+#[derive(Copy, Clone, Debug)]
+struct TestIndex(u8);
+
+impl TestIndex {
+ const NO_EVENTFD: Self = Self(0);
+}
+
+#[pin_data(PinnedDrop)]
+struct SampleDriver {
+ pdev: ARef<pci::Device>,
+ #[pin]
+ bar: Devres<Bar0>,
+ index: TestIndex,
+}
+
+kernel::pci_device_table!(
+ PCI_TABLE,
+ MODULE_PCI_TABLE,
+ <SampleDriver as pci::Driver>::IdInfo,
+ [(
+ pci::DeviceId::from_id(pci::Vendor::REDHAT, 0x5),
+ TestIndex::NO_EVENTFD
+ )]
+);
+
+impl SampleDriver {
+ fn testdev(index: &TestIndex, bar: &Bar0) -> Result<u32> {
+ // Select the test.
+ bar.write8(index.0, Regs::TEST);
+
+ let offset = u32::from_le(bar.read32(Regs::OFFSET)) as usize;
+ let data = bar.read8(Regs::DATA);
+
+ // Write `data` to `offset` to increase `count` by one.
+ //
+ // Note that we need `try_write8`, since `offset` can't be checked at compile-time.
+ bar.try_write8(data, offset)?;
+
+ Ok(bar.read32(Regs::COUNT))
+ }
+}
+
+impl pci::Driver for SampleDriver {
+ type IdInfo = TestIndex;
+
+ const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
+
+ fn probe(pdev: &pci::Device<Core>, info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ pin_init::pin_init_scope(move || {
+ let vendor = pdev.vendor_id();
+ dev_dbg!(
+ pdev.as_ref(),
+ "Probe Rust PCI driver sample (PCI ID: {}, 0x{:x}).\n",
+ vendor,
+ pdev.device_id()
+ );
+
+ pdev.enable_device_mem()?;
+ pdev.set_master();
+
+ Ok(try_pin_init!(Self {
+ bar <- pdev.iomap_region_sized::<{ Regs::END }>(0, c_str!("rust_driver_pci")),
+ index: *info,
+ _: {
+ let bar = bar.access(pdev.as_ref())?;
+
+ dev_info!(
+ pdev.as_ref(),
+ "pci-testdev data-match count: {}\n",
+ Self::testdev(info, bar)?
+ );
+ },
+ pdev: pdev.into(),
+ }))
+ })
+ }
+
+ fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) {
+ if let Ok(bar) = this.bar.access(pdev.as_ref()) {
+ // Reset pci-testdev by writing a new test index.
+ bar.write8(this.index.0, Regs::TEST);
+ }
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for SampleDriver {
+ fn drop(self: Pin<&mut Self>) {
+ dev_dbg!(self.pdev.as_ref(), "Remove Rust PCI driver sample.\n");
+ }
+}
+
+kernel::module_pci_driver! {
+ type: SampleDriver,
+ name: "rust_driver_pci",
+ authors: ["Danilo Krummrich"],
+ description: "Rust PCI driver",
+ license: "GPL v2",
+}
diff --git a/samples/rust/rust_driver_platform.rs b/samples/rust/rust_driver_platform.rs
new file mode 100644
index 000000000000..6bf4f0c9633d
--- /dev/null
+++ b/samples/rust/rust_driver_platform.rs
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust Platform driver sample.
+
+//! ACPI match table test
+//!
+//! This demonstrates how to test an ACPI-based Rust platform driver using QEMU
+//! with a custom SSDT.
+//!
+//! Steps:
+//!
+//! 1. **Create an SSDT source file** (`ssdt.dsl`) with the following content:
+//!
+//! ```asl
+//! DefinitionBlock ("", "SSDT", 2, "TEST", "VIRTACPI", 0x00000001)
+//! {
+//! Scope (\_SB)
+//! {
+//! Device (T432)
+//! {
+//! Name (_HID, "LNUXBEEF") // ACPI hardware ID to match
+//! Name (_UID, 1)
+//! Name (_STA, 0x0F) // Device present, enabled
+//! Name (_CRS, ResourceTemplate ()
+//! {
+//! Memory32Fixed (ReadWrite, 0xFED00000, 0x1000)
+//! })
+//! }
+//! }
+//! }
+//! ```
+//!
+//! 2. **Compile the table**:
+//!
+//! ```sh
+//! iasl -tc ssdt.dsl
+//! ```
+//!
+//! This generates `ssdt.aml`
+//!
+//! 3. **Run QEMU** with the compiled AML file:
+//!
+//! ```sh
+//! qemu-system-x86_64 -m 512M \
+//! -enable-kvm \
+//! -kernel path/to/bzImage \
+//! -append "root=/dev/sda console=ttyS0" \
+//! -hda rootfs.img \
+//! -serial stdio \
+//! -acpitable file=ssdt.aml
+//! ```
+//!
+//! Requirements:
+//! - The `rust_driver_platform` must be present either:
+//! - built directly into the kernel (`bzImage`), or
+//! - available as a `.ko` file and loadable from `rootfs.img`
+//!
+//! 4. **Verify it worked** by checking `dmesg`:
+//!
+//! ```
+//! rust_driver_platform LNUXBEEF:00: Probed with info: '0'.
+//! ```
+//!
+
+use kernel::{
+ acpi, c_str,
+ device::{
+ self,
+ property::{FwNodeReferenceArgs, NArgs},
+ Core,
+ },
+ of, platform,
+ prelude::*,
+ str::CString,
+ sync::aref::ARef,
+};
+
+struct SampleDriver {
+ pdev: ARef<platform::Device>,
+}
+
+struct Info(u32);
+
+kernel::of_device_table!(
+ OF_TABLE,
+ MODULE_OF_TABLE,
+ <SampleDriver as platform::Driver>::IdInfo,
+ [(of::DeviceId::new(c_str!("test,rust-device")), Info(42))]
+);
+
+kernel::acpi_device_table!(
+ ACPI_TABLE,
+ MODULE_ACPI_TABLE,
+ <SampleDriver as platform::Driver>::IdInfo,
+ [(acpi::DeviceId::new(c_str!("LNUXBEEF")), Info(0))]
+);
+
+impl platform::Driver for SampleDriver {
+ type IdInfo = Info;
+ const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
+ const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
+
+ fn probe(
+ pdev: &platform::Device<Core>,
+ info: Option<&Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> {
+ let dev = pdev.as_ref();
+
+ dev_dbg!(dev, "Probe Rust Platform driver sample.\n");
+
+ if let Some(info) = info {
+ dev_info!(dev, "Probed with info: '{}'.\n", info.0);
+ }
+
+ if dev.fwnode().is_some_and(|node| node.is_of_node()) {
+ Self::properties_parse(dev)?;
+ }
+
+ Ok(Self { pdev: pdev.into() })
+ }
+}
+
+impl SampleDriver {
+ fn properties_parse(dev: &device::Device) -> Result {
+ let fwnode = dev.fwnode().ok_or(ENOENT)?;
+
+ if let Ok(idx) =
+ fwnode.property_match_string(c_str!("compatible"), c_str!("test,rust-device"))
+ {
+ dev_info!(dev, "matched compatible string idx = {}\n", idx);
+ }
+
+ let name = c_str!("compatible");
+ let prop = fwnode.property_read::<CString>(name).required_by(dev)?;
+ dev_info!(dev, "'{name}'='{prop:?}'\n");
+
+ let name = c_str!("test,bool-prop");
+ let prop = fwnode.property_read_bool(c_str!("test,bool-prop"));
+ dev_info!(dev, "'{name}'='{prop}'\n");
+
+ if fwnode.property_present(c_str!("test,u32-prop")) {
+ dev_info!(dev, "'test,u32-prop' is present\n");
+ }
+
+ let name = c_str!("test,u32-optional-prop");
+ let prop = fwnode.property_read::<u32>(name).or(0x12);
+ dev_info!(dev, "'{name}'='{prop:#x}' (default = 0x12)\n");
+
+ // A missing required property will print an error. Discard the error to
+ // prevent properties_parse from failing in that case.
+ let name = c_str!("test,u32-required-prop");
+ let _ = fwnode.property_read::<u32>(name).required_by(dev);
+
+ let name = c_str!("test,u32-prop");
+ let prop: u32 = fwnode.property_read(name).required_by(dev)?;
+ dev_info!(dev, "'{name}'='{prop:#x}'\n");
+
+ let name = c_str!("test,i16-array");
+ let prop: [i16; 4] = fwnode.property_read(name).required_by(dev)?;
+ dev_info!(dev, "'{name}'='{prop:?}'\n");
+ let len = fwnode.property_count_elem::<u16>(name)?;
+ dev_info!(dev, "'{name}' length is {len}\n");
+
+ let name = c_str!("test,i16-array");
+ let prop: KVec<i16> = fwnode.property_read_array_vec(name, 4)?.required_by(dev)?;
+ dev_info!(dev, "'{name}'='{prop:?}' (KVec)\n");
+
+ for child in fwnode.children() {
+ let name = c_str!("test,ref-arg");
+ let nargs = NArgs::N(2);
+ let prop: FwNodeReferenceArgs = child.property_get_reference_args(name, nargs, 0)?;
+ dev_info!(dev, "'{name}'='{prop:?}'\n");
+ }
+
+ Ok(())
+ }
+}
+
+impl Drop for SampleDriver {
+ fn drop(&mut self) {
+ dev_dbg!(self.pdev.as_ref(), "Remove Rust Platform driver sample.\n");
+ }
+}
+
+kernel::module_platform_driver! {
+ type: SampleDriver,
+ name: "rust_driver_platform",
+ authors: ["Danilo Krummrich"],
+ description: "Rust Platform driver",
+ license: "GPL v2",
+}
diff --git a/samples/rust/rust_driver_usb.rs b/samples/rust/rust_driver_usb.rs
new file mode 100644
index 000000000000..4eaad14867b2
--- /dev/null
+++ b/samples/rust/rust_driver_usb.rs
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (C) 2025 Collabora Ltd.
+
+//! Rust USB driver sample.
+
+use kernel::{device, device::Core, prelude::*, sync::aref::ARef, usb};
+
+struct SampleDriver {
+ _intf: ARef<usb::Interface>,
+}
+
+kernel::usb_device_table!(
+ USB_TABLE,
+ MODULE_USB_TABLE,
+ <SampleDriver as usb::Driver>::IdInfo,
+ [(usb::DeviceId::from_id(0x1234, 0x5678), ()),]
+);
+
+impl usb::Driver for SampleDriver {
+ type IdInfo = ();
+ const ID_TABLE: usb::IdTable<Self::IdInfo> = &USB_TABLE;
+
+ fn probe(
+ intf: &usb::Interface<Core>,
+ _id: &usb::DeviceId,
+ _info: &Self::IdInfo,
+ ) -> impl PinInit<Self, Error> {
+ let dev: &device::Device<Core> = intf.as_ref();
+ dev_info!(dev, "Rust USB driver sample probed\n");
+
+ Ok(Self { _intf: intf.into() })
+ }
+
+ fn disconnect(intf: &usb::Interface<Core>, _data: Pin<&Self>) {
+ let dev: &device::Device<Core> = intf.as_ref();
+ dev_info!(dev, "Rust USB driver sample disconnected\n");
+ }
+}
+
+kernel::module_usb_driver! {
+ type: SampleDriver,
+ name: "rust_driver_usb",
+ authors: ["Daniel Almeida"],
+ description: "Rust USB driver sample",
+ license: "GPL v2",
+}
diff --git a/samples/rust/rust_i2c_client.rs b/samples/rust/rust_i2c_client.rs
new file mode 100644
index 000000000000..f67938396dce
--- /dev/null
+++ b/samples/rust/rust_i2c_client.rs
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust I2C client registration sample.
+//!
+//! An I2C client in Rust cannot exist on its own. To register a new I2C client,
+//! it must be bound to a parent device. In this sample driver, a platform device
+//! is used as the parent.
+//!
+
+//! ACPI match table test
+//!
+//! This demonstrates how to test an ACPI-based Rust I2C client registration driver
+//! using QEMU with a custom SSDT.
+//!
+//! Steps:
+//!
+//! 1. **Create an SSDT source file** (`ssdt.dsl`) with the following content:
+//!
+//! ```asl
+//! DefinitionBlock ("", "SSDT", 2, "TEST", "VIRTACPI", 0x00000001)
+//! {
+//! Scope (\_SB)
+//! {
+//! Device (T432)
+//! {
+//! Name (_HID, "LNUXBEEF") // ACPI hardware ID to match
+//! Name (_UID, 1)
+//! Name (_STA, 0x0F) // Device present, enabled
+//! Name (_CRS, ResourceTemplate ()
+//! {
+//! Memory32Fixed (ReadWrite, 0xFED00000, 0x1000)
+//! })
+//! }
+//! }
+//! }
+//! ```
+//!
+//! 2. **Compile the table**:
+//!
+//! ```sh
+//! iasl -tc ssdt.dsl
+//! ```
+//!
+//! This generates `ssdt.aml`
+//!
+//! 3. **Run QEMU** with the compiled AML file:
+//!
+//! ```sh
+//! qemu-system-x86_64 -m 512M \
+//! -enable-kvm \
+//! -kernel path/to/bzImage \
+//! -append "root=/dev/sda console=ttyS0" \
+//! -hda rootfs.img \
+//! -serial stdio \
+//! -acpitable file=ssdt.aml
+//! ```
+//!
+//! Requirements:
+//! - The `rust_i2c_client` module must be present either:
+//! - built directly into the kernel (`bzImage`), or
+//! - available as a `.ko` file and loadable from `rootfs.img`
+//!
+//! 4. **Verify it worked** by checking `dmesg`:
+//!
+//! ```
+//! rust_device_i2c LNUXBEEF:00: Probe Rust I2C Client registration sample.
+//! ```
+//!
+
+use kernel::{
+ acpi,
+ c_str,
+ device,
+ devres::Devres,
+ i2c,
+ of,
+ platform,
+ prelude::*,
+ sync::aref::ARef, //
+};
+
+#[pin_data]
+struct SampleDriver {
+ parent_dev: ARef<platform::Device>,
+ #[pin]
+ _reg: Devres<i2c::Registration>,
+}
+
+kernel::of_device_table!(
+ OF_TABLE,
+ MODULE_OF_TABLE,
+ <SampleDriver as platform::Driver>::IdInfo,
+ [(of::DeviceId::new(c_str!("test,rust-device")), ())]
+);
+
+kernel::acpi_device_table!(
+ ACPI_TABLE,
+ MODULE_ACPI_TABLE,
+ <SampleDriver as platform::Driver>::IdInfo,
+ [(acpi::DeviceId::new(c_str!("LNUXBEEF")), ())]
+);
+
+const SAMPLE_I2C_CLIENT_ADDR: u16 = 0x30;
+const SAMPLE_I2C_ADAPTER_INDEX: i32 = 0;
+const BOARD_INFO: i2c::I2cBoardInfo =
+ i2c::I2cBoardInfo::new(c_str!("rust_driver_i2c"), SAMPLE_I2C_CLIENT_ADDR);
+
+impl platform::Driver for SampleDriver {
+ type IdInfo = ();
+ const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
+ const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
+
+ fn probe(
+ pdev: &platform::Device<device::Core>,
+ _info: Option<&Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> {
+ dev_info!(
+ pdev.as_ref(),
+ "Probe Rust I2C Client registration sample.\n"
+ );
+
+ kernel::try_pin_init!( Self {
+ parent_dev: pdev.into(),
+
+ _reg <- {
+ let adapter = i2c::I2cAdapter::get(SAMPLE_I2C_ADAPTER_INDEX)?;
+
+ i2c::Registration::new(&adapter, &BOARD_INFO, pdev.as_ref())
+ }
+ })
+ }
+
+ fn unbind(pdev: &platform::Device<device::Core>, _this: Pin<&Self>) {
+ dev_info!(
+ pdev.as_ref(),
+ "Unbind Rust I2C Client registration sample.\n"
+ );
+ }
+}
+
+kernel::module_platform_driver! {
+ type: SampleDriver,
+ name: "rust_device_i2c",
+ authors: ["Danilo Krummrich", "Igor Korotin"],
+ description: "Rust I2C client registration",
+ license: "GPL v2",
+}
diff --git a/samples/rust/rust_minimal.rs b/samples/rust/rust_minimal.rs
index dc05f4bbe27e..8eb9583571d7 100644
--- a/samples/rust/rust_minimal.rs
+++ b/samples/rust/rust_minimal.rs
@@ -7,24 +7,34 @@ use kernel::prelude::*;
module! {
type: RustMinimal,
name: "rust_minimal",
- author: "Rust for Linux Contributors",
+ authors: ["Rust for Linux Contributors"],
description: "Rust minimal sample",
license: "GPL",
+ params: {
+ test_parameter: i64 {
+ default: 1,
+ description: "This parameter has a default of 1",
+ },
+ },
}
struct RustMinimal {
- numbers: Vec<i32>,
+ numbers: KVec<i32>,
}
impl kernel::Module for RustMinimal {
fn init(_module: &'static ThisModule) -> Result<Self> {
pr_info!("Rust minimal sample (init)\n");
pr_info!("Am I built-in? {}\n", !cfg!(MODULE));
-
- let mut numbers = Vec::new();
- numbers.try_push(72)?;
- numbers.try_push(108)?;
- numbers.try_push(200)?;
+ pr_info!(
+ "test_parameter: {}\n",
+ *module_parameters::test_parameter.value()
+ );
+
+ let mut numbers = KVec::new();
+ numbers.push(72, GFP_KERNEL)?;
+ numbers.push(108, GFP_KERNEL)?;
+ numbers.push(200, GFP_KERNEL)?;
Ok(RustMinimal { numbers })
}
diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs
new file mode 100644
index 000000000000..d69bc33dbd99
--- /dev/null
+++ b/samples/rust/rust_misc_device.rs
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Rust misc device sample.
+//!
+//! Below is an example userspace C program that exercises this sample's functionality.
+//!
+//! ```c
+//! #include <stdio.h>
+//! #include <stdlib.h>
+//! #include <errno.h>
+//! #include <fcntl.h>
+//! #include <unistd.h>
+//! #include <sys/ioctl.h>
+//!
+//! #define RUST_MISC_DEV_FAIL _IO('|', 0)
+//! #define RUST_MISC_DEV_HELLO _IO('|', 0x80)
+//! #define RUST_MISC_DEV_GET_VALUE _IOR('|', 0x81, int)
+//! #define RUST_MISC_DEV_SET_VALUE _IOW('|', 0x82, int)
+//!
+//! int main() {
+//! int value, new_value;
+//! int fd, ret;
+//!
+//! // Open the device file
+//! printf("Opening /dev/rust-misc-device for reading and writing\n");
+//! fd = open("/dev/rust-misc-device", O_RDWR);
+//! if (fd < 0) {
+//! perror("open");
+//! return errno;
+//! }
+//!
+//! // Make call into driver to say "hello"
+//! printf("Calling Hello\n");
+//! ret = ioctl(fd, RUST_MISC_DEV_HELLO, NULL);
+//! if (ret < 0) {
+//! perror("ioctl: Failed to call into Hello");
+//! close(fd);
+//! return errno;
+//! }
+//!
+//! // Get initial value
+//! printf("Fetching initial value\n");
+//! ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &value);
+//! if (ret < 0) {
+//! perror("ioctl: Failed to fetch the initial value");
+//! close(fd);
+//! return errno;
+//! }
+//!
+//! value++;
+//!
+//! // Set value to something different
+//! printf("Submitting new value (%d)\n", value);
+//! ret = ioctl(fd, RUST_MISC_DEV_SET_VALUE, &value);
+//! if (ret < 0) {
+//! perror("ioctl: Failed to submit new value");
+//! close(fd);
+//! return errno;
+//! }
+//!
+//! // Ensure new value was applied
+//! printf("Fetching new value\n");
+//! ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &new_value);
+//! if (ret < 0) {
+//! perror("ioctl: Failed to fetch the new value");
+//! close(fd);
+//! return errno;
+//! }
+//!
+//! if (value != new_value) {
+//! printf("Failed: Committed and retrieved values are different (%d - %d)\n", value, new_value);
+//! close(fd);
+//! return -1;
+//! }
+//!
+//! // Call the unsuccessful ioctl
+//! printf("Attempting to call in to a non-existent IOCTL\n");
+//! ret = ioctl(fd, RUST_MISC_DEV_FAIL, NULL);
+//! if (ret < 0) {
+//! perror("ioctl: Succeeded to fail - this was expected");
+//! } else {
+//! printf("ioctl: Failed to fail\n");
+//! close(fd);
+//! return -1;
+//! }
+//!
+//! // Close the device file
+//! printf("Closing /dev/rust-misc-device\n");
+//! close(fd);
+//!
+//! printf("Success\n");
+//! return 0;
+//! }
+//! ```
+
+use core::pin::Pin;
+
+use kernel::{
+ c_str,
+ device::Device,
+ fs::{File, Kiocb},
+ ioctl::{_IO, _IOC_SIZE, _IOR, _IOW},
+ iov::{IovIterDest, IovIterSource},
+ miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration},
+ new_mutex,
+ prelude::*,
+ sync::{aref::ARef, Mutex},
+ uaccess::{UserSlice, UserSliceReader, UserSliceWriter},
+};
+
+const RUST_MISC_DEV_HELLO: u32 = _IO('|' as u32, 0x80);
+const RUST_MISC_DEV_GET_VALUE: u32 = _IOR::<i32>('|' as u32, 0x81);
+const RUST_MISC_DEV_SET_VALUE: u32 = _IOW::<i32>('|' as u32, 0x82);
+
+module! {
+ type: RustMiscDeviceModule,
+ name: "rust_misc_device",
+ authors: ["Lee Jones"],
+ description: "Rust misc device sample",
+ license: "GPL",
+}
+
+#[pin_data]
+struct RustMiscDeviceModule {
+ #[pin]
+ _miscdev: MiscDeviceRegistration<RustMiscDevice>,
+}
+
+impl kernel::InPlaceModule for RustMiscDeviceModule {
+ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
+ pr_info!("Initialising Rust Misc Device Sample\n");
+
+ let options = MiscDeviceOptions {
+ name: c_str!("rust-misc-device"),
+ };
+
+ try_pin_init!(Self {
+ _miscdev <- MiscDeviceRegistration::register(options),
+ })
+ }
+}
+
+struct Inner {
+ value: i32,
+ buffer: KVVec<u8>,
+}
+
+#[pin_data(PinnedDrop)]
+struct RustMiscDevice {
+ #[pin]
+ inner: Mutex<Inner>,
+ dev: ARef<Device>,
+}
+
+#[vtable]
+impl MiscDevice for RustMiscDevice {
+ type Ptr = Pin<KBox<Self>>;
+
+ fn open(_file: &File, misc: &MiscDeviceRegistration<Self>) -> Result<Pin<KBox<Self>>> {
+ let dev = ARef::from(misc.device());
+
+ dev_info!(dev, "Opening Rust Misc Device Sample\n");
+
+ KBox::try_pin_init(
+ try_pin_init! {
+ RustMiscDevice {
+ inner <- new_mutex!(Inner {
+ value: 0_i32,
+ buffer: KVVec::new(),
+ }),
+ dev: dev,
+ }
+ },
+ GFP_KERNEL,
+ )
+ }
+
+ fn read_iter(mut kiocb: Kiocb<'_, Self::Ptr>, iov: &mut IovIterDest<'_>) -> Result<usize> {
+ let me = kiocb.file();
+ dev_info!(me.dev, "Reading from Rust Misc Device Sample\n");
+
+ let inner = me.inner.lock();
+ // Read the buffer contents, taking the file position into account.
+ let read = iov.simple_read_from_buffer(kiocb.ki_pos_mut(), &inner.buffer)?;
+
+ Ok(read)
+ }
+
+ fn write_iter(mut kiocb: Kiocb<'_, Self::Ptr>, iov: &mut IovIterSource<'_>) -> Result<usize> {
+ let me = kiocb.file();
+ dev_info!(me.dev, "Writing to Rust Misc Device Sample\n");
+
+ let mut inner = me.inner.lock();
+
+ // Replace buffer contents.
+ inner.buffer.clear();
+ let len = iov.copy_from_iter_vec(&mut inner.buffer, GFP_KERNEL)?;
+
+ // Set position to zero so that future `read` calls will see the new contents.
+ *kiocb.ki_pos_mut() = 0;
+
+ Ok(len)
+ }
+
+ fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: u32, arg: usize) -> Result<isize> {
+ dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n");
+
+ // Treat the ioctl argument as a user pointer.
+ let arg = UserPtr::from_addr(arg);
+ let size = _IOC_SIZE(cmd);
+
+ match cmd {
+ RUST_MISC_DEV_GET_VALUE => me.get_value(UserSlice::new(arg, size).writer())?,
+ RUST_MISC_DEV_SET_VALUE => me.set_value(UserSlice::new(arg, size).reader())?,
+ RUST_MISC_DEV_HELLO => me.hello()?,
+ _ => {
+ dev_err!(me.dev, "-> IOCTL not recognised: {}\n", cmd);
+ return Err(ENOTTY);
+ }
+ };
+
+ Ok(0)
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for RustMiscDevice {
+ fn drop(self: Pin<&mut Self>) {
+ dev_info!(self.dev, "Exiting the Rust Misc Device Sample\n");
+ }
+}
+
+impl RustMiscDevice {
+ fn set_value(&self, mut reader: UserSliceReader) -> Result<isize> {
+ let new_value = reader.read::<i32>()?;
+ let mut guard = self.inner.lock();
+
+ dev_info!(
+ self.dev,
+ "-> Copying data from userspace (value: {})\n",
+ new_value
+ );
+
+ guard.value = new_value;
+ Ok(0)
+ }
+
+ fn get_value(&self, mut writer: UserSliceWriter) -> Result<isize> {
+ let guard = self.inner.lock();
+ let value = guard.value;
+
+ // Release the lock and use our locally cached copy from here
+ drop(guard);
+
+ dev_info!(
+ self.dev,
+ "-> Copying data to userspace (value: {})\n",
+ &value
+ );
+
+ writer.write::<i32>(&value)?;
+ Ok(0)
+ }
+
+ fn hello(&self) -> Result<isize> {
+ dev_info!(self.dev, "-> Hello from the Rust Misc Device\n");
+
+ Ok(0)
+ }
+}
diff --git a/samples/rust/rust_print.rs b/samples/rust/rust_print.rs
deleted file mode 100644
index 8b39d9cef6d1..000000000000
--- a/samples/rust/rust_print.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-//! Rust printing macros sample.
-
-use kernel::pr_cont;
-use kernel::prelude::*;
-
-module! {
- type: RustPrint,
- name: "rust_print",
- author: "Rust for Linux Contributors",
- description: "Rust printing macros sample",
- license: "GPL",
-}
-
-struct RustPrint;
-
-impl kernel::Module for RustPrint {
- fn init(_module: &'static ThisModule) -> Result<Self> {
- pr_info!("Rust printing macros sample (init)\n");
-
- pr_emerg!("Emergency message (level 0) without args\n");
- pr_alert!("Alert message (level 1) without args\n");
- pr_crit!("Critical message (level 2) without args\n");
- pr_err!("Error message (level 3) without args\n");
- pr_warn!("Warning message (level 4) without args\n");
- pr_notice!("Notice message (level 5) without args\n");
- pr_info!("Info message (level 6) without args\n");
-
- pr_info!("A line that");
- pr_cont!(" is continued");
- pr_cont!(" without args\n");
-
- pr_emerg!("{} message (level {}) with args\n", "Emergency", 0);
- pr_alert!("{} message (level {}) with args\n", "Alert", 1);
- pr_crit!("{} message (level {}) with args\n", "Critical", 2);
- pr_err!("{} message (level {}) with args\n", "Error", 3);
- pr_warn!("{} message (level {}) with args\n", "Warning", 4);
- pr_notice!("{} message (level {}) with args\n", "Notice", 5);
- pr_info!("{} message (level {}) with args\n", "Info", 6);
-
- pr_info!("A {} that", "line");
- pr_cont!(" is {}", "continued");
- pr_cont!(" with {}\n", "args");
-
- Ok(RustPrint)
- }
-}
-
-impl Drop for RustPrint {
- fn drop(&mut self) {
- pr_info!("Rust printing macros sample (exit)\n");
- }
-}
diff --git a/samples/rust/rust_print_events.c b/samples/rust/rust_print_events.c
new file mode 100644
index 000000000000..a9169ff0edf1
--- /dev/null
+++ b/samples/rust/rust_print_events.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Google LLC
+ */
+
+#define CREATE_TRACE_POINTS
+#define CREATE_RUST_TRACE_POINTS
+#include <trace/events/rust_sample.h>
diff --git a/samples/rust/rust_print_main.rs b/samples/rust/rust_print_main.rs
new file mode 100644
index 000000000000..4095c72afeab
--- /dev/null
+++ b/samples/rust/rust_print_main.rs
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust printing macros sample.
+
+use kernel::pr_cont;
+use kernel::prelude::*;
+
+module! {
+ type: RustPrint,
+ name: "rust_print",
+ authors: ["Rust for Linux Contributors"],
+ description: "Rust printing macros sample",
+ license: "GPL",
+}
+
+struct RustPrint;
+
+#[expect(clippy::disallowed_macros)]
+fn arc_print() -> Result {
+ use kernel::sync::*;
+
+ let a = Arc::new(1, GFP_KERNEL)?;
+ let b = UniqueArc::new("hello, world", GFP_KERNEL)?;
+
+ // Prints the value of data in `a`.
+ pr_info!("{}", a);
+
+ // Uses ":?" to print debug fmt of `b`.
+ pr_info!("{:?}", b);
+
+ let a: Arc<&str> = b.into();
+ let c = a.clone();
+
+ // Uses `dbg` to print, will move `c` (for temporary debugging purposes).
+ dbg!(c);
+
+ {
+ // `Arc` can be used to delegate dynamic dispatch and the following is an example.
+ // Both `i32` and `&str` implement `Display`. This enables us to express a unified
+ // behaviour, contract or protocol on both `i32` and `&str` into a single `Arc` of
+ // type `Arc<dyn Display>`.
+
+ use kernel::fmt::Display;
+ fn arc_dyn_print(arc: &Arc<dyn Display>) {
+ pr_info!("Arc<dyn Display> says {arc}");
+ }
+
+ let a_i32_display: Arc<dyn Display> = Arc::new(42i32, GFP_KERNEL)?;
+ let a_str_display: Arc<dyn Display> = a.clone();
+
+ arc_dyn_print(&a_i32_display);
+ arc_dyn_print(&a_str_display);
+ }
+
+ // Pretty-prints the debug formatting with lower-case hexadecimal integers.
+ pr_info!("{:#x?}", a);
+
+ Ok(())
+}
+
+impl kernel::Module for RustPrint {
+ fn init(_module: &'static ThisModule) -> Result<Self> {
+ pr_info!("Rust printing macros sample (init)\n");
+
+ pr_emerg!("Emergency message (level 0) without args\n");
+ pr_alert!("Alert message (level 1) without args\n");
+ pr_crit!("Critical message (level 2) without args\n");
+ pr_err!("Error message (level 3) without args\n");
+ pr_warn!("Warning message (level 4) without args\n");
+ pr_notice!("Notice message (level 5) without args\n");
+ pr_info!("Info message (level 6) without args\n");
+
+ pr_info!("A line that");
+ pr_cont!(" is continued");
+ pr_cont!(" without args\n");
+
+ pr_emerg!("{} message (level {}) with args\n", "Emergency", 0);
+ pr_alert!("{} message (level {}) with args\n", "Alert", 1);
+ pr_crit!("{} message (level {}) with args\n", "Critical", 2);
+ pr_err!("{} message (level {}) with args\n", "Error", 3);
+ pr_warn!("{} message (level {}) with args\n", "Warning", 4);
+ pr_notice!("{} message (level {}) with args\n", "Notice", 5);
+ pr_info!("{} message (level {}) with args\n", "Info", 6);
+
+ pr_info!("A {} that", "line");
+ pr_cont!(" is {}", "continued");
+ pr_cont!(" with {}\n", "args");
+
+ arc_print()?;
+
+ trace::trace_rust_sample_loaded(42);
+
+ Ok(RustPrint)
+ }
+}
+
+impl Drop for RustPrint {
+ fn drop(&mut self) {
+ pr_info!("Rust printing macros sample (exit)\n");
+ }
+}
+
+mod trace {
+ use kernel::ffi::c_int;
+
+ kernel::declare_trace! {
+ /// # Safety
+ ///
+ /// Always safe to call.
+ unsafe fn rust_sample_loaded(magic: c_int);
+ }
+
+ pub(crate) fn trace_rust_sample_loaded(magic: i32) {
+ // SAFETY: Always safe to call.
+ unsafe { rust_sample_loaded(magic as c_int) }
+ }
+}
diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c
index 20291ec6489f..a23fec357b5d 100644
--- a/samples/seccomp/user-trap.c
+++ b/samples/seccomp/user-trap.c
@@ -33,6 +33,7 @@ static int send_fd(int sock, int fd)
{
struct msghdr msg = {};
struct cmsghdr *cmsg;
+ int *fd_ptr;
char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
struct iovec io = {
.iov_base = &c,
@@ -47,7 +48,8 @@ static int send_fd(int sock, int fd)
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
- *((int *)CMSG_DATA(cmsg)) = fd;
+ fd_ptr = (int *)CMSG_DATA(cmsg);
+ *fd_ptr = fd;
msg.msg_controllen = cmsg->cmsg_len;
if (sendmsg(sock, &msg, 0) < 0) {
@@ -62,6 +64,7 @@ static int recv_fd(int sock)
{
struct msghdr msg = {};
struct cmsghdr *cmsg;
+ int *fd_ptr;
char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
struct iovec io = {
.iov_base = &c,
@@ -79,8 +82,9 @@ static int recv_fd(int sock)
}
cmsg = CMSG_FIRSTHDR(&msg);
+ fd_ptr = (int *)CMSG_DATA(cmsg);
- return *((int *)CMSG_DATA(cmsg));
+ return *fd_ptr;
}
static int user_trap_syscall(int nr, unsigned int flags)
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 1c6b843b8c4e..1a05fc153353 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -136,10 +136,11 @@
*
* To assign a string, use the helper macro __assign_str().
*
- * __assign_str(foo, bar);
+ * __assign_str(foo);
*
- * In most cases, the __assign_str() macro will take the same
- * parameters as the __string() macro had to declare the string.
+ * The __string() macro saves off the string that is passed into
+ * the second parameter, and the __assign_str() will store than
+ * saved string into the "foo" field.
*
* __vstring: This is similar to __string() but instead of taking a
* dynamic length, it takes a variable list va_list 'va' variable.
@@ -163,8 +164,7 @@
* __string().
*
* __string_len: This is a helper to a __dynamic_array, but it understands
- * that the array has characters in it, and with the combined
- * use of __assign_str_len(), it will allocate 'len' + 1 bytes
+ * that the array has characters in it, and it will allocate 'len' + 1 bytes
* in the ring buffer and add a '\0' to the string. This is
* useful if the string being saved has no terminating '\0' byte.
* It requires that the length of the string is known as it acts
@@ -174,9 +174,11 @@
*
* __string_len(foo, bar, len)
*
- * To assign this string, use the helper macro __assign_str_len().
+ * To assign this string, use the helper macro __assign_str().
+ * The length is saved via the __string_len() and is retrieved in
+ * __assign_str().
*
- * __assign_str_len(foo, bar, len);
+ * __assign_str(foo);
*
* Then len + 1 is allocated to the ring buffer, and a nul terminating
* byte is added. This is similar to:
@@ -302,20 +304,23 @@ TRACE_EVENT(foo_bar,
__bitmask( cpus, num_possible_cpus() )
__cpumask( cpum )
__vstring( vstr, fmt, va )
+ __string_len( lstr, foo, bar / 2 < strlen(foo) ? bar / 2 : strlen(foo) )
),
TP_fast_assign(
- strlcpy(__entry->foo, foo, 10);
+ strscpy(__entry->foo, foo, 10);
__entry->bar = bar;
memcpy(__get_dynamic_array(list), lst,
__length_of(lst) * sizeof(int));
- __assign_str(str, string);
+ __assign_str(str);
+ __assign_str(lstr);
__assign_vstr(vstr, fmt, va);
__assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
__assign_cpumask(cpum, cpumask_bits(mask));
),
- TP_printk("foo %s %d %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar,
+ TP_printk("foo %s %d %s %s %s %s %s %s (%s) (%s) %s [%d] %*pbl",
+ __entry->foo, __entry->bar,
/*
* Notice here the use of some helper functions. This includes:
@@ -359,8 +364,17 @@ TRACE_EVENT(foo_bar,
__print_array(__get_dynamic_array(list),
__get_dynamic_array_len(list) / sizeof(int),
sizeof(int)),
- __get_str(str), __get_bitmask(cpus), __get_cpumask(cpum),
- __get_str(vstr))
+
+/* A shortcut is to use __print_dynamic_array for dynamic arrays */
+
+ __print_dynamic_array(list, sizeof(int)),
+
+ __get_str(str), __get_str(lstr),
+ __get_bitmask(cpus), __get_cpumask(cpum),
+ __get_str(vstr),
+ __get_dynamic_array_len(cpus),
+ __get_dynamic_array_len(cpus),
+ __get_dynamic_array(cpus))
);
/*
@@ -414,7 +428,7 @@ TRACE_EVENT_CONDITION(foo_bar_with_cond,
),
TP_fast_assign(
- __assign_str(foo, foo);
+ __assign_str(foo);
__entry->bar = bar;
),
@@ -455,7 +469,7 @@ TRACE_EVENT_FN(foo_bar_with_fn,
),
TP_fast_assign(
- __assign_str(foo, foo);
+ __assign_str(foo);
__entry->bar = bar;
),
@@ -502,7 +516,7 @@ DECLARE_EVENT_CLASS(foo_template,
),
TP_fast_assign(
- __assign_str(foo, foo);
+ __assign_str(foo);
__entry->bar = bar;
),
@@ -570,7 +584,7 @@ TRACE_EVENT(foo_rel_loc,
),
TP_fast_assign(
- __assign_rel_str(foo, foo);
+ __assign_rel_str(foo);
__entry->bar = bar;
__assign_rel_bitmask(bitmask, mask,
BITS_PER_BYTE * sizeof(unsigned long));
diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c
index b99d9ab7db85..dd409b704b35 100644
--- a/samples/trace_events/trace_custom_sched.c
+++ b/samples/trace_events/trace_custom_sched.c
@@ -8,7 +8,6 @@
#define pr_fmt(fmt) fmt
#include <linux/trace_events.h>
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/sched.h>
diff --git a/samples/tsm-mr/Makefile b/samples/tsm-mr/Makefile
new file mode 100644
index 000000000000..587c3947b3a7
--- /dev/null
+++ b/samples/tsm-mr/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_SAMPLE_TSM_MR) += tsm_mr_sample.o
diff --git a/samples/tsm-mr/tsm_mr_sample.c b/samples/tsm-mr/tsm_mr_sample.c
new file mode 100644
index 000000000000..a2c652148639
--- /dev/null
+++ b/samples/tsm-mr/tsm_mr_sample.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024-2005 Intel Corporation. All rights reserved. */
+
+#define pr_fmt(x) KBUILD_MODNAME ": " x
+
+#include <linux/module.h>
+#include <linux/tsm-mr.h>
+#include <linux/miscdevice.h>
+#include <crypto/hash.h>
+
+static struct {
+ u8 static_mr[SHA384_DIGEST_SIZE];
+ u8 config_mr[SHA512_DIGEST_SIZE];
+ u8 rtmr0[SHA256_DIGEST_SIZE];
+ u8 rtmr1[SHA384_DIGEST_SIZE];
+ u8 report_digest[SHA512_DIGEST_SIZE];
+} sample_report = {
+ .static_mr = "static_mr",
+ .config_mr = "config_mr",
+ .rtmr0 = "rtmr0",
+ .rtmr1 = "rtmr1",
+};
+
+static int sample_report_refresh(const struct tsm_measurements *tm)
+{
+ struct crypto_shash *tfm;
+ int rc;
+
+ tfm = crypto_alloc_shash(hash_algo_name[HASH_ALGO_SHA512], 0, 0);
+ if (IS_ERR(tfm)) {
+ pr_err("crypto_alloc_shash failed: %ld\n", PTR_ERR(tfm));
+ return PTR_ERR(tfm);
+ }
+
+ rc = crypto_shash_tfm_digest(tfm, (u8 *)&sample_report,
+ offsetof(typeof(sample_report),
+ report_digest),
+ sample_report.report_digest);
+ crypto_free_shash(tfm);
+ if (rc)
+ pr_err("crypto_shash_tfm_digest failed: %d\n", rc);
+ return rc;
+}
+
+static int sample_report_extend_mr(const struct tsm_measurements *tm,
+ const struct tsm_measurement_register *mr,
+ const u8 *data)
+{
+ SHASH_DESC_ON_STACK(desc, 0);
+ int rc;
+
+ desc->tfm = crypto_alloc_shash(hash_algo_name[mr->mr_hash], 0, 0);
+ if (IS_ERR(desc->tfm)) {
+ pr_err("crypto_alloc_shash failed: %ld\n", PTR_ERR(desc->tfm));
+ return PTR_ERR(desc->tfm);
+ }
+
+ rc = crypto_shash_init(desc);
+ if (!rc)
+ rc = crypto_shash_update(desc, mr->mr_value, mr->mr_size);
+ if (!rc)
+ rc = crypto_shash_finup(desc, data, mr->mr_size, mr->mr_value);
+ crypto_free_shash(desc->tfm);
+ if (rc)
+ pr_err("SHA calculation failed: %d\n", rc);
+ return rc;
+}
+
+#define MR_(mr, hash) .mr_value = &sample_report.mr, TSM_MR_(mr, hash)
+static const struct tsm_measurement_register sample_mrs[] = {
+ /* static MR, read-only */
+ { MR_(static_mr, SHA384) },
+ /* config MR, read-only */
+ { MR_(config_mr, SHA512) | TSM_MR_F_NOHASH },
+ /* RTMR, direct extension prohibited */
+ { MR_(rtmr0, SHA256) | TSM_MR_F_LIVE },
+ /* RTMR, direct extension allowed */
+ { MR_(rtmr1, SHA384) | TSM_MR_F_RTMR },
+ /* RTMR, crypto agile, aliased to rtmr0 and rtmr1, respectively */
+ { .mr_value = &sample_report.rtmr0,
+ TSM_MR_(rtmr_crypto_agile, SHA256) | TSM_MR_F_RTMR },
+ { .mr_value = &sample_report.rtmr1,
+ TSM_MR_(rtmr_crypto_agile, SHA384) | TSM_MR_F_RTMR },
+ /* sha512 digest of the whole structure */
+ { MR_(report_digest, SHA512) | TSM_MR_F_LIVE },
+};
+#undef MR_
+
+static struct tsm_measurements sample_tm = {
+ .mrs = sample_mrs,
+ .nr_mrs = ARRAY_SIZE(sample_mrs),
+ .refresh = sample_report_refresh,
+ .write = sample_report_extend_mr,
+};
+
+static const struct attribute_group *sample_groups[] = {
+ NULL,
+ NULL,
+};
+
+static struct miscdevice sample_misc_dev = {
+ .name = KBUILD_MODNAME,
+ .minor = MISC_DYNAMIC_MINOR,
+ .groups = sample_groups,
+};
+
+static int __init tsm_mr_sample_init(void)
+{
+ int rc;
+
+ sample_groups[0] = tsm_mr_create_attribute_group(&sample_tm);
+ if (IS_ERR(sample_groups[0]))
+ return PTR_ERR(sample_groups[0]);
+
+ rc = misc_register(&sample_misc_dev);
+ if (rc)
+ tsm_mr_free_attribute_group(sample_groups[0]);
+ return rc;
+}
+
+static void __exit tsm_mr_sample_exit(void)
+{
+ misc_deregister(&sample_misc_dev);
+ tsm_mr_free_attribute_group(sample_groups[0]);
+}
+
+module_init(tsm_mr_sample_init);
+module_exit(tsm_mr_sample_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Sample module using tsm-mr to expose emulated MRs");
diff --git a/samples/user_events/example.c b/samples/user_events/example.c
index d06dc24156ec..28165a096697 100644
--- a/samples/user_events/example.c
+++ b/samples/user_events/example.c
@@ -9,51 +9,28 @@
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/uio.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
-#include <asm/bitsperlong.h>
-#include <endian.h>
#include <linux/user_events.h>
-#if __BITS_PER_LONG == 64
-#define endian_swap(x) htole64(x)
-#else
-#define endian_swap(x) htole32(x)
-#endif
+const char *data_file = "/sys/kernel/tracing/user_events_data";
+int enabled = 0;
-/* Assumes debugfs is mounted */
-const char *data_file = "/sys/kernel/debug/tracing/user_events_data";
-const char *status_file = "/sys/kernel/debug/tracing/user_events_status";
-
-static int event_status(long **status)
-{
- int fd = open(status_file, O_RDONLY);
-
- *status = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ,
- MAP_SHARED, fd, 0);
-
- close(fd);
-
- if (*status == MAP_FAILED)
- return -1;
-
- return 0;
-}
-
-static int event_reg(int fd, const char *command, long *index, long *mask,
- int *write)
+static int event_reg(int fd, const char *command, int *write, int *enabled)
{
struct user_reg reg = {0};
reg.size = sizeof(reg);
+ reg.enable_bit = 31;
+ reg.enable_size = sizeof(*enabled);
+ reg.enable_addr = (__u64)enabled;
reg.name_args = (__u64)command;
if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
return -1;
- *index = reg.status_bit / __BITS_PER_LONG;
- *mask = endian_swap(1L << (reg.status_bit % __BITS_PER_LONG));
*write = reg.write_index;
return 0;
@@ -62,17 +39,12 @@ static int event_reg(int fd, const char *command, long *index, long *mask,
int main(int argc, char **argv)
{
int data_fd, write;
- long index, mask;
- long *status_page;
struct iovec io[2];
__u32 count = 0;
- if (event_status(&status_page) == -1)
- return errno;
-
data_fd = open(data_file, O_RDWR);
- if (event_reg(data_fd, "test u32 count", &index, &mask, &write) == -1)
+ if (event_reg(data_fd, "test u32 count", &write, &enabled) == -1)
return errno;
/* Setup iovec */
@@ -80,13 +52,12 @@ int main(int argc, char **argv)
io[0].iov_len = sizeof(write);
io[1].iov_base = &count;
io[1].iov_len = sizeof(count);
-
ask:
printf("Press enter to check status...\n");
getchar();
/* Check if anyone is listening */
- if (status_page[index] & mask) {
+ if (enabled) {
/* Yep, trace out our data */
writev(data_fd, (const struct iovec *)io, 2);
diff --git a/samples/v4l/v4l2-pci-skeleton.c b/samples/v4l/v4l2-pci-skeleton.c
index a61f94db18d9..69925d30329e 100644
--- a/samples/v4l/v4l2-pci-skeleton.c
+++ b/samples/v4l/v4l2-pci-skeleton.c
@@ -155,6 +155,7 @@ static int queue_setup(struct vb2_queue *vq,
unsigned int sizes[], struct device *alloc_devs[])
{
struct skeleton *skel = vb2_get_drv_priv(vq);
+ unsigned int q_num_bufs = vb2_get_num_buffers(vq);
skel->field = skel->format.field;
if (skel->field == V4L2_FIELD_ALTERNATE) {
@@ -167,8 +168,8 @@ static int queue_setup(struct vb2_queue *vq,
skel->field = V4L2_FIELD_TOP;
}
- if (vq->num_buffers + *nbuffers < 3)
- *nbuffers = 3 - vq->num_buffers;
+ if (q_num_bufs + *nbuffers < 3)
+ *nbuffers = 3 - q_num_bufs;
if (*nplanes)
return sizes[0] < skel->format.sizeimage ? -EINVAL : 0;
@@ -268,9 +269,7 @@ static void stop_streaming(struct vb2_queue *vq)
}
/*
- * The vb2 queue ops. Note that since q->lock is set we can use the standard
- * vb2_ops_wait_prepare/finish helper functions. If q->lock would be NULL,
- * then this driver would have to provide these ops.
+ * The vb2 queue ops.
*/
static const struct vb2_ops skel_qops = {
.queue_setup = queue_setup,
@@ -278,8 +277,6 @@ static const struct vb2_ops skel_qops = {
.buf_queue = buffer_queue,
.start_streaming = start_streaming,
.stop_streaming = stop_streaming,
- .wait_prepare = vb2_ops_wait_prepare,
- .wait_finish = vb2_ops_wait_finish,
};
/*
@@ -291,8 +288,8 @@ static int skeleton_querycap(struct file *file, void *priv,
{
struct skeleton *skel = video_drvdata(file);
- strlcpy(cap->driver, KBUILD_MODNAME, sizeof(cap->driver));
- strlcpy(cap->card, "V4L2 PCI Skeleton", sizeof(cap->card));
+ strscpy(cap->driver, KBUILD_MODNAME, sizeof(cap->driver));
+ strscpy(cap->card, "V4L2 PCI Skeleton", sizeof(cap->card));
snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s",
pci_name(skel->pdev));
return 0;
@@ -473,7 +470,7 @@ static int skeleton_querystd(struct file *file, void *priv, v4l2_std_id *std)
return 0;
}
-static int skeleton_s_dv_timings(struct file *file, void *_fh,
+static int skeleton_s_dv_timings(struct file *file, void *priv,
struct v4l2_dv_timings *timings)
{
struct skeleton *skel = video_drvdata(file);
@@ -512,7 +509,7 @@ static int skeleton_s_dv_timings(struct file *file, void *_fh,
return 0;
}
-static int skeleton_g_dv_timings(struct file *file, void *_fh,
+static int skeleton_g_dv_timings(struct file *file, void *priv,
struct v4l2_dv_timings *timings)
{
struct skeleton *skel = video_drvdata(file);
@@ -525,7 +522,7 @@ static int skeleton_g_dv_timings(struct file *file, void *_fh,
return 0;
}
-static int skeleton_enum_dv_timings(struct file *file, void *_fh,
+static int skeleton_enum_dv_timings(struct file *file, void *priv,
struct v4l2_enum_dv_timings *timings)
{
struct skeleton *skel = video_drvdata(file);
@@ -547,7 +544,7 @@ static int skeleton_enum_dv_timings(struct file *file, void *_fh,
* can lock but that the DMA engine it is connected to cannot handle
* pixelclocks above a certain frequency), then -ERANGE is returned.
*/
-static int skeleton_query_dv_timings(struct file *file, void *_fh,
+static int skeleton_query_dv_timings(struct file *file, void *priv,
struct v4l2_dv_timings *timings)
{
struct skeleton *skel = video_drvdata(file);
@@ -576,7 +573,7 @@ static int skeleton_query_dv_timings(struct file *file, void *_fh,
return 0;
}
-static int skeleton_dv_timings_cap(struct file *file, void *fh,
+static int skeleton_dv_timings_cap(struct file *file, void *priv,
struct v4l2_dv_timings_cap *cap)
{
struct skeleton *skel = video_drvdata(file);
@@ -597,11 +594,11 @@ static int skeleton_enum_input(struct file *file, void *priv,
i->type = V4L2_INPUT_TYPE_CAMERA;
if (i->index == 0) {
i->std = SKEL_TVNORMS;
- strlcpy(i->name, "S-Video", sizeof(i->name));
+ strscpy(i->name, "S-Video", sizeof(i->name));
i->capabilities = V4L2_IN_CAP_STD;
} else {
i->std = 0;
- strlcpy(i->name, "HDMI", sizeof(i->name));
+ strscpy(i->name, "HDMI", sizeof(i->name));
i->capabilities = V4L2_IN_CAP_DV_TIMINGS;
}
return 0;
@@ -820,7 +817,7 @@ static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* available before it can be started. The start_streaming() op
* won't be called until at least this many buffers are queued up.
*/
- q->min_buffers_needed = 2;
+ q->min_queued_buffers = 2;
/*
* The serialization lock for the streaming ioctls. This is the same
* as the main serialization lock, but if some of the non-streaming
@@ -845,7 +842,7 @@ static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Initialize the video_device structure */
vdev = &skel->vdev;
- strlcpy(vdev->name, KBUILD_MODNAME, sizeof(vdev->name));
+ strscpy(vdev->name, KBUILD_MODNAME, sizeof(vdev->name));
/*
* There is nothing to clean up, so release is set to an empty release
* function. The release callback must be non-NULL.
diff --git a/samples/vfio-mdev/README.rst b/samples/vfio-mdev/README.rst
new file mode 100644
index 000000000000..b52eb37739c0
--- /dev/null
+++ b/samples/vfio-mdev/README.rst
@@ -0,0 +1,100 @@
+Using the mtty vfio-mdev sample code
+====================================
+
+mtty is a sample vfio-mdev driver that demonstrates how to use the mediated
+device framework.
+
+The sample driver creates an mdev device that simulates a serial port over a PCI
+card.
+
+1. Build and load the mtty.ko module.
+
+ This step creates a dummy device, /sys/devices/virtual/mtty/mtty/
+
+ Files in this device directory in sysfs are similar to the following::
+
+ # tree /sys/devices/virtual/mtty/mtty/
+ /sys/devices/virtual/mtty/mtty/
+ |-- mdev_supported_types
+ | |-- mtty-1
+ | | |-- available_instances
+ | | |-- create
+ | | |-- device_api
+ | | |-- devices
+ | | `-- name
+ | `-- mtty-2
+ | |-- available_instances
+ | |-- create
+ | |-- device_api
+ | |-- devices
+ | `-- name
+ |-- mtty_dev
+ | `-- sample_mtty_dev
+ |-- power
+ | |-- autosuspend_delay_ms
+ | |-- control
+ | |-- runtime_active_time
+ | |-- runtime_status
+ | `-- runtime_suspended_time
+ |-- subsystem -> ../../../../class/mtty
+ `-- uevent
+
+2. Create a mediated device by using the dummy device that you created in the
+ previous step::
+
+ # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > \
+ /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create
+
+3. Add parameters to qemu-kvm::
+
+ -device vfio-pci,\
+ sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
+
+4. Boot the VM.
+
+ In the Linux guest VM, with no hardware on the host, the device appears
+ as follows::
+
+ # lspci -s 00:05.0 -xxvv
+ 00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
+ Subsystem: Device 4348:3253
+ Physical Slot: 5
+ Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
+ Stepping- SERR- FastB2B- DisINTx-
+ Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
+ <TAbort- <MAbort- >SERR- <PERR- INTx-
+ Interrupt: pin A routed to IRQ 10
+ Region 0: I/O ports at c150 [size=8]
+ Region 1: I/O ports at c158 [size=8]
+ Kernel driver in use: serial
+ 00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
+ 10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
+ 20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
+ 30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
+
+ In the Linux guest VM, dmesg output for the device is as follows::
+
+ serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10
+ 0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
+ 0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
+
+
+5. In the Linux guest VM, check the serial ports::
+
+ # setserial -g /dev/ttyS*
+ /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
+ /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
+ /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
+
+6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or
+ /dev/ttyS2 with hardware flow control disabled.
+
+7. Type data on the minicom terminal or send data to the terminal emulation
+ program and read the data.
+
+ Data is looped back from the host's mtty driver.
+
+8. Destroy the mediated device that you created::
+
+ # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove
+
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index e54eb752e1ba..64ea19253ee3 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -88,6 +88,7 @@
#define STORE_LE32(addr, val) (*(u32 *)addr = val)
+MODULE_DESCRIPTION("Mediated virtual PCI display host device driver");
MODULE_LICENSE("GPL v2");
static int max_mbytes = 256;
@@ -133,18 +134,15 @@ static struct mdev_type *mbochs_mdev_types[] = {
};
static dev_t mbochs_devt;
-static struct class *mbochs_class;
+static const struct class mbochs_class = {
+ .name = MBOCHS_CLASS_NAME,
+};
static struct cdev mbochs_cdev;
static struct device mbochs_dev;
static struct mdev_parent mbochs_parent;
static atomic_t mbochs_avail_mbytes;
static const struct vfio_device_ops mbochs_dev_ops;
-struct vfio_region_info_ext {
- struct vfio_region_info base;
- struct vfio_region_info_cap_type type;
-};
-
struct mbochs_mode {
u32 drm_format;
u32 bytepp;
@@ -1030,10 +1028,12 @@ static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
return 0;
}
-static int mbochs_get_region_info(struct mdev_state *mdev_state,
- struct vfio_region_info_ext *ext)
+static int mbochs_ioctl_get_region_info(struct vfio_device *vdev,
+ struct vfio_region_info *region_info,
+ struct vfio_info_cap *caps)
{
- struct vfio_region_info *region_info = &ext->base;
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
if (region_info->index >= MBOCHS_NUM_REGIONS)
return -EINVAL;
@@ -1058,20 +1058,23 @@ static int mbochs_get_region_info(struct mdev_state *mdev_state,
region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
VFIO_REGION_INFO_FLAG_WRITE);
break;
- case MBOCHS_EDID_REGION_INDEX:
- ext->base.argsz = sizeof(*ext);
- ext->base.offset = MBOCHS_EDID_OFFSET;
- ext->base.size = MBOCHS_EDID_SIZE;
- ext->base.flags = (VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE |
- VFIO_REGION_INFO_FLAG_CAPS);
- ext->base.cap_offset = offsetof(typeof(*ext), type);
- ext->type.header.id = VFIO_REGION_INFO_CAP_TYPE;
- ext->type.header.version = 1;
- ext->type.header.next = 0;
- ext->type.type = VFIO_REGION_TYPE_GFX;
- ext->type.subtype = VFIO_REGION_SUBTYPE_GFX_EDID;
- break;
+ case MBOCHS_EDID_REGION_INDEX: {
+ struct vfio_region_info_cap_type cap_type = {
+ .header.id = VFIO_REGION_INFO_CAP_TYPE,
+ .header.version = 1,
+ .type = VFIO_REGION_TYPE_GFX,
+ .subtype = VFIO_REGION_SUBTYPE_GFX_EDID,
+ };
+
+ region_info->offset = MBOCHS_EDID_OFFSET;
+ region_info->size = MBOCHS_EDID_SIZE;
+ region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_CAPS);
+
+ return vfio_info_add_capability(caps, &cap_type.header,
+ sizeof(cap_type));
+ }
default:
region_info->size = 0;
region_info->offset = 0;
@@ -1188,7 +1191,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd,
struct mdev_state *mdev_state =
container_of(vdev, struct mdev_state, vdev);
int ret = 0;
- unsigned long minsz, outsz;
+ unsigned long minsz;
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
@@ -1212,30 +1215,6 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd,
return 0;
}
- case VFIO_DEVICE_GET_REGION_INFO:
- {
- struct vfio_region_info_ext info;
-
- minsz = offsetofend(typeof(info), base.offset);
-
- if (copy_from_user(&info, (void __user *)arg, minsz))
- return -EFAULT;
-
- outsz = info.base.argsz;
- if (outsz < minsz)
- return -EINVAL;
- if (outsz > sizeof(info))
- return -EINVAL;
-
- ret = mbochs_get_region_info(mdev_state, &info);
- if (ret)
- return ret;
-
- if (copy_to_user((void __user *)arg, &info, outsz))
- return -EFAULT;
-
- return 0;
- }
case VFIO_DEVICE_GET_IRQ_INFO:
{
@@ -1262,7 +1241,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd,
case VFIO_DEVICE_QUERY_GFX_PLANE:
{
- struct vfio_device_gfx_plane_info plane;
+ struct vfio_device_gfx_plane_info plane = {};
minsz = offsetofend(struct vfio_device_gfx_plane_info,
region_index);
@@ -1373,7 +1352,12 @@ static const struct vfio_device_ops mbochs_dev_ops = {
.read = mbochs_read,
.write = mbochs_write,
.ioctl = mbochs_ioctl,
+ .get_region_info_caps = mbochs_ioctl_get_region_info,
.mmap = mbochs_mmap,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
+ .detach_ioas = vfio_iommufd_emulated_detach_ioas,
};
static struct mdev_driver mbochs_driver = {
@@ -1418,13 +1402,10 @@ static int __init mbochs_dev_init(void)
if (ret)
goto err_cdev;
- mbochs_class = class_create(THIS_MODULE, MBOCHS_CLASS_NAME);
- if (IS_ERR(mbochs_class)) {
- pr_err("Error: failed to register mbochs_dev class\n");
- ret = PTR_ERR(mbochs_class);
+ ret = class_register(&mbochs_class);
+ if (ret)
goto err_driver;
- }
- mbochs_dev.class = mbochs_class;
+ mbochs_dev.class = &mbochs_class;
mbochs_dev.release = mbochs_device_release;
dev_set_name(&mbochs_dev, "%s", MBOCHS_NAME);
@@ -1444,7 +1425,7 @@ err_device:
device_del(&mbochs_dev);
err_put:
put_device(&mbochs_dev);
- class_destroy(mbochs_class);
+ class_unregister(&mbochs_class);
err_driver:
mdev_unregister_driver(&mbochs_driver);
err_cdev:
@@ -1462,10 +1443,9 @@ static void __exit mbochs_dev_exit(void)
mdev_unregister_driver(&mbochs_driver);
cdev_del(&mbochs_cdev);
unregister_chrdev_region(mbochs_devt, MINORMASK + 1);
- class_destroy(mbochs_class);
- mbochs_class = NULL;
+ class_unregister(&mbochs_class);
}
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
module_init(mbochs_dev_init)
module_exit(mbochs_dev_exit)
diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
index 3c8001b9e407..149af7f598f8 100644
--- a/samples/vfio-mdev/mdpy-fb.c
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -88,11 +88,9 @@ static void mdpy_fb_destroy(struct fb_info *info)
static const struct fb_ops mdpy_fb_ops = {
.owner = THIS_MODULE,
+ FB_DEFAULT_IOMEM_OPS,
.fb_destroy = mdpy_fb_destroy,
.fb_setcolreg = mdpy_fb_setcolreg,
- .fb_fillrect = cfb_fillrect,
- .fb_copyarea = cfb_copyarea,
- .fb_imageblit = cfb_imageblit,
};
static int mdpy_fb_probe(struct pci_dev *pdev,
@@ -162,7 +160,6 @@ static int mdpy_fb_probe(struct pci_dev *pdev,
}
info->fbops = &mdpy_fb_ops;
- info->flags = FBINFO_DEFAULT;
info->pseudo_palette = par->palette;
ret = register_framebuffer(info);
@@ -232,4 +229,5 @@ static int __init mdpy_fb_init(void)
module_init(mdpy_fb_init);
MODULE_DEVICE_TABLE(pci, mdpy_fb_pci_table);
+MODULE_DESCRIPTION("Framebuffer driver for mdpy (mediated virtual pci display device)");
MODULE_LICENSE("GPL v2");
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index e8400fdab71d..0759bd68edca 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -40,6 +40,7 @@
#define STORE_LE32(addr, val) (*(u32 *)addr = val)
+MODULE_DESCRIPTION("Mediated virtual PCI display host device driver");
MODULE_LICENSE("GPL v2");
#define MDPY_TYPE_1 "vga"
@@ -84,7 +85,9 @@ static struct mdev_type *mdpy_mdev_types[] = {
};
static dev_t mdpy_devt;
-static struct class *mdpy_class;
+static const struct class mdpy_class = {
+ .name = MDPY_CLASS_NAME,
+};
static struct cdev mdpy_cdev;
static struct device mdpy_dev;
static struct mdev_parent mdpy_parent;
@@ -432,10 +435,13 @@ static int mdpy_mmap(struct vfio_device *vdev, struct vm_area_struct *vma)
return remap_vmalloc_range(vma, mdev_state->memblk, 0);
}
-static int mdpy_get_region_info(struct mdev_state *mdev_state,
- struct vfio_region_info *region_info,
- u16 *cap_type_id, void **cap_type)
+static int mdpy_ioctl_get_region_info(struct vfio_device *vdev,
+ struct vfio_region_info *region_info,
+ struct vfio_info_cap *caps)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
if (region_info->index >= VFIO_PCI_NUM_REGIONS &&
region_info->index != MDPY_DISPLAY_REGION)
return -EINVAL;
@@ -541,30 +547,6 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd,
return 0;
}
- case VFIO_DEVICE_GET_REGION_INFO:
- {
- struct vfio_region_info info;
- u16 cap_type_id = 0;
- void *cap_type = NULL;
-
- minsz = offsetofend(struct vfio_region_info, offset);
-
- if (copy_from_user(&info, (void __user *)arg, minsz))
- return -EFAULT;
-
- if (info.argsz < minsz)
- return -EINVAL;
-
- ret = mdpy_get_region_info(mdev_state, &info, &cap_type_id,
- &cap_type);
- if (ret)
- return ret;
-
- if (copy_to_user((void __user *)arg, &info, minsz))
- return -EFAULT;
-
- return 0;
- }
case VFIO_DEVICE_GET_IRQ_INFO:
{
@@ -591,7 +573,7 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd,
case VFIO_DEVICE_QUERY_GFX_PLANE:
{
- struct vfio_device_gfx_plane_info plane;
+ struct vfio_device_gfx_plane_info plane = {};
minsz = offsetofend(struct vfio_device_gfx_plane_info,
region_index);
@@ -662,7 +644,12 @@ static const struct vfio_device_ops mdpy_dev_ops = {
.read = mdpy_read,
.write = mdpy_write,
.ioctl = mdpy_ioctl,
+ .get_region_info_caps = mdpy_ioctl_get_region_info,
.mmap = mdpy_mmap,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
+ .detach_ioas = vfio_iommufd_emulated_detach_ioas,
};
static struct mdev_driver mdpy_driver = {
@@ -705,13 +692,10 @@ static int __init mdpy_dev_init(void)
if (ret)
goto err_cdev;
- mdpy_class = class_create(THIS_MODULE, MDPY_CLASS_NAME);
- if (IS_ERR(mdpy_class)) {
- pr_err("Error: failed to register mdpy_dev class\n");
- ret = PTR_ERR(mdpy_class);
+ ret = class_register(&mdpy_class);
+ if (ret)
goto err_driver;
- }
- mdpy_dev.class = mdpy_class;
+ mdpy_dev.class = &mdpy_class;
mdpy_dev.release = mdpy_device_release;
dev_set_name(&mdpy_dev, "%s", MDPY_NAME);
@@ -731,7 +715,7 @@ err_device:
device_del(&mdpy_dev);
err_put:
put_device(&mdpy_dev);
- class_destroy(mdpy_class);
+ class_unregister(&mdpy_class);
err_driver:
mdev_unregister_driver(&mdpy_driver);
err_cdev:
@@ -749,8 +733,7 @@ static void __exit mdpy_dev_exit(void)
mdev_unregister_driver(&mdpy_driver);
cdev_del(&mdpy_cdev);
unregister_chrdev_region(mdpy_devt, MINORMASK + 1);
- class_destroy(mdpy_class);
- mdpy_class = NULL;
+ class_unregister(&mdpy_class);
}
module_param_named(count, mdpy_driver.max_instances, int, 0444);
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index e887de672c52..bd92c38379b8 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -29,6 +29,8 @@
#include <linux/serial.h>
#include <uapi/linux/serial_reg.h>
#include <linux/eventfd.h>
+#include <linux/anon_inodes.h>
+
/*
* #defines
*/
@@ -124,10 +126,32 @@ struct serial_port {
u8 intr_trigger_level; /* interrupt trigger level */
};
+/*
+ * Image of the device state as it travels through a migration file: a fixed
+ * header (everything before @ports) followed by one struct serial_port per
+ * port in use.
+ */
+struct mtty_data {
+ /* Must equal MTTY_MAGIC; checked once a resume stream's header is complete */
+ u64 magic;
+#define MTTY_MAGIC 0x7e9d09898c3e2c4e /* Nothing clever, just random */
+ /* Format version; resume requires an exact major/minor match */
+ u32 major_ver;
+#define MTTY_MAJOR_VER 1
+ u32 minor_ver;
+#define MTTY_MINOR_VER 0
+ /* Port count; resume requires it to match the receiving device's nr_ports */
+ u32 nr_ports;
+ /* Left zero by the save path; resume rejects any non-zero value */
+ u32 flags;
+ /* Per-port state; only the first nr_ports entries are transferred */
+ struct serial_port ports[2];
+};
+
+struct mdev_state;
+
+/* Per-file state behind one anonymous-inode migration file (save or resume). */
+struct mtty_migration_file {
+ /* The anon inode file created for this object */
+ struct file *filp;
+ /* Held around accesses to data, filled_size, disabled and filp->f_pos */
+ struct mutex lock;
+ /* Device this migration file belongs to */
+ struct mdev_state *mdev_state;
+ /* Staging buffer for the migration stream */
+ struct mtty_data data;
+ /* Number of bytes of @data filled so far */
+ ssize_t filled_size;
+ /* Set by mtty_disable_file(); file handlers then fail with -ENODEV */
+ u8 disabled:1;
+};
+
/* State of each mdev device */
struct mdev_state {
struct vfio_device vdev;
- int irq_fd;
struct eventfd_ctx *intx_evtfd;
struct eventfd_ctx *msi_evtfd;
int irq_index;
@@ -141,6 +165,13 @@ struct mdev_state {
struct mutex rxtx_lock;
struct vfio_device_info dev_info;
int nr_ports;
+ enum vfio_device_mig_state state;
+ struct mutex state_mutex;
+ struct mutex reset_mutex;
+ struct mtty_migration_file *saving_migf;
+ struct mtty_migration_file *resuming_migf;
+ u8 deferred_reset:1;
+ u8 intx_mask:1;
};
static struct mtty_type {
@@ -166,10 +197,6 @@ static const struct file_operations vd_fops = {
static const struct vfio_device_ops mtty_dev_ops;
-/* function prototypes */
-
-static int mtty_trigger_interrupt(struct mdev_state *mdev_state);
-
/* Helper functions */
static void dump_buffer(u8 *buf, uint32_t count)
@@ -186,6 +213,36 @@ static void dump_buffer(u8 *buf, uint32_t count)
#endif
}
+/* True when the device's current interrupt index is the INTx one. */
+static bool is_intx(struct mdev_state *mdev_state)
+{
+	return mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX;
+}
+
+/* True when the device's current interrupt index is the MSI one. */
+static bool is_msi(struct mdev_state *mdev_state)
+{
+	return mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX;
+}
+
+/* True when the current interrupt index is neither INTx nor MSI. */
+static bool is_noirq(struct mdev_state *mdev_state)
+{
+	return !(is_intx(mdev_state) || is_msi(mdev_state));
+}
+
+/*
+ * Signal the eventfd that matches the device's current interrupt mode.
+ *
+ * MSI: signal msi_evtfd if one is set.
+ * INTx: signal intx_evtfd only if one is set and intx_mask is clear, then set
+ * intx_mask so that no further INTx is raised from here until it is cleared
+ * again elsewhere.
+ * Any other mode: nothing to do.
+ *
+ * The caller must hold ops_lock.
+ */
+static void mtty_trigger_interrupt(struct mdev_state *mdev_state)
+{
+	lockdep_assert_held(&mdev_state->ops_lock);
+
+	if (is_msi(mdev_state)) {
+		if (mdev_state->msi_evtfd)
+			eventfd_signal(mdev_state->msi_evtfd);
+		return;
+	}
+
+	if (!is_intx(mdev_state))
+		return;
+
+	if (!mdev_state->intx_evtfd || mdev_state->intx_mask)
+		return;
+
+	eventfd_signal(mdev_state->intx_evtfd);
+	mdev_state->intx_mask = true;
+}
+
static void mtty_create_config_space(struct mdev_state *mdev_state)
{
/* PCI dev ID */
@@ -567,7 +624,7 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
u8 lsr = 0;
mutex_lock(&mdev_state->rxtx_lock);
- /* atleast one char in FIFO */
+ /* at least one char in FIFO */
if (mdev_state->s[index].rxtx.head !=
mdev_state->s[index].rxtx.tail)
lsr |= UART_LSR_DR;
@@ -717,6 +774,541 @@ accessfailed:
return ret;
}
+/*
+ * Size in bytes of a complete migration stream for this device: the
+ * struct mtty_data header plus one struct serial_port per configured port.
+ */
+static size_t mtty_data_size(struct mdev_state *mdev_state)
+{
+	size_t header_len = offsetof(struct mtty_data, ports);
+	size_t ports_len = mdev_state->nr_ports * sizeof(struct serial_port);
+
+	return header_len + ports_len;
+}
+
+/*
+ * Mark a migration file as dead: under the file's lock, rewind the file
+ * position and the fill level to zero and set ->disabled. The read, write and
+ * ioctl handlers in this file return -ENODEV once ->disabled is set.
+ */
+static void mtty_disable_file(struct mtty_migration_file *migf)
+{
+	mutex_lock(&migf->lock);
+	migf->filp->f_pos = 0;
+	migf->filled_size = 0;
+	migf->disabled = true;
+	mutex_unlock(&migf->lock);
+}
+
+/*
+ * Disable the migration file held in *@slot, if any, drop the reference the
+ * device holds on its struct file and clear the slot.
+ */
+static void mtty_put_migf(struct mtty_migration_file **slot)
+{
+	struct mtty_migration_file *migf = *slot;
+
+	if (!migf)
+		return;
+
+	mtty_disable_file(migf);
+	fput(migf->filp);
+	*slot = NULL;
+}
+
+/*
+ * Tear down both migration files of a device (saving first, then resuming):
+ * each one that exists is disabled, has its file reference dropped and is
+ * forgotten by the device.
+ */
+static void mtty_disable_files(struct mdev_state *mdev_state)
+{
+	mtty_put_migf(&mdev_state->saving_migf);
+	mtty_put_migf(&mdev_state->resuming_migf);
+}
+
+/*
+ * Release state_mutex, first servicing any reset that was flagged in
+ * deferred_reset while it was held.
+ *
+ * Called with state_mutex held. Each pass takes reset_mutex and checks
+ * deferred_reset; if it is set, the flag is cleared, the device is put back
+ * into VFIO_DEVICE_STATE_RUNNING, its migration files are disabled, and the
+ * check is repeated. Only when no reset is pending is state_mutex dropped,
+ * while reset_mutex is still held.
+ * NOTE(review): the code that sets deferred_reset is not visible here -
+ * presumably a reset path that could not take state_mutex; confirm.
+ */
+static void mtty_state_mutex_unlock(struct mdev_state *mdev_state)
+{
+again:
+ mutex_lock(&mdev_state->reset_mutex);
+ if (mdev_state->deferred_reset) {
+ mdev_state->deferred_reset = false;
+ /* Drop reset_mutex before the reset work; state_mutex is still held */
+ mutex_unlock(&mdev_state->reset_mutex);
+ mdev_state->state = VFIO_DEVICE_STATE_RUNNING;
+ mtty_disable_files(mdev_state);
+ goto again;
+ }
+ /* state_mutex is released before reset_mutex */
+ mutex_unlock(&mdev_state->state_mutex);
+ mutex_unlock(&mdev_state->reset_mutex);
+}
+
+/*
+ * ->release() handler shared by the save and resume migration files: disable
+ * the file, destroy its lock and free the mtty_migration_file stored in
+ * filp->private_data. Always returns 0.
+ */
+static int mtty_release_migf(struct inode *inode, struct file *filp)
+{
+ struct mtty_migration_file *migf = filp->private_data;
+
+ /* mtty_disable_file() takes migf->lock, so it must run before the destroy */
+ mtty_disable_file(migf);
+ mutex_destroy(&migf->lock);
+ kfree(migf);
+
+ return 0;
+}
+
+/*
+ * ioctl handler of the save-side migration file. Only
+ * VFIO_MIG_GET_PRECOPY_INFO is implemented: it reports the bytes still unread
+ * in the file (filled_size - f_pos) as initial_bytes and always reports zero
+ * dirty_bytes.
+ *
+ * Returns 0 on success, or:
+ *   -ENOTTY  @cmd is not VFIO_MIG_GET_PRECOPY_INFO
+ *   -EFAULT  copying the structure from or to user space failed
+ *   -EINVAL  argsz is too small, the device is in neither PRE_COPY nor
+ *            PRE_COPY_P2P, or f_pos is beyond the filled data
+ *   -ENODEV  the migration file has been disabled
+ */
+static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct mtty_migration_file *migf = filp->private_data;
+ struct mdev_state *mdev_state = migf->mdev_state;
+ loff_t *pos = &filp->f_pos;
+ struct vfio_precopy_info info = {};
+ unsigned long minsz;
+ int ret;
+
+ if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+ return -ENOTTY;
+
+ /* Only the fields up to and including dirty_bytes are exchanged */
+ minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ /* state_mutex is taken before migf->lock */
+ mutex_lock(&mdev_state->state_mutex);
+ if (mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY &&
+ mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ mutex_lock(&migf->lock);
+
+ if (migf->disabled) {
+ mutex_unlock(&migf->lock);
+ ret = -ENODEV;
+ goto unlock;
+ }
+
+ if (*pos > migf->filled_size) {
+ mutex_unlock(&migf->lock);
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ info.dirty_bytes = 0;
+ info.initial_bytes = migf->filled_size - *pos;
+ mutex_unlock(&migf->lock);
+
+ ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+unlock:
+ /* Not a plain mutex_unlock(): this also services a deferred reset */
+ mtty_state_mutex_unlock(mdev_state);
+ return ret;
+}
+
+/*
+ * read() handler of the save-side migration file.
+ *
+ * An explicit offset (non-NULL @pos) is refused with -ESPIPE; the file's own
+ * f_pos is used instead. Copies up to @len of the bytes between f_pos and
+ * ->filled_size out of the staging buffer and advances f_pos by the amount
+ * copied.
+ *
+ * Returns the number of bytes copied (0 when nothing is pending), -ENODEV if
+ * the file has been disabled, -EINVAL if f_pos is beyond the filled data, or
+ * -EFAULT if the copy to user space fails.
+ */
+static ssize_t mtty_save_read(struct file *filp, char __user *buf,
+			      size_t len, loff_t *pos)
+{
+	struct mtty_migration_file *migf = filp->private_data;
+	ssize_t ret = 0;
+
+	if (pos)
+		return -ESPIPE;
+
+	pos = &filp->f_pos;
+
+	mutex_lock(&migf->lock);
+
+	dev_dbg(migf->mdev_state->vdev.dev, "%s ask %zu\n", __func__, len);
+
+	if (migf->disabled) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	if (*pos > migf->filled_size) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Never hand out more than what has been filled in so far */
+	len = min_t(size_t, migf->filled_size - *pos, len);
+	if (len) {
+		if (copy_to_user(buf, (void *)&migf->data + *pos, len)) {
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+		*pos += len;
+		ret = len;
+	}
+out_unlock:
+	/* ret is signed and may hold a negative errno here: print with %zd */
+	dev_dbg(migf->mdev_state->vdev.dev, "%s read %zd\n", __func__, ret);
+	mutex_unlock(&migf->lock);
+	return ret;
+}
+
+/*
+ * File operations of the save-side migration file: read-out of the staged
+ * device state, the precopy-info ioctl, and teardown on release.
+ */
+static const struct file_operations mtty_save_fops = {
+ .owner = THIS_MODULE,
+ .read = mtty_save_read,
+ .unlocked_ioctl = mtty_precopy_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .release = mtty_release_migf,
+};
+
+/*
+ * Append a snapshot of every configured port to the saving migration file.
+ *
+ * Under the file's lock, copies mdev_state->s[i] into data.ports[i] for each
+ * of the device's nr_ports ports and grows ->filled_size by
+ * sizeof(struct serial_port) per port. mdev_state->saving_migf is
+ * dereferenced unconditionally, so it must already be set.
+ */
+static void mtty_save_state(struct mdev_state *mdev_state)
+{
+	struct mtty_migration_file *migf = mdev_state->saving_migf;
+	int i;
+
+	mutex_lock(&migf->lock);
+	for (i = 0; i < mdev_state->nr_ports; i++) {
+		memcpy(&migf->data.ports[i],
+		       &mdev_state->s[i], sizeof(struct serial_port));
+		migf->filled_size += sizeof(struct serial_port);
+	}
+	/* filled_size is an ssize_t, so it is printed with %zd */
+	dev_dbg(mdev_state->vdev.dev,
+		"%s filled to %zd\n", __func__, migf->filled_size);
+	mutex_unlock(&migf->lock);
+}
+
+/*
+ * Apply the state received through the resuming migration file to the device.
+ *
+ * Under the file's lock, checks that at least mtty_data_size() bytes were
+ * received and then copies data.ports[i] into mdev_state->s[i] for each of
+ * the device's nr_ports ports. mdev_state->resuming_migf is dereferenced
+ * unconditionally, so it must already be set.
+ *
+ * Returns 0 on success or -EINVAL if the stream is shorter than expected.
+ */
+static int mtty_load_state(struct mdev_state *mdev_state)
+{
+	struct mtty_migration_file *migf = mdev_state->resuming_migf;
+	int i;
+
+	mutex_lock(&migf->lock);
+	/* magic and version already tested by resume write fn */
+	if (migf->filled_size < mtty_data_size(mdev_state)) {
+		/* expected is a size_t (%zu), filled_size an ssize_t (%zd) */
+		dev_dbg(mdev_state->vdev.dev, "%s expected %zu, got %zd\n",
+			__func__, mtty_data_size(mdev_state),
+			migf->filled_size);
+		mutex_unlock(&migf->lock);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < mdev_state->nr_ports; i++)
+		memcpy(&mdev_state->s[i],
+		       &migf->data.ports[i], sizeof(struct serial_port));
+
+	mutex_unlock(&migf->lock);
+	return 0;
+}
+
+/*
+ * Create, or finish filling, the save-side migration file for a transition
+ * to @state.
+ *
+ * If the device already has a saving file, no new file is created: for
+ * VFIO_DEVICE_STATE_STOP_COPY the port state is appended to the existing
+ * file, for any other @state nothing is done; NULL is returned either way.
+ *
+ * Otherwise a new anonymous-inode file is allocated, its header (magic,
+ * version, port count) is filled in, it is recorded in
+ * mdev_state->saving_migf and, for VFIO_DEVICE_STATE_STOP_COPY, the port
+ * state is appended right away.
+ *
+ * Returns the newly created migration file, NULL when an existing one was
+ * reused, or an ERR_PTR() (-ENOMEM or the anon_inode_getfile() error).
+ */
+static struct mtty_migration_file *
+mtty_save_device_data(struct mdev_state *mdev_state,
+		      enum vfio_device_mig_state state)
+{
+	struct mtty_migration_file *migf = mdev_state->saving_migf;
+	struct mtty_migration_file *ret = NULL;
+
+	if (migf) {
+		if (state == VFIO_DEVICE_STATE_STOP_COPY)
+			goto fill_data;
+		return ret;
+	}
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	migf->filp = anon_inode_getfile("mtty_mig", &mtty_save_fops,
+					migf, O_RDONLY);
+	if (IS_ERR(migf->filp)) {
+		int rc = PTR_ERR(migf->filp);
+
+		kfree(migf);
+		return ERR_PTR(rc);
+	}
+
+	stream_open(migf->filp->f_inode, migf->filp);
+	mutex_init(&migf->lock);
+	migf->mdev_state = mdev_state;
+
+	migf->data.magic = MTTY_MAGIC;
+	migf->data.major_ver = MTTY_MAJOR_VER;
+	migf->data.minor_ver = MTTY_MINOR_VER;
+	migf->data.nr_ports = mdev_state->nr_ports;
+
+	/* Only the header is valid so far; port data follows at STOP_COPY */
+	migf->filled_size = offsetof(struct mtty_data, ports);
+
+	/* filled_size is an ssize_t, so it is printed with %zd */
+	dev_dbg(mdev_state->vdev.dev, "%s filled header to %zd\n",
+		__func__, migf->filled_size);
+
+	ret = mdev_state->saving_migf = migf;
+
+fill_data:
+	if (state == VFIO_DEVICE_STATE_STOP_COPY)
+		mtty_save_state(mdev_state);
+
+	return ret;
+}
+
+/*
+ * write() handler of the resume-side migration file.
+ *
+ * An explicit offset (non-NULL @pos) is refused with -ESPIPE; the file's own
+ * f_pos is used instead. The user data is copied into the staging buffer at
+ * f_pos, and f_pos and ->filled_size both advance by @len. The write that
+ * completes the header (everything before mtty_data.ports) triggers the
+ * header check: magic, version and port count must match and flags must be
+ * zero.
+ *
+ * Returns @len on success, or:
+ *   -ESPIPE  @pos is non-NULL
+ *   -EINVAL  f_pos is negative or f_pos + @len overflows
+ *   -ENOMEM  the write would extend past mtty_data_size()
+ *   -ENODEV  the migration file has been disabled
+ *   -EFAULT  the copy from user space failed, or the header check failed
+ */
+static ssize_t mtty_resume_write(struct file *filp, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct mtty_migration_file *migf = filp->private_data;
+ struct mdev_state *mdev_state = migf->mdev_state;
+ loff_t requested_length;
+ ssize_t ret = 0;
+
+ if (pos)
+ return -ESPIPE;
+
+ pos = &filp->f_pos;
+
+ if (*pos < 0 ||
+ check_add_overflow((loff_t)len, *pos, &requested_length))
+ return -EINVAL;
+
+ /* Bounds the copy below to the part of migf->data this device uses */
+ if (requested_length > mtty_data_size(mdev_state))
+ return -ENOMEM;
+
+ mutex_lock(&migf->lock);
+
+ if (migf->disabled) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (copy_from_user((void *)&migf->data + *pos, buf, len)) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+
+ *pos += len;
+ ret = len;
+
+ dev_dbg(migf->mdev_state->vdev.dev, "%s received %zu, total %zu\n",
+ __func__, len, migf->filled_size + len);
+
+ /* Validate exactly once: on the write that crosses the end of the header */
+ if (migf->filled_size < offsetof(struct mtty_data, ports) &&
+ migf->filled_size + len >= offsetof(struct mtty_data, ports)) {
+ if (migf->data.magic != MTTY_MAGIC || migf->data.flags ||
+ migf->data.major_ver != MTTY_MAJOR_VER ||
+ migf->data.minor_ver != MTTY_MINOR_VER ||
+ migf->data.nr_ports != mdev_state->nr_ports) {
+ dev_dbg(migf->mdev_state->vdev.dev,
+ "%s failed validation\n", __func__);
+ ret = -EFAULT;
+ } else {
+ dev_dbg(migf->mdev_state->vdev.dev,
+ "%s header validated\n", __func__);
+ }
+ }
+
+ /*
+ * NOTE(review): this runs even when the header check above failed, and
+ * f_pos has already advanced too, so later writes no longer cross the
+ * header boundary and are not re-validated - confirm this is intended.
+ */
+ migf->filled_size += len;
+
+out_unlock:
+ mutex_unlock(&migf->lock);
+ return ret;
+}
+
+/*
+ * File operations of the resume-side migration file: write-in of the incoming
+ * device state and teardown on release.
+ */
+static const struct file_operations mtty_resume_fops = {
+ .owner = THIS_MODULE,
+ .write = mtty_resume_write,
+ .release = mtty_release_migf,
+};
+
+/*
+ * Allocate the resume-side migration file for a device.
+ *
+ * Creates a write-only anonymous-inode file backed by mtty_resume_fops,
+ * initialises its lock and back-pointer, and records it in
+ * mdev_state->resuming_migf.
+ *
+ * Returns the new migration file, or an ERR_PTR() (-ENOMEM or the
+ * anon_inode_getfile() error).
+ */
+static struct mtty_migration_file *
+mtty_resume_device_data(struct mdev_state *mdev_state)
+{
+	struct mtty_migration_file *migf;
+	struct file *filp;
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	filp = anon_inode_getfile("mtty_mig", &mtty_resume_fops, migf,
+				  O_WRONLY);
+	if (IS_ERR(filp)) {
+		kfree(migf);
+		return ERR_PTR(PTR_ERR(filp));
+	}
+
+	migf->filp = filp;
+	stream_open(filp->f_inode, filp);
+	mutex_init(&migf->lock);
+	migf->mdev_state = mdev_state;
+
+	mdev_state->resuming_migf = migf;
+
+	return migf;
+}
+
+/*
+ * Perform a single migration state arc, from mdev_state->state to @new.
+ *
+ * Returns a referenced file for arcs that hand a migration file back to
+ * the caller, NULL for arcs that need no file, or an ERR_PTR() on failure
+ * (including -EINVAL for an arc that is not handled below). This function
+ * does not update mdev_state->state itself; mtty_set_state() does that
+ * after a successful step.
+ */
+static struct file *mtty_step_state(struct mdev_state *mdev_state,
+ enum vfio_device_mig_state new)
+{
+ enum vfio_device_mig_state cur = mdev_state->state;
+
+ dev_dbg(mdev_state->vdev.dev, "%s: %d -> %d\n", __func__, cur, new);
+
+ /*
+ * The following state transitions are no-op considering
+ * mtty does not do DMA nor require any explicit start/stop.
+ *
+ * RUNNING -> RUNNING_P2P
+ * RUNNING_P2P -> RUNNING
+ * RUNNING_P2P -> STOP
+ * PRE_COPY -> PRE_COPY_P2P
+ * PRE_COPY_P2P -> PRE_COPY
+ * STOP -> RUNNING_P2P
+ */
+ if ((cur == VFIO_DEVICE_STATE_RUNNING &&
+ new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
+ (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
+ (new == VFIO_DEVICE_STATE_RUNNING ||
+ new == VFIO_DEVICE_STATE_STOP)) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY &&
+ new == VFIO_DEVICE_STATE_PRE_COPY_P2P) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
+ new == VFIO_DEVICE_STATE_PRE_COPY) ||
+ (cur == VFIO_DEVICE_STATE_STOP &&
+ new == VFIO_DEVICE_STATE_RUNNING_P2P))
+ return NULL;
+
+ /*
+ * The following state transitions simply close migration files,
+ * with the exception of RESUMING -> STOP, which needs to load
+ * the state first.
+ *
+ * RESUMING -> STOP
+ * PRE_COPY -> RUNNING
+ * PRE_COPY_P2P -> RUNNING_P2P
+ * STOP_COPY -> STOP
+ */
+ if (cur == VFIO_DEVICE_STATE_RESUMING &&
+ new == VFIO_DEVICE_STATE_STOP) {
+ int ret;
+
+ /* On load failure the migration files are left untouched. */
+ ret = mtty_load_state(mdev_state);
+ if (ret)
+ return ERR_PTR(ret);
+ mtty_disable_files(mdev_state);
+ return NULL;
+ }
+
+ if ((cur == VFIO_DEVICE_STATE_PRE_COPY &&
+ new == VFIO_DEVICE_STATE_RUNNING) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
+ new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
+ (cur == VFIO_DEVICE_STATE_STOP_COPY &&
+ new == VFIO_DEVICE_STATE_STOP)) {
+ mtty_disable_files(mdev_state);
+ return NULL;
+ }
+
+ /*
+ * The following state transitions return migration files.
+ *
+ * RUNNING -> PRE_COPY
+ * RUNNING_P2P -> PRE_COPY_P2P
+ * STOP -> STOP_COPY
+ * STOP -> RESUMING
+ * PRE_COPY_P2P -> STOP_COPY
+ */
+ if ((cur == VFIO_DEVICE_STATE_RUNNING &&
+ new == VFIO_DEVICE_STATE_PRE_COPY) ||
+ (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
+ new == VFIO_DEVICE_STATE_PRE_COPY_P2P) ||
+ (cur == VFIO_DEVICE_STATE_STOP &&
+ new == VFIO_DEVICE_STATE_STOP_COPY) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
+ new == VFIO_DEVICE_STATE_STOP_COPY)) {
+ struct mtty_migration_file *migf;
+
+ migf = mtty_save_device_data(mdev_state, new);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+
+ /* mtty_save_device_data() may return NULL: no file for this arc. */
+ if (migf) {
+ /* Take a reference for the file handed back to the caller. */
+ get_file(migf->filp);
+
+ return migf->filp;
+ }
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_STOP &&
+ new == VFIO_DEVICE_STATE_RESUMING) {
+ struct mtty_migration_file *migf;
+
+ migf = mtty_resume_device_data(mdev_state);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+
+ /* Take a reference for the file handed back to the caller. */
+ get_file(migf->filp);
+
+ return migf->filp;
+ }
+
+ /* vfio_mig_get_next_state() does not use arcs other than the above */
+ WARN_ON(true);
+ return ERR_PTR(-EINVAL);
+}
+
+/*
+ * .migration_set_state callback: move the device from its current
+ * migration state to @new_state, one arc at a time, with state_mutex held.
+ *
+ * Each intermediate state comes from vfio_mig_get_next_state() and is
+ * applied with mtty_step_state(). Returns the file produced by the final
+ * arc (if any), NULL when no file is produced, or an ERR_PTR() on failure.
+ * On failure mdev_state->state stays at the last state reached.
+ */
+static struct file *mtty_set_state(struct vfio_device *vdev,
+ enum vfio_device_mig_state new_state)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct file *ret = NULL;
+
+ dev_dbg(vdev->dev, "%s -> %d\n", __func__, new_state);
+
+ mutex_lock(&mdev_state->state_mutex);
+ while (mdev_state->state != new_state) {
+ enum vfio_device_mig_state next_state;
+ int rc = vfio_mig_get_next_state(vdev, mdev_state->state,
+ new_state, &next_state);
+ if (rc) {
+ ret = ERR_PTR(rc);
+ break;
+ }
+
+ ret = mtty_step_state(mdev_state, next_state);
+ if (IS_ERR(ret))
+ break;
+
+ mdev_state->state = next_state;
+
+ /*
+ * Only the final arc may hand back a file; drop the reference
+ * and fail if an intermediate arc produced one.
+ */
+ if (WARN_ON(ret && new_state != next_state)) {
+ fput(ret);
+ ret = ERR_PTR(-EINVAL);
+ break;
+ }
+ }
+ /*
+ * NOTE(review): state_mutex is released through the driver helper
+ * rather than mutex_unlock(); the helper is defined outside this view
+ * and presumably also processes a deferred reset - confirm.
+ */
+ mtty_state_mutex_unlock(mdev_state);
+ return ret;
+}
+
+/*
+ * .migration_get_state callback: copy the current migration state into
+ * @current_state while holding state_mutex. Always returns 0.
+ */
+static int mtty_get_state(struct vfio_device *vdev,
+ enum vfio_device_mig_state *current_state)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ mutex_lock(&mdev_state->state_mutex);
+ *current_state = mdev_state->state;
+ mtty_state_mutex_unlock(mdev_state);
+ return 0;
+}
+
+/*
+ * .migration_get_data_size callback: store mtty_data_size() for this
+ * device in @stop_copy_length. Takes no lock and always returns 0.
+ */
+static int mtty_get_data_size(struct vfio_device *vdev,
+ unsigned long *stop_copy_length)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ *stop_copy_length = mtty_data_size(mdev_state);
+ return 0;
+}
+
+/* Migration callbacks; installed as vdev->mig_ops in mtty_init_dev(). */
+static const struct vfio_migration_ops mtty_migration_ops = {
+ .migration_set_state = mtty_set_state,
+ .migration_get_state = mtty_get_state,
+ .migration_get_data_size = mtty_get_data_size,
+};
+
+/* .log_start callback: no-op stub, ignores all arguments and returns 0. */
+static int mtty_log_start(struct vfio_device *vdev,
+ struct rb_root_cached *ranges,
+ u32 nnodes, u64 *page_size)
+{
+ return 0;
+}
+
+/* .log_stop callback: no-op stub, always returns 0. */
+static int mtty_log_stop(struct vfio_device *vdev)
+{
+ return 0;
+}
+
+/*
+ * .log_read_and_clear callback: no-op stub. @dirty is never written, so no
+ * range is ever reported dirty; always returns 0.
+ */
+static int mtty_log_read_and_clear(struct vfio_device *vdev,
+ unsigned long iova, unsigned long length,
+ struct iova_bitmap *dirty)
+{
+ return 0;
+}
+
+/* Dirty-logging callbacks (all no-ops); installed as vdev->log_ops. */
+static const struct vfio_log_ops mtty_log_ops = {
+ .log_start = mtty_log_start,
+ .log_stop = mtty_log_stop,
+ .log_read_and_clear = mtty_log_read_and_clear,
+};
+
static int mtty_init_dev(struct vfio_device *vdev)
{
struct mdev_state *mdev_state =
@@ -749,6 +1341,16 @@ static int mtty_init_dev(struct vfio_device *vdev)
mutex_init(&mdev_state->ops_lock);
mdev_state->mdev = mdev;
mtty_create_config_space(mdev_state);
+
+ mutex_init(&mdev_state->state_mutex);
+ mutex_init(&mdev_state->reset_mutex);
+ vdev->migration_flags = VFIO_MIGRATION_STOP_COPY |
+ VFIO_MIGRATION_P2P |
+ VFIO_MIGRATION_PRE_COPY;
+ vdev->mig_ops = &mtty_migration_ops;
+ vdev->log_ops = &mtty_log_ops;
+ mdev_state->state = VFIO_DEVICE_STATE_RUNNING;
+
return 0;
err_nr_ports:
@@ -782,6 +1384,8 @@ static void mtty_release_dev(struct vfio_device *vdev)
struct mdev_state *mdev_state =
container_of(vdev, struct mdev_state, vdev);
+ mutex_destroy(&mdev_state->reset_mutex);
+ mutex_destroy(&mdev_state->state_mutex);
atomic_add(mdev_state->nr_ports, &mdev_avail_ports);
kfree(mdev_state->vconfig);
}
@@ -798,6 +1402,15 @@ static int mtty_reset(struct mdev_state *mdev_state)
{
pr_info("%s: called\n", __func__);
+ mutex_lock(&mdev_state->reset_mutex);
+ mdev_state->deferred_reset = true;
+ if (!mutex_trylock(&mdev_state->state_mutex)) {
+ mutex_unlock(&mdev_state->reset_mutex);
+ return 0;
+ }
+ mutex_unlock(&mdev_state->reset_mutex);
+ mtty_state_mutex_unlock(mdev_state);
+
return 0;
}
@@ -921,6 +1534,25 @@ write_err:
return -EFAULT;
}
+/*
+ * Release the INTx eventfd, if one is installed, and reset the INTx
+ * bookkeeping (mask cleared, irq_index set to -1). Does nothing when no
+ * INTx eventfd is set.
+ */
+static void mtty_disable_intx(struct mdev_state *mdev_state)
+{
+	struct eventfd_ctx *ctx = mdev_state->intx_evtfd;
+
+	if (!ctx)
+		return;
+
+	eventfd_ctx_put(ctx);
+	mdev_state->intx_evtfd = NULL;
+	mdev_state->intx_mask = false;
+	mdev_state->irq_index = -1;
+}
+
+/*
+ * Release the MSI eventfd, if one is installed, and set irq_index to -1.
+ * Does nothing when no MSI eventfd is set.
+ */
+static void mtty_disable_msi(struct mdev_state *mdev_state)
+{
+	struct eventfd_ctx *ctx = mdev_state->msi_evtfd;
+
+	if (!ctx)
+		return;
+
+	eventfd_ctx_put(ctx);
+	mdev_state->msi_evtfd = NULL;
+	mdev_state->irq_index = -1;
+}
+
static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
unsigned int index, unsigned int start,
unsigned int count, void *data)
@@ -932,59 +1564,113 @@ static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
case VFIO_PCI_INTX_IRQ_INDEX:
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
case VFIO_IRQ_SET_ACTION_MASK:
+ if (!is_intx(mdev_state) || start != 0 || count != 1) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mdev_state->intx_mask = true;
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t mask = *(uint8_t *)data;
+
+ if (mask)
+ mdev_state->intx_mask = true;
+ } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ ret = -ENOTTY; /* No support for mask fd */
+ }
+ break;
case VFIO_IRQ_SET_ACTION_UNMASK:
+ if (!is_intx(mdev_state) || start != 0 || count != 1) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mdev_state->intx_mask = false;
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t mask = *(uint8_t *)data;
+
+ if (mask)
+ mdev_state->intx_mask = false;
+ } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ ret = -ENOTTY; /* No support for unmask fd */
+ }
break;
case VFIO_IRQ_SET_ACTION_TRIGGER:
- {
- if (flags & VFIO_IRQ_SET_DATA_NONE) {
- pr_info("%s: disable INTx\n", __func__);
- if (mdev_state->intx_evtfd)
- eventfd_ctx_put(mdev_state->intx_evtfd);
+ if (is_intx(mdev_state) && !count &&
+ (flags & VFIO_IRQ_SET_DATA_NONE)) {
+ mtty_disable_intx(mdev_state);
+ break;
+ }
+
+ if (!(is_intx(mdev_state) || is_noirq(mdev_state)) ||
+ start != 0 || count != 1) {
+ ret = -EINVAL;
break;
}
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int fd = *(int *)data;
+ struct eventfd_ctx *evt;
+
+ mtty_disable_intx(mdev_state);
- if (fd > 0) {
- struct eventfd_ctx *evt;
-
- evt = eventfd_ctx_fdget(fd);
- if (IS_ERR(evt)) {
- ret = PTR_ERR(evt);
- break;
- }
- mdev_state->intx_evtfd = evt;
- mdev_state->irq_fd = fd;
- mdev_state->irq_index = index;
+ if (fd < 0)
+ break;
+
+ evt = eventfd_ctx_fdget(fd);
+ if (IS_ERR(evt)) {
+ ret = PTR_ERR(evt);
break;
}
+ mdev_state->intx_evtfd = evt;
+ mdev_state->irq_index = index;
+ break;
+ }
+
+ if (!is_intx(mdev_state)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mtty_trigger_interrupt(mdev_state);
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t trigger = *(uint8_t *)data;
+
+ if (trigger)
+ mtty_trigger_interrupt(mdev_state);
}
break;
}
- }
break;
case VFIO_PCI_MSI_IRQ_INDEX:
switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
case VFIO_IRQ_SET_ACTION_MASK:
case VFIO_IRQ_SET_ACTION_UNMASK:
+ ret = -ENOTTY;
break;
case VFIO_IRQ_SET_ACTION_TRIGGER:
- if (flags & VFIO_IRQ_SET_DATA_NONE) {
- if (mdev_state->msi_evtfd)
- eventfd_ctx_put(mdev_state->msi_evtfd);
- pr_info("%s: disable MSI\n", __func__);
- mdev_state->irq_index = VFIO_PCI_INTX_IRQ_INDEX;
+ if (is_msi(mdev_state) && !count &&
+ (flags & VFIO_IRQ_SET_DATA_NONE)) {
+ mtty_disable_msi(mdev_state);
+ break;
+ }
+
+ if (!(is_msi(mdev_state) || is_noirq(mdev_state)) ||
+ start != 0 || count != 1) {
+ ret = -EINVAL;
break;
}
+
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int fd = *(int *)data;
struct eventfd_ctx *evt;
- if (fd <= 0)
- break;
+ mtty_disable_msi(mdev_state);
- if (mdev_state->msi_evtfd)
+ if (fd < 0)
break;
evt = eventfd_ctx_fdget(fd);
@@ -993,20 +1679,37 @@ static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
break;
}
mdev_state->msi_evtfd = evt;
- mdev_state->irq_fd = fd;
mdev_state->irq_index = index;
+ break;
+ }
+
+ if (!is_msi(mdev_state)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mtty_trigger_interrupt(mdev_state);
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t trigger = *(uint8_t *)data;
+
+ if (trigger)
+ mtty_trigger_interrupt(mdev_state);
}
break;
- }
- break;
+ }
+ break;
case VFIO_PCI_MSIX_IRQ_INDEX:
- pr_info("%s: MSIX_IRQ\n", __func__);
+ dev_dbg(mdev_state->vdev.dev, "%s: MSIX_IRQ\n", __func__);
+ ret = -ENOTTY;
break;
case VFIO_PCI_ERR_IRQ_INDEX:
- pr_info("%s: ERR_IRQ\n", __func__);
+ dev_dbg(mdev_state->vdev.dev, "%s: ERR_IRQ\n", __func__);
+ ret = -ENOTTY;
break;
case VFIO_PCI_REQ_IRQ_INDEX:
- pr_info("%s: REQ_IRQ\n", __func__);
+ dev_dbg(mdev_state->vdev.dev, "%s: REQ_IRQ\n", __func__);
+ ret = -ENOTTY;
break;
}
@@ -1014,37 +1717,12 @@ static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
return ret;
}
-static int mtty_trigger_interrupt(struct mdev_state *mdev_state)
-{
- int ret = -1;
-
- if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) &&
- (!mdev_state->msi_evtfd))
- return -EINVAL;
- else if ((mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX) &&
- (!mdev_state->intx_evtfd)) {
- pr_info("%s: Intr eventfd not found\n", __func__);
- return -EINVAL;
- }
-
- if (mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX)
- ret = eventfd_signal(mdev_state->msi_evtfd, 1);
- else
- ret = eventfd_signal(mdev_state->intx_evtfd, 1);
-
-#if defined(DEBUG_INTR)
- pr_info("Intx triggered\n");
-#endif
- if (ret != 1)
- pr_err("%s: eventfd signal failed (%d)\n", __func__, ret);
-
- return ret;
-}
-
-static int mtty_get_region_info(struct mdev_state *mdev_state,
- struct vfio_region_info *region_info,
- u16 *cap_type_id, void **cap_type)
+static int mtty_ioctl_get_region_info(struct vfio_device *vdev,
+ struct vfio_region_info *region_info,
+ struct vfio_info_cap *caps)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
unsigned int size = 0;
u32 bar_index;
@@ -1084,22 +1762,16 @@ static int mtty_get_region_info(struct mdev_state *mdev_state,
static int mtty_get_irq_info(struct vfio_irq_info *irq_info)
{
- switch (irq_info->index) {
- case VFIO_PCI_INTX_IRQ_INDEX:
- case VFIO_PCI_MSI_IRQ_INDEX:
- case VFIO_PCI_REQ_IRQ_INDEX:
- break;
-
- default:
+ if (irq_info->index != VFIO_PCI_INTX_IRQ_INDEX &&
+ irq_info->index != VFIO_PCI_MSI_IRQ_INDEX)
return -EINVAL;
- }
irq_info->flags = VFIO_IRQ_INFO_EVENTFD;
irq_info->count = 1;
if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX)
- irq_info->flags |= (VFIO_IRQ_INFO_MASKABLE |
- VFIO_IRQ_INFO_AUTOMASKED);
+ irq_info->flags |= VFIO_IRQ_INFO_MASKABLE |
+ VFIO_IRQ_INFO_AUTOMASKED;
else
irq_info->flags |= VFIO_IRQ_INFO_NORESIZE;
@@ -1147,30 +1819,6 @@ static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd,
return 0;
}
- case VFIO_DEVICE_GET_REGION_INFO:
- {
- struct vfio_region_info info;
- u16 cap_type_id = 0;
- void *cap_type = NULL;
-
- minsz = offsetofend(struct vfio_region_info, offset);
-
- if (copy_from_user(&info, (void __user *)arg, minsz))
- return -EFAULT;
-
- if (info.argsz < minsz)
- return -EINVAL;
-
- ret = mtty_get_region_info(mdev_state, &info, &cap_type_id,
- &cap_type);
- if (ret)
- return ret;
-
- if (copy_to_user((void __user *)arg, &info, minsz))
- return -EFAULT;
-
- return 0;
- }
case VFIO_DEVICE_GET_IRQ_INFO:
{
@@ -1262,6 +1910,16 @@ static unsigned int mtty_get_available(struct mdev_type *mtype)
return atomic_read(&mdev_avail_ports) / type->nr_ports;
}
+/*
+ * .close_device callback: disable the migration files and release any
+ * INTx/MSI eventfd still installed on the device.
+ */
+static void mtty_close(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ mtty_disable_files(mdev_state);
+ mtty_disable_intx(mdev_state);
+ mtty_disable_msi(mdev_state);
+}
+
static const struct vfio_device_ops mtty_dev_ops = {
.name = "vfio-mtty",
.init = mtty_init_dev,
@@ -1269,6 +1927,12 @@ static const struct vfio_device_ops mtty_dev_ops = {
.read = mtty_read,
.write = mtty_write,
.ioctl = mtty_ioctl,
+ .get_region_info_caps = mtty_ioctl_get_region_info,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
+ .detach_ioas = vfio_iommufd_emulated_detach_ioas,
+ .close_device = mtty_close,
};
static struct mdev_driver mtty_driver = {
@@ -1316,7 +1980,7 @@ static int __init mtty_dev_init(void)
if (ret)
goto err_cdev;
- mtty_dev.vd_class = class_create(THIS_MODULE, MTTY_CLASS_NAME);
+ mtty_dev.vd_class = class_create(MTTY_CLASS_NAME);
if (IS_ERR(mtty_dev.vd_class)) {
pr_err("Error: failed to register mtty_dev class\n");
@@ -1371,6 +2035,6 @@ module_init(mtty_dev_init)
module_exit(mtty_dev_exit)
MODULE_LICENSE("GPL v2");
-MODULE_INFO(supported, "Test driver that simulate serial port over PCI");
+MODULE_DESCRIPTION("Test driver that simulate serial port over PCI");
MODULE_VERSION(VERSION_STRING);
MODULE_AUTHOR(DRIVER_AUTHOR);
diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore
index 79212d91285b..8708341bc082 100644
--- a/samples/vfs/.gitignore
+++ b/samples/vfs/.gitignore
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
/test-fsmount
+/test-list-all-mounts
/test-statx
+/mountinfo
diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index 6377a678134a..9256ca5d762b 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-userprogs-always-y += test-fsmount test-statx
+userprogs-always-y += test-fsmount test-statx mountinfo test-list-all-mounts
+userccflags += -I $(srctree)/tools/testing/selftests/
userccflags += -I usr/include
diff --git a/samples/vfs/mountinfo.c b/samples/vfs/mountinfo.c
new file mode 100644
index 000000000000..bc78275cac69
--- /dev/null
+++ b/samples/vfs/mountinfo.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Use pidfds, nsfds, listmount() and statmount() to mimic the
+ * contents of /proc/self/mountinfo.
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <alloca.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include "samples-vfs.h"
+
+/* max mounts per listmount call */
+#define MAXMOUNTS 1024
+
+/* size of struct statmount (including trailing string buffer) */
+#define STATMOUNT_BUFSIZE 4096
+
+static bool ext_format;
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open -1
+#endif
+
+/*
+ * There are no bindings in glibc for listmount() and statmount() (yet),
+ * make our own here.
+ */
+/*
+ * Raw statmount(2) wrapper. A non-zero @mnt_ns_id selects the VER1
+ * request size so that the namespace id is passed along; otherwise the
+ * VER0 size is used. Returns the syscall() result.
+ */
+static int statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask,
+		     struct statmount *buf, size_t bufsize,
+		     unsigned int flags)
+{
+	struct mnt_id_req req = {
+		.size = mnt_ns_id ? MNT_ID_REQ_SIZE_VER1 : MNT_ID_REQ_SIZE_VER0,
+		.mnt_id = mnt_id,
+		.param = mask,
+		.mnt_ns_id = mnt_ns_id,
+	};
+
+	return syscall(__NR_statmount, &req, buf, bufsize, flags);
+}
+
+/*
+ * Raw listmount(2) wrapper. @last_mnt_id is passed as the request's
+ * param field. A non-zero @mnt_ns_id selects the VER1 request size;
+ * otherwise the VER0 size is used. Returns the syscall() result.
+ */
+static ssize_t listmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 last_mnt_id,
+			 __u64 list[], size_t num, unsigned int flags)
+{
+	struct mnt_id_req req = {
+		.size = mnt_ns_id ? MNT_ID_REQ_SIZE_VER1 : MNT_ID_REQ_SIZE_VER0,
+		.mnt_id = mnt_id,
+		.param = last_mnt_id,
+		.mnt_ns_id = mnt_ns_id,
+	};
+
+	return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+/*
+ * Print the per-mount attribute list for @flags (MOUNT_ATTR_* bits) as a
+ * comma-separated string starting with "ro" or "rw".
+ */
+static void show_mnt_attrs(__u64 flags)
+{
+	const __u64 atime = flags & MOUNT_ATTR__ATIME;
+	const char *access = "rw";
+
+	if (flags & MOUNT_ATTR_RDONLY)
+		access = "ro";
+	printf("%s", access);
+
+	if (flags & MOUNT_ATTR_NOSUID)
+		printf(",nosuid");
+	if (flags & MOUNT_ATTR_NODEV)
+		printf(",nodev");
+	if (flags & MOUNT_ATTR_NOEXEC)
+		printf(",noexec");
+
+	/* Any other atime mode, including strictatime, prints nothing. */
+	if (atime == MOUNT_ATTR_RELATIME)
+		printf(",relatime");
+	else if (atime == MOUNT_ATTR_NOATIME)
+		printf(",noatime");
+
+	if (flags & MOUNT_ATTR_NODIRATIME)
+		printf(",nodiratime");
+	if (flags & MOUNT_ATTR_NOSYMFOLLOW)
+		printf(",nosymfollow");
+	if (flags & MOUNT_ATTR_IDMAP)
+		printf(",idmapped");
+}
+
+/*
+ * Print the optional propagation fields for @sm: " shared:N",
+ * " master:N" (plus " propagate_from:N" when it is set and differs from
+ * the master) and " unbindable".
+ */
+static void show_propagation(struct statmount *sm)
+{
+	const __u64 prop = sm->mnt_propagation;
+
+	if (prop & MS_SHARED)
+		printf(" shared:%llu", sm->mnt_peer_group);
+
+	if (prop & MS_SLAVE) {
+		const __u64 from = sm->propagate_from;
+
+		printf(" master:%llu", sm->mnt_master);
+		if (from != 0 && from != sm->mnt_master)
+			printf(" propagate_from:%llu", from);
+	}
+
+	if (prop & MS_UNBINDABLE)
+		printf(" unbindable");
+}
+
+/*
+ * Print the superblock flag list for @flags (MS_* bits) as a
+ * comma-separated string starting with "ro" or "rw".
+ */
+static void show_sb_flags(__u64 flags)
+{
+	/* Optional flags, in the order they are printed. */
+	static const struct {
+		__u64 bit;
+		const char *text;
+	} opts[] = {
+		{ MS_SYNCHRONOUS, ",sync" },
+		{ MS_DIRSYNC, ",dirsync" },
+		{ MS_MANDLOCK, ",mand" },
+		{ MS_LAZYTIME, ",lazytime" },
+	};
+	size_t idx;
+
+	printf("%s", flags & MS_RDONLY ? "ro" : "rw");
+
+	for (idx = 0; idx < sizeof(opts) / sizeof(opts[0]); idx++) {
+		if (flags & opts[idx].bit)
+			printf("%s", opts[idx].text);
+	}
+}
+
+/*
+ * Query one mount with statmount() and print a single line describing it
+ * in a /proc/self/mountinfo-like format. In extended format the line is
+ * prefixed with the namespace id, mount id and parent mount id.
+ *
+ * Returns 0 on success, 1 if statmount() fails.
+ */
+static int dump_mountinfo(__u64 mnt_id, __u64 mnt_ns_id)
+{
+	const __u64 want = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
+			   STATMOUNT_PROPAGATE_FROM | STATMOUNT_FS_TYPE |
+			   STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT |
+			   STATMOUNT_MNT_OPTS | STATMOUNT_FS_SUBTYPE |
+			   STATMOUNT_SB_SOURCE;
+	struct statmount *sm = alloca(STATMOUNT_BUFSIZE);
+	const char *strings;
+
+	if (statmount(mnt_id, mnt_ns_id, want, sm, STATMOUNT_BUFSIZE, 0) < 0) {
+		perror("statmount");
+		return 1;
+	}
+
+	/* All [str] members are offsets into the trailing string area. */
+	strings = sm->str;
+
+	if (ext_format)
+		printf("0x%llx 0x%llx 0x%llx ", mnt_ns_id, mnt_id, sm->mnt_parent_id);
+
+	printf("%u %u %u:%u %s %s ", sm->mnt_id_old, sm->mnt_parent_id_old,
+	       sm->sb_dev_major, sm->sb_dev_minor,
+	       strings + sm->mnt_root,
+	       strings + sm->mnt_point);
+	show_mnt_attrs(sm->mnt_attr);
+	show_propagation(sm);
+
+	printf(" - %s", strings + sm->fs_type);
+	if (sm->mask & STATMOUNT_FS_SUBTYPE)
+		printf(".%s", strings + sm->fs_subtype);
+
+	if (sm->mask & STATMOUNT_SB_SOURCE)
+		printf(" %s ", strings + sm->sb_source);
+	else
+		printf(" :none ");
+
+	show_sb_flags(sm->sb_flags);
+	if (sm->mask & STATMOUNT_MNT_OPTS)
+		printf(",%s", strings + sm->mnt_opts);
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * Print every mount in the mount namespace @mnt_ns_id.
+ *
+ * Mount ids are fetched in batches of at most MAXMOUNTS with listmount();
+ * each batch continues after the last id of the previous one.
+ *
+ * Returns 0 on success, or non-zero if listmount() or dump_mountinfo()
+ * fails.
+ */
+static int dump_mounts(__u64 mnt_ns_id)
+{
+	__u64 mntid[MAXMOUNTS];
+	__u64 last_mnt_id = 0;
+	ssize_t count;
+	int i;
+
+	/*
+	 * Get a list of all mntids in mnt_ns_id. If it returns MAXMOUNTS
+	 * mounts, then go again until we get everything.
+	 */
+	do {
+		count = listmount(LSMT_ROOT, mnt_ns_id, last_mnt_id, mntid, MAXMOUNTS, 0);
+		if (count < 0 || count > MAXMOUNTS) {
+			errno = count < 0 ? errno : count;
+			perror("listmount");
+			return 1;
+		}
+
+		/*
+		 * Nothing (more) to list. Stop here so that mntid[count - 1]
+		 * below is never evaluated with count == 0, which would read
+		 * before the start of the array.
+		 */
+		if (count == 0)
+			break;
+
+		/* Walk the returned mntids and print info about each */
+		for (i = 0; i < count; ++i) {
+			int ret = dump_mountinfo(mntid[i], mnt_ns_id);
+
+			if (ret != 0)
+				return ret;
+		}
+		/* Set up last_mnt_id to pick up where we left off */
+		last_mnt_id = mntid[count - 1];
+	} while (count == MAXMOUNTS);
+	return 0;
+}
+
+/* Print the command-line synopsis and option summary for @prog to stdout. */
+static void usage(const char * const prog)
+{
+ printf("Usage:\n");
+ printf("%s [-e] [-p pid] [-r] [-h]\n", prog);
+ printf(" -e: extended format\n");
+ printf(" -h: print usage message\n");
+ printf(" -p: get mount namespace from given pid\n");
+ printf(" -r: recursively print all mounts in all child namespaces\n");
+}
+
+/*
+ * Entry point: print the mounts of the mount namespace of a process (the
+ * caller by default, or the pid given with -p) and, with -r, of every
+ * following namespace returned by NS_MNT_GET_NEXT.
+ *
+ * Returns 0 on success and non-zero on a usage error or when any syscall
+ * or ioctl fails.
+ */
+int main(int argc, char * const *argv)
+{
+	struct mnt_ns_info mni = { .size = MNT_NS_INFO_SIZE_VER0 };
+	int pidfd, mntns, ret, opt;
+	pid_t pid = getpid();
+	bool recursive = false;
+
+	while ((opt = getopt(argc, argv, "ehp:r")) != -1) {
+		switch (opt) {
+		case 'e':
+			ext_format = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			return 0;
+		case 'p':
+			pid = atoi(optarg);
+			break;
+		case 'r':
+			recursive = true;
+			break;
+		default:
+			/* getopt() has already printed a diagnostic */
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* Get a pidfd for pid */
+	pidfd = syscall(__NR_pidfd_open, pid, 0);
+	if (pidfd < 0) {
+		perror("pidfd_open");
+		return 1;
+	}
+
+	/* Get the mnt namespace for pidfd */
+	mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, NULL);
+	if (mntns < 0) {
+		perror("PIDFD_GET_MNT_NAMESPACE");
+		close(pidfd);
+		return 1;
+	}
+	close(pidfd);
+
+	/* get info about mntns. In particular, the mnt_ns_id */
+	ret = ioctl(mntns, NS_MNT_GET_INFO, &mni);
+	if (ret < 0) {
+		perror("NS_MNT_GET_INFO");
+		close(mntns);
+		return 1;
+	}
+
+	for (;;) {
+		int next;
+
+		ret = dump_mounts(mni.mnt_ns_id);
+		if (ret || !recursive)
+			break;
+
+		/* get the next mntns (and overwrite the old mount ns info) */
+		next = ioctl(mntns, NS_MNT_GET_NEXT, &mni);
+		if (next < 0) {
+			/*
+			 * ENOENT means there is no further namespace; any
+			 * other error is a real failure and must not be
+			 * reported as success.
+			 */
+			if (errno != ENOENT) {
+				perror("NS_MNT_GET_NEXT");
+				ret = 1;
+			}
+			break;
+		}
+		close(mntns);
+		mntns = next;
+	}
+
+	close(mntns);
+	return ret;
+}
diff --git a/samples/vfs/samples-vfs.h b/samples/vfs/samples-vfs.h
new file mode 100644
index 000000000000..498baf581b56
--- /dev/null
+++ b/samples/vfs/samples-vfs.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SAMPLES_VFS_H
+#define __SAMPLES_VFS_H
+
+#include <errno.h>
+#include <linux/types.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+/*
+ * Print "<errno text> | <file>: <line>: <func>: <message>" to stderr and
+ * terminate the process with EXIT_FAILURE. "%m" is the glibc printf
+ * extension that expands to strerror(errno), so call this right after the
+ * failing call. This header does not include <stdio.h> or <stdlib.h>;
+ * users of the macro must include them for fprintf() and exit().
+ */
+#define die_errno(format, ...) \
+ do { \
+ fprintf(stderr, "%m | %s: %d: %s: " format "\n", __FILE__, \
+ __LINE__, __func__, ##__VA_ARGS__); \
+ exit(EXIT_FAILURE); \
+ } while (0)
+
+/*
+ * Result buffer filled in by statmount(2).
+ *
+ * Members marked [str] are byte offsets into the trailing str[] area, not
+ * pointers; @mask reports which groups of fields were actually written.
+ * NOTE(review): presumably a local copy of the kernel uapi definition for
+ * builds against older headers - keep the layout in sync with it.
+ */
+struct statmount {
+ __u32 size; /* Total size, including strings */
+ __u32 mnt_opts; /* [str] Options (comma separated, escaped) */
+ __u64 mask; /* What results were written */
+ __u32 sb_dev_major; /* Device ID */
+ __u32 sb_dev_minor;
+ __u64 sb_magic; /* ..._SUPER_MAGIC */
+ __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+ __u32 fs_type; /* [str] Filesystem type */
+ __u64 mnt_id; /* Unique ID of mount */
+ __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
+ __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
+ __u32 mnt_parent_id_old;
+ __u64 mnt_attr; /* MOUNT_ATTR_... */
+ __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+ __u64 mnt_peer_group; /* ID of shared peer group */
+ __u64 mnt_master; /* Mount receives propagation from this ID */
+ __u64 propagate_from; /* Propagation from in current namespace */
+ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
+ __u32 mnt_point; /* [str] Mountpoint relative to current root */
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
+ __u32 sb_source; /* [str] Source string of the mount */
+ __u32 opt_num; /* Number of fs options */
+ __u32 opt_array; /* [str] Array of nul terminated fs options */
+ __u32 opt_sec_num; /* Number of security options */
+ __u32 opt_sec_array; /* [str] Array of nul terminated security options */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings */
+ __u64 __spare2[44];
+ char str[]; /* Variable size part containing strings */
+};
+
+/* Request argument shared by the statmount(2) and listmount(2) wrappers. */
+struct mnt_id_req {
+ __u32 size; /* MNT_ID_REQ_SIZE_VER0 or MNT_ID_REQ_SIZE_VER1 */
+ __u32 spare;
+ __u64 mnt_id; /* Mount to query, or LSMT_ROOT when listing */
+ __u64 param; /* statmount: request mask; listmount: last mount ID seen */
+ __u64 mnt_ns_id; /* Mount namespace ID; the samples set it together with the VER1 size */
+};
+
+#ifndef MNT_ID_REQ_SIZE_VER0
+#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#endif
+
+#ifndef MNT_ID_REQ_SIZE_VER1
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
+#endif
+
+/* Get the id for a mount namespace */
+#ifndef NS_GET_MNTNS_ID
+#define NS_GET_MNTNS_ID _IO(0xb7, 0x5)
+#endif
+
+/* Argument of the NS_MNT_GET_INFO, NS_MNT_GET_NEXT and NS_MNT_GET_PREV ioctls. */
+struct mnt_ns_info {
+ __u32 size; /* Struct size; see MNT_NS_INFO_SIZE_VER0 */
+ __u32 nr_mounts; /* Mount count, printed by the samples as "Listing N mounts" */
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+};
+
+#ifndef MNT_NS_INFO_SIZE_VER0
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+#endif
+
+#ifndef NS_MNT_GET_INFO
+#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info)
+#endif
+
+#ifndef NS_MNT_GET_NEXT
+#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info)
+#endif
+
+#ifndef NS_MNT_GET_PREV
+#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info)
+#endif
+
+#ifndef PIDFD_GET_MNT_NAMESPACE
+#define PIDFD_GET_MNT_NAMESPACE _IO(0xFF, 3)
+#endif
+
+#ifndef __NR_listmount
+#define __NR_listmount 458
+#endif
+
+#ifndef __NR_statmount
+#define __NR_statmount 457
+#endif
+
+#ifndef LSMT_ROOT
+#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#endif
+
+/* @mask bits for statmount(2) */
+#ifndef STATMOUNT_SB_BASIC
+#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
+#endif
+
+#ifndef STATMOUNT_MNT_BASIC
+#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
+#endif
+
+#ifndef STATMOUNT_PROPAGATE_FROM
+#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
+#endif
+
+#ifndef STATMOUNT_MNT_ROOT
+#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
+#endif
+
+#ifndef STATMOUNT_MNT_POINT
+#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
+#endif
+
+#ifndef STATMOUNT_FS_TYPE
+#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#endif
+
+#ifndef STATMOUNT_MNT_NS_ID
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#endif
+
+#ifndef STATMOUNT_MNT_OPTS
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
+#endif
+
+#ifndef STATMOUNT_FS_SUBTYPE
+#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
+#endif
+
+#ifndef STATMOUNT_SB_SOURCE
+#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
+#endif
+
+#ifndef STATMOUNT_OPT_ARRAY
+#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
+#endif
+
+#ifndef STATMOUNT_OPT_SEC_ARRAY
+#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#endif
+
+/*
+ * NOTE(review): going by its name and comment this is a statx mask bit,
+ * not a statmount(2) @mask bit, although it sits among the STATMOUNT_*
+ * fallbacks; its value equals STATMOUNT_MNT_GIDMAP, so do not pass it to
+ * statmount() - confirm.
+ */
+#ifndef STATX_MNT_ID_UNIQUE
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#endif
+
+#ifndef STATMOUNT_MNT_UIDMAP
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#endif
+
+#ifndef STATMOUNT_MNT_GIDMAP
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
+#endif
+
+#ifndef MOUNT_ATTR_RDONLY
+#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
+#endif
+
+#ifndef MOUNT_ATTR_NOSUID
+#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
+#endif
+
+#ifndef MOUNT_ATTR_NODEV
+#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
+#endif
+
+#ifndef MOUNT_ATTR_NOEXEC
+#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
+#endif
+
+#ifndef MOUNT_ATTR__ATIME
+#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
+#endif
+
+#ifndef MOUNT_ATTR_RELATIME
+#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
+#endif
+
+#ifndef MOUNT_ATTR_NOATIME
+#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
+#endif
+
+#ifndef MOUNT_ATTR_STRICTATIME
+#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
+#endif
+
+#ifndef MOUNT_ATTR_NODIRATIME
+#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
+#endif
+
+#ifndef MOUNT_ATTR_IDMAP
+#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+#endif
+
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
+#endif
+
+#ifndef MS_RDONLY
+#define MS_RDONLY 1 /* Mount read-only */
+#endif
+
+#ifndef MS_SYNCHRONOUS
+#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
+#endif
+
+#ifndef MS_MANDLOCK
+#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
+#endif
+
+#ifndef MS_DIRSYNC
+#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#endif
+
+#ifndef MS_UNBINDABLE
+#define MS_UNBINDABLE (1<<17) /* change to unbindable */
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18) /* change to private */
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19) /* change to slave */
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1<<20) /* change to shared */
+#endif
+
+#ifndef MS_LAZYTIME
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+#endif
+
+#endif /* __SAMPLES_VFS_H */
diff --git a/samples/vfs/test-list-all-mounts.c b/samples/vfs/test-list-all-mounts.c
new file mode 100644
index 000000000000..713c174626aa
--- /dev/null
+++ b/samples/vfs/test-list-all-mounts.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "../../tools/testing/selftests/pidfd/pidfd.h"
+#include "samples-vfs.h"
+
+/*
+ * Raw statmount(2) invocation using the VER1 request layout, which
+ * carries the mount namespace id. Returns the syscall() result.
+ */
+static int __statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask,
+		       struct statmount *stmnt, size_t bufsize,
+		       unsigned int flags)
+{
+	struct mnt_id_req req = { .size = MNT_ID_REQ_SIZE_VER1 };
+
+	req.mnt_id = mnt_id;
+	req.param = mask;
+	req.mnt_ns_id = mnt_ns_id;
+
+	return syscall(__NR_statmount, &req, stmnt, bufsize, flags);
+}
+
+/*
+ * statmount() into a heap buffer, doubling it (starting at 32 KiB) for as
+ * long as the kernel reports EOVERFLOW.
+ *
+ * Returns a buffer the caller must free(), or NULL on allocation failure,
+ * on any other statmount error, or once the size limit is reached.
+ */
+static struct statmount *sys_statmount(__u64 mnt_id, __u64 mnt_ns_id,
+				       __u64 mask, unsigned int flags)
+{
+	struct statmount *buf = NULL;
+	size_t len;
+
+	for (len = 1 << 15; len < UINT_MAX / 2; len <<= 1) {
+		struct statmount *grown = realloc(buf, len);
+
+		if (!grown)
+			break;
+		buf = grown;
+
+		if (__statmount(mnt_id, mnt_ns_id, mask, buf, len, flags) == 0)
+			return buf;
+
+		/* Only "buffer too small" is worth retrying. */
+		if (errno != EOVERFLOW)
+			break;
+	}
+
+	free(buf);
+	return NULL;
+}
+
+/*
+ * Raw listmount(2) invocation using the VER1 request layout.
+ * @last_mnt_id is passed as the request's param field. Returns the
+ * syscall() result.
+ */
+static ssize_t sys_listmount(__u64 mnt_id, __u64 last_mnt_id, __u64 mnt_ns_id,
+			     __u64 list[], size_t num, unsigned int flags)
+{
+	struct mnt_id_req req = { .size = MNT_ID_REQ_SIZE_VER1 };
+
+	req.mnt_id = mnt_id;
+	req.param = last_mnt_id;
+	req.mnt_ns_id = mnt_ns_id;
+
+	return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+/*
+ * Entry point: starting from the caller's mount namespace, list every
+ * mount of every namespace reachable through NS_MNT_GET_NEXT and print
+ * the statmount() details of each mount.
+ *
+ * Exits with 0 once NS_MNT_GET_NEXT reports ENOENT, and with
+ * EXIT_FAILURE (through die_errno()) when a namespace ioctl fails.
+ */
+int main(int argc, char *argv[])
+{
+#define LISTMNT_BUFFER 10
+	__u64 list[LISTMNT_BUFFER], last_mnt_id = 0;
+	int ret, pidfd, fd_mntns;
+	struct mnt_ns_info info = {};
+
+	pidfd = sys_pidfd_open(getpid(), 0);
+	if (pidfd < 0)
+		die_errno("pidfd_open failed");
+
+	fd_mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, 0);
+	if (fd_mntns < 0)
+		die_errno("ioctl(PIDFD_GET_MNT_NAMESPACE) failed");
+	/* The pidfd was only needed to reach the mount namespace. */
+	close(pidfd);
+
+	ret = ioctl(fd_mntns, NS_MNT_GET_INFO, &info);
+	if (ret < 0)
+		die_errno("ioctl(NS_MNT_GET_INFO) failed");
+
+	printf("Listing %u mounts for mount namespace %" PRIu64 "\n",
+	       info.nr_mounts, (uint64_t)info.mnt_ns_id);
+	for (;;) {
+		ssize_t nr_mounts;
+
+		nr_mounts = sys_listmount(LSMT_ROOT, last_mnt_id,
+					  info.mnt_ns_id, list, LISTMNT_BUFFER,
+					  0);
+		if (nr_mounts <= 0) {
+			int fd_mntns_next;
+
+			/* Do not report a failed listing as a finished one. */
+			if (nr_mounts < 0)
+				fprintf(stderr, "listmount failed for mount namespace %" PRIu64 ": %m\n",
+					(uint64_t)info.mnt_ns_id);
+
+			printf("Finished listing %u mounts for mount namespace %" PRIu64 "\n\n",
+			       info.nr_mounts, (uint64_t)info.mnt_ns_id);
+			fd_mntns_next = ioctl(fd_mntns, NS_MNT_GET_NEXT, &info);
+			if (fd_mntns_next < 0) {
+				if (errno == ENOENT) {
+					printf("Finished listing all mount namespaces\n");
+					exit(0);
+				}
+				die_errno("ioctl(NS_MNT_GET_NEXT) failed");
+			}
+			close(fd_mntns);
+			fd_mntns = fd_mntns_next;
+			/* Start the next namespace from its first mount. */
+			last_mnt_id = 0;
+			printf("Listing %u mounts for mount namespace %" PRIu64 "\n",
+			       info.nr_mounts, (uint64_t)info.mnt_ns_id);
+			continue;
+		}
+
+		for (ssize_t cur = 0; cur < nr_mounts; cur++) {
+			struct statmount *stmnt;
+
+			/* Remember where the next listmount() call resumes. */
+			last_mnt_id = list[cur];
+
+			stmnt = sys_statmount(last_mnt_id, info.mnt_ns_id,
+					      STATMOUNT_SB_BASIC |
+					      STATMOUNT_MNT_BASIC |
+					      STATMOUNT_MNT_ROOT |
+					      STATMOUNT_MNT_POINT |
+					      STATMOUNT_MNT_NS_ID |
+					      STATMOUNT_MNT_OPTS |
+					      STATMOUNT_FS_TYPE |
+					      STATMOUNT_MNT_UIDMAP |
+					      STATMOUNT_MNT_GIDMAP, 0);
+			if (!stmnt) {
+				printf("Failed to statmount(%" PRIu64 ") in mount namespace(%" PRIu64 ")\n",
+				       (uint64_t)last_mnt_id, (uint64_t)info.mnt_ns_id);
+				continue;
+			}
+
+			/* String fields are only valid when their mask bit is set. */
+			printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n",
+			       (uint64_t)stmnt->mnt_id,
+			       (uint64_t)stmnt->mnt_parent_id,
+			       (stmnt->mask & STATMOUNT_FS_TYPE) ? stmnt->str + stmnt->fs_type : "",
+			       (stmnt->mask & STATMOUNT_MNT_ROOT) ? stmnt->str + stmnt->mnt_root : "",
+			       (stmnt->mask & STATMOUNT_MNT_POINT) ? stmnt->str + stmnt->mnt_point : "",
+			       (stmnt->mask & STATMOUNT_MNT_OPTS) ? stmnt->str + stmnt->mnt_opts : "");
+
+			if (stmnt->mask & STATMOUNT_MNT_UIDMAP) {
+				const char *idmap = stmnt->str + stmnt->mnt_uidmap;
+
+				/* Mappings are consecutive nul-terminated strings. */
+				for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) {
+					printf("mnt_uidmap[%zu]:\t%s\n", idx, idmap);
+					idmap += strlen(idmap) + 1;
+				}
+			}
+
+			if (stmnt->mask & STATMOUNT_MNT_GIDMAP) {
+				const char *idmap = stmnt->str + stmnt->mnt_gidmap;
+
+				for (size_t idx = 0; idx < stmnt->mnt_gidmap_num; idx++) {
+					printf("mnt_gidmap[%zu]:\t%s\n", idx, idmap);
+					idmap += strlen(idmap) + 1;
+				}
+			}
+
+			printf("\n");
+
+			free(stmnt);
+		}
+	}
+
+	exit(0);
+}
diff --git a/samples/vfs/test-statx.c b/samples/vfs/test-statx.c
index 49c7a46cee07..424a6fa15723 100644
--- a/samples/vfs/test-statx.c
+++ b/samples/vfs/test-statx.c
@@ -19,6 +19,12 @@
#include <time.h>
#include <sys/syscall.h>
#include <sys/types.h>
+
+// Work around glibc header silliness
+#undef AT_RENAME_NOREPLACE
+#undef AT_RENAME_EXCHANGE
+#undef AT_RENAME_WHITEOUT
+
#include <linux/stat.h>
#include <linux/fcntl.h>
#define statx foo
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
index 8c6cb57d5cfc..24cf7d7a1972 100644
--- a/samples/watch_queue/watch_test.c
+++ b/samples/watch_queue/watch_test.c
@@ -16,6 +16,12 @@
#include <errno.h>
#include <sys/ioctl.h>
#include <limits.h>
+
+// Work around glibc header silliness
+#undef AT_RENAME_NOREPLACE
+#undef AT_RENAME_EXCHANGE
+#undef AT_RENAME_WHITEOUT
+
#include <linux/watch_queue.h>
#include <linux/unistd.h>
#include <linux/keyctl.h>