summaryrefslogtreecommitdiff
path: root/samples/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/.gitignore51
-rw-r--r--samples/bpf/Makefile504
-rw-r--r--samples/bpf/Makefile.target75
-rw-r--r--samples/bpf/README.rst91
-rw-r--r--samples/bpf/asm_goto_workaround.h28
-rw-r--r--samples/bpf/bpf_insn.h (renamed from samples/bpf/libbpf.h)57
-rw-r--r--samples/bpf/bpf_load.c777
-rw-r--r--samples/bpf/bpf_load.h63
-rw-r--r--samples/bpf/cgroup_helpers.c177
-rw-r--r--samples/bpf/cgroup_helpers.h16
-rw-r--r--samples/bpf/cookie_uid_helper_example.c39
-rw-r--r--samples/bpf/cpustat_kern.c280
-rw-r--r--samples/bpf/cpustat_user.c251
-rwxr-xr-xsamples/bpf/do_hbm_test.sh438
-rw-r--r--samples/bpf/fds_example.c39
-rw-r--r--samples/bpf/gnu/stubs.h1
-rw-r--r--samples/bpf/hash_func01.h55
-rw-r--r--samples/bpf/hbm.c515
-rw-r--r--samples/bpf/hbm.h38
-rw-r--r--samples/bpf/hbm_edt_kern.c168
-rw-r--r--samples/bpf/hbm_kern.h215
-rw-r--r--samples/bpf/hbm_out_kern.c179
-rw-r--r--samples/bpf/ibumad_kern.c138
-rw-r--r--samples/bpf/ibumad_user.c158
-rw-r--r--samples/bpf/lathist_kern.c26
-rw-r--r--samples/bpf/lathist_user.c49
-rw-r--r--samples/bpf/load_sock_ops.c97
-rw-r--r--samples/bpf/lwt_len_hist.bpf.c (renamed from samples/bpf/lwt_len_hist_kern.c)40
-rwxr-xr-x[-rw-r--r--]samples/bpf/lwt_len_hist.sh9
-rw-r--r--samples/bpf/lwt_len_hist_user.c5
-rw-r--r--samples/bpf/map_perf_test.bpf.c297
-rw-r--r--samples/bpf/map_perf_test_kern.c249
-rw-r--r--samples/bpf/map_perf_test_user.c243
-rw-r--r--samples/bpf/net_shared.h34
-rw-r--r--samples/bpf/offwaketime.bpf.c (renamed from samples/bpf/offwaketime_kern.c)88
-rw-r--r--samples/bpf/offwaketime_user.c78
-rw-r--r--samples/bpf/parse_ldabs.c3
-rw-r--r--samples/bpf/parse_simple.c2
-rw-r--r--samples/bpf/parse_varlen.c8
-rwxr-xr-xsamples/bpf/run_cookie_uid_helper_example.sh1
-rw-r--r--samples/bpf/sampleip_kern.c16
-rw-r--r--samples/bpf/sampleip_user.c120
-rw-r--r--samples/bpf/sock_example.c26
-rw-r--r--samples/bpf/sock_example.h2
-rw-r--r--samples/bpf/sock_flags_kern.c44
-rw-r--r--samples/bpf/sockex1_kern.c15
-rw-r--r--samples/bpf/sockex1_user.c37
-rw-r--r--samples/bpf/sockex2_kern.c27
-rw-r--r--samples/bpf/sockex2_user.c37
-rw-r--r--samples/bpf/sockex3_kern.c126
-rw-r--r--samples/bpf/sockex3_user.c73
-rw-r--r--samples/bpf/spintest.bpf.c60
-rw-r--r--samples/bpf/spintest_kern.c68
-rw-r--r--samples/bpf/spintest_user.c68
-rw-r--r--samples/bpf/syscall_nrs.c12
-rw-r--r--samples/bpf/syscall_tp_kern.c102
-rw-r--r--samples/bpf/syscall_tp_user.c155
-rw-r--r--samples/bpf/task_fd_query_kern.c19
-rw-r--r--samples/bpf/task_fd_query_user.c423
-rwxr-xr-xsamples/bpf/tc_l2_redirect.sh4
-rw-r--r--samples/bpf/tc_l2_redirect_kern.c8
-rw-r--r--samples/bpf/tc_l2_redirect_user.c7
-rw-r--r--samples/bpf/tcbpf1_kern.c3
-rw-r--r--samples/bpf/tcbpf2_kern.c382
-rw-r--r--samples/bpf/tcp_basertt_kern.c71
-rw-r--r--samples/bpf/tcp_bpf.readme28
-rw-r--r--samples/bpf/tcp_bufs_kern.c27
-rw-r--r--samples/bpf/tcp_clamp_kern.c37
-rw-r--r--samples/bpf/tcp_cong_kern.c21
-rw-r--r--samples/bpf/tcp_dumpstats_kern.c68
-rw-r--r--samples/bpf/tcp_iw_kern.c27
-rw-r--r--samples/bpf/tcp_rwnd_kern.c19
-rw-r--r--samples/bpf/tcp_synrto_kern.c19
-rw-r--r--samples/bpf/tcp_tos_reflect_kern.c80
-rw-r--r--samples/bpf/test_cgrp2_array_pin.c109
-rw-r--r--samples/bpf/test_cgrp2_attach.c171
-rw-r--r--samples/bpf/test_cgrp2_attach2.c196
-rw-r--r--samples/bpf/test_cgrp2_sock.c86
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh47
-rw-r--r--samples/bpf/test_cgrp2_sock2.c66
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh81
-rwxr-xr-xsamples/bpf/test_cgrp2_tc.sh184
-rw-r--r--samples/bpf/test_cgrp2_tc_kern.c70
-rwxr-xr-xsamples/bpf/test_cls_bpf.sh1
-rw-r--r--samples/bpf/test_current_task_under_cgroup_kern.c43
-rw-r--r--samples/bpf/test_current_task_under_cgroup_user.c85
-rwxr-xr-xsamples/bpf/test_ipip.sh178
-rw-r--r--samples/bpf/test_lru_dist.c29
-rw-r--r--samples/bpf/test_lwt_bpf.c52
-rwxr-xr-x[-rw-r--r--]samples/bpf/test_lwt_bpf.sh24
-rw-r--r--samples/bpf/test_map_in_map.bpf.c (renamed from samples/bpf/test_map_in_map_kern.c)109
-rw-r--r--samples/bpf/test_map_in_map_user.c65
-rw-r--r--samples/bpf/test_overhead_kprobe_kern.c41
-rw-r--r--samples/bpf/test_overhead_tp_kern.c36
-rw-r--r--samples/bpf/test_overhead_user.c162
-rw-r--r--samples/bpf/test_probe_write_user_kern.c52
-rw-r--r--samples/bpf/test_probe_write_user_user.c78
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh168
-rw-r--r--samples/bpf/trace_event_kern.c42
-rw-r--r--samples/bpf/trace_event_user.c194
-rw-r--r--samples/bpf/trace_output.bpf.c (renamed from samples/bpf/trace_output_kern.c)19
-rw-r--r--samples/bpf/trace_output_user.c191
-rw-r--r--samples/bpf/tracex1.bpf.c (renamed from samples/bpf/tracex1_kern.c)27
-rw-r--r--samples/bpf/tracex1_user.c41
-rw-r--r--samples/bpf/tracex2_kern.c100
-rw-r--r--samples/bpf/tracex2_user.c159
-rw-r--r--samples/bpf/tracex3.bpf.c (renamed from samples/bpf/tracex3_kern.c)67
-rw-r--r--samples/bpf/tracex3_user.c67
-rw-r--r--samples/bpf/tracex4.bpf.c (renamed from samples/bpf/tracex4_kern.c)22
-rw-r--r--samples/bpf/tracex4_user.c63
-rw-r--r--samples/bpf/tracex5.bpf.c (renamed from samples/bpf/tracex5_kern.c)42
-rw-r--r--samples/bpf/tracex5_user.c73
-rw-r--r--samples/bpf/tracex6.bpf.c81
-rw-r--r--samples/bpf/tracex6_kern.c41
-rw-r--r--samples/bpf/tracex6_user.c67
-rw-r--r--samples/bpf/xdp1_kern.c93
-rw-r--r--samples/bpf/xdp1_user.c119
-rw-r--r--samples/bpf/xdp2_kern.c114
-rwxr-xr-xsamples/bpf/xdp2skb_meta.sh220
-rw-r--r--samples/bpf/xdp2skb_meta_kern.c104
-rw-r--r--samples/bpf/xdp_adjust_tail_kern.c156
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c198
-rw-r--r--samples/bpf/xdp_fwd_kern.c158
-rw-r--r--samples/bpf/xdp_fwd_user.c226
-rw-r--r--samples/bpf/xdp_router_ipv4.bpf.c189
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c699
-rw-r--r--samples/bpf/xdp_sample.bpf.c266
-rw-r--r--samples/bpf/xdp_sample.bpf.h121
-rw-r--r--samples/bpf/xdp_sample_shared.h17
-rw-r--r--samples/bpf/xdp_sample_user.c1673
-rw-r--r--samples/bpf/xdp_sample_user.h110
-rw-r--r--samples/bpf/xdp_tx_iptunnel_common.h5
-rw-r--r--samples/bpf/xdp_tx_iptunnel_kern.c28
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c99
134 files changed, 10209 insertions, 5628 deletions
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
new file mode 100644
index 000000000000..0002cd359fb1
--- /dev/null
+++ b/samples/bpf/.gitignore
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0-only
+cpustat
+fds_example
+hbm
+ibumad
+lathist
+lwt_len_hist
+map_perf_test
+offwaketime
+per_socket_stats_example
+sampleip
+sock_example
+sockex1
+sockex2
+sockex3
+spintest
+syscall_nrs.h
+syscall_tp
+task_fd_query
+tc_l2_redirect
+test_cgrp2_array_pin
+test_cgrp2_attach
+test_cgrp2_attach2
+test_cgrp2_sock
+test_cgrp2_sock2
+test_current_task_under_cgroup
+test_lru_dist
+test_map_in_map
+test_overhead
+test_probe_write_user
+trace_event
+trace_output
+tracex1
+tracex2
+tracex3
+tracex4
+tracex5
+tracex6
+tracex7
+xdp_adjust_tail
+xdp_fwd
+xdp_router_ipv4
+xdp_tx_iptunnel
+testfile.img
+hbm_out.log
+iperf.*
+*.out
+*.skel.h
+/vmlinux.h
+/bpftool/
+/libbpf/
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 87246be6feb8..95a4fa1f1e44 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -1,180 +1,256 @@
-# kbuild trick to avoid linker error. Can be omitted if a module is built.
-obj- := dummy.o
+# SPDX-License-Identifier: GPL-2.0
+
+BPF_SAMPLES_PATH ?= $(abspath $(src))
+TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools
+
+pound := \#
# List of programs to build
-hostprogs-y := test_lru_dist
-hostprogs-y += sock_example
-hostprogs-y += fds_example
-hostprogs-y += sockex1
-hostprogs-y += sockex2
-hostprogs-y += sockex3
-hostprogs-y += tracex1
-hostprogs-y += tracex2
-hostprogs-y += tracex3
-hostprogs-y += tracex4
-hostprogs-y += tracex5
-hostprogs-y += tracex6
-hostprogs-y += test_probe_write_user
-hostprogs-y += trace_output
-hostprogs-y += lathist
-hostprogs-y += offwaketime
-hostprogs-y += spintest
-hostprogs-y += map_perf_test
-hostprogs-y += test_overhead
-hostprogs-y += test_cgrp2_array_pin
-hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_attach2
-hostprogs-y += test_cgrp2_sock
-hostprogs-y += test_cgrp2_sock2
-hostprogs-y += xdp1
-hostprogs-y += xdp2
-hostprogs-y += test_current_task_under_cgroup
-hostprogs-y += trace_event
-hostprogs-y += sampleip
-hostprogs-y += tc_l2_redirect
-hostprogs-y += lwt_len_hist
-hostprogs-y += xdp_tx_iptunnel
-hostprogs-y += test_map_in_map
-hostprogs-y += per_socket_stats_example
-hostprogs-y += load_sock_ops
+tprogs-y := test_lru_dist
+tprogs-y += sock_example
+tprogs-y += fds_example
+tprogs-y += sockex1
+tprogs-y += sockex2
+tprogs-y += sockex3
+tprogs-y += tracex1
+tprogs-y += tracex3
+tprogs-y += tracex4
+tprogs-y += tracex5
+tprogs-y += tracex6
+tprogs-y += trace_output
+tprogs-y += lathist
+tprogs-y += offwaketime
+tprogs-y += spintest
+tprogs-y += map_perf_test
+tprogs-y += xdp_router_ipv4
+tprogs-y += trace_event
+tprogs-y += sampleip
+tprogs-y += tc_l2_redirect
+tprogs-y += lwt_len_hist
+tprogs-y += xdp_tx_iptunnel
+tprogs-y += test_map_in_map
+tprogs-y += per_socket_stats_example
+tprogs-y += syscall_tp
+tprogs-y += cpustat
+tprogs-y += xdp_adjust_tail
+tprogs-y += xdp_fwd
+tprogs-y += task_fd_query
+tprogs-y += ibumad
+tprogs-y += hbm
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
-
-test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
-sock_example-objs := sock_example.o $(LIBBPF)
-fds_example-objs := bpf_load.o $(LIBBPF) fds_example.o
-sockex1-objs := bpf_load.o $(LIBBPF) sockex1_user.o
-sockex2-objs := bpf_load.o $(LIBBPF) sockex2_user.o
-sockex3-objs := bpf_load.o $(LIBBPF) sockex3_user.o
-tracex1-objs := bpf_load.o $(LIBBPF) tracex1_user.o
-tracex2-objs := bpf_load.o $(LIBBPF) tracex2_user.o
-tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
-tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
-tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
-tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
-load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
-test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
-trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
-lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
-offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
-spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
-map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
-test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
-test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
-test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o
-test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o
-test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o
-test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
-xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
-# reuse xdp1 source intentionally
-xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
- test_current_task_under_cgroup_user.o
-trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
-sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
-tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
-lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
-xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
-test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
-per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
+LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf
+LIBBPF_OUTPUT = $(abspath $(BPF_SAMPLES_PATH))/libbpf
+LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
+LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
+LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
+
+CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
+TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
+XDP_SAMPLE := xdp_sample_user.o
+
+fds_example-objs := fds_example.o
+sockex1-objs := sockex1_user.o
+sockex2-objs := sockex2_user.o
+sockex3-objs := sockex3_user.o
+tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
+tracex3-objs := tracex3_user.o
+tracex4-objs := tracex4_user.o
+tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
+tracex6-objs := tracex6_user.o
+trace_output-objs := trace_output_user.o
+lathist-objs := lathist_user.o
+offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
+spintest-objs := spintest_user.o $(TRACE_HELPERS)
+map_perf_test-objs := map_perf_test_user.o
+test_overhead-objs := test_overhead_user.o
+trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
+sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
+tc_l2_redirect-objs := tc_l2_redirect_user.o
+lwt_len_hist-objs := lwt_len_hist_user.o
+xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
+test_map_in_map-objs := test_map_in_map_user.o
+per_socket_stats_example-objs := cookie_uid_helper_example.o
+syscall_tp-objs := syscall_tp_user.o
+cpustat-objs := cpustat_user.o
+xdp_adjust_tail-objs := xdp_adjust_tail_user.o
+xdp_fwd-objs := xdp_fwd_user.o
+task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS)
+ibumad-objs := ibumad_user.o
+hbm-objs := hbm.o $(CGROUP_HELPERS)
+
+xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE)
# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-always += sockex1_kern.o
-always += sockex2_kern.o
-always += sockex3_kern.o
-always += tracex1_kern.o
-always += tracex2_kern.o
-always += tracex3_kern.o
-always += tracex4_kern.o
-always += tracex5_kern.o
-always += tracex6_kern.o
-always += sock_flags_kern.o
-always += test_probe_write_user_kern.o
-always += trace_output_kern.o
-always += tcbpf1_kern.o
-always += tcbpf2_kern.o
-always += tc_l2_redirect_kern.o
-always += lathist_kern.o
-always += offwaketime_kern.o
-always += spintest_kern.o
-always += map_perf_test_kern.o
-always += test_overhead_tp_kern.o
-always += test_overhead_kprobe_kern.o
-always += parse_varlen.o parse_simple.o parse_ldabs.o
-always += test_cgrp2_tc_kern.o
-always += xdp1_kern.o
-always += xdp2_kern.o
-always += test_current_task_under_cgroup_kern.o
-always += trace_event_kern.o
-always += sampleip_kern.o
-always += lwt_len_hist_kern.o
-always += xdp_tx_iptunnel_kern.o
-always += test_map_in_map_kern.o
-always += cookie_uid_helper_example.o
-always += tcp_synrto_kern.o
-always += tcp_rwnd_kern.o
-always += tcp_bufs_kern.o
-always += tcp_cong_kern.o
-always += tcp_iw_kern.o
-always += tcp_clamp_kern.o
-
-HOSTCFLAGS += -I$(objtree)/usr/include
-HOSTCFLAGS += -I$(srctree)/tools/lib/
-HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-HOSTCFLAGS += -I$(srctree)/tools/perf
-
-HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
-HOSTLOADLIBES_fds_example += -lelf
-HOSTLOADLIBES_sockex1 += -lelf
-HOSTLOADLIBES_sockex2 += -lelf
-HOSTLOADLIBES_sockex3 += -lelf
-HOSTLOADLIBES_tracex1 += -lelf
-HOSTLOADLIBES_tracex2 += -lelf
-HOSTLOADLIBES_tracex3 += -lelf
-HOSTLOADLIBES_tracex4 += -lelf -lrt
-HOSTLOADLIBES_tracex5 += -lelf
-HOSTLOADLIBES_tracex6 += -lelf
-HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
-HOSTLOADLIBES_load_sock_ops += -lelf
-HOSTLOADLIBES_test_probe_write_user += -lelf
-HOSTLOADLIBES_trace_output += -lelf -lrt
-HOSTLOADLIBES_lathist += -lelf
-HOSTLOADLIBES_offwaketime += -lelf
-HOSTLOADLIBES_spintest += -lelf
-HOSTLOADLIBES_map_perf_test += -lelf -lrt
-HOSTLOADLIBES_test_overhead += -lelf -lrt
-HOSTLOADLIBES_xdp1 += -lelf
-HOSTLOADLIBES_xdp2 += -lelf
-HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
-HOSTLOADLIBES_trace_event += -lelf
-HOSTLOADLIBES_sampleip += -lelf
-HOSTLOADLIBES_tc_l2_redirect += -l elf
-HOSTLOADLIBES_lwt_len_hist += -l elf
-HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
-HOSTLOADLIBES_test_map_in_map += -lelf
+always-y := $(tprogs-y)
+always-y += sockex1_kern.o
+always-y += sockex2_kern.o
+always-y += sockex3_kern.o
+always-y += tracex1.bpf.o
+always-y += tracex3.bpf.o
+always-y += tracex4.bpf.o
+always-y += tracex5.bpf.o
+always-y += tracex6.bpf.o
+always-y += trace_output.bpf.o
+always-y += tcbpf1_kern.o
+always-y += tc_l2_redirect_kern.o
+always-y += lathist_kern.o
+always-y += offwaketime.bpf.o
+always-y += spintest.bpf.o
+always-y += map_perf_test.bpf.o
+always-y += parse_varlen.o parse_simple.o parse_ldabs.o
+always-y += trace_event_kern.o
+always-y += sampleip_kern.o
+always-y += lwt_len_hist.bpf.o
+always-y += xdp_tx_iptunnel_kern.o
+always-y += test_map_in_map.bpf.o
+always-y += tcp_synrto_kern.o
+always-y += tcp_rwnd_kern.o
+always-y += tcp_bufs_kern.o
+always-y += tcp_cong_kern.o
+always-y += tcp_iw_kern.o
+always-y += tcp_clamp_kern.o
+always-y += tcp_basertt_kern.o
+always-y += tcp_tos_reflect_kern.o
+always-y += tcp_dumpstats_kern.o
+always-y += xdp2skb_meta_kern.o
+always-y += syscall_tp_kern.o
+always-y += cpustat_kern.o
+always-y += xdp_adjust_tail_kern.o
+always-y += xdp_fwd_kern.o
+always-y += task_fd_query_kern.o
+always-y += ibumad_kern.o
+always-y += hbm_out_kern.o
+always-y += hbm_edt_kern.o
+
+COMMON_CFLAGS = $(TPROGS_USER_CFLAGS)
+TPROGS_LDFLAGS = $(TPROGS_USER_LDFLAGS)
+
+ifeq ($(ARCH), arm)
+# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
+# headers when arm instruction set identification is requested.
+ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS))
+BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
+TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
+endif
+
+ifeq ($(ARCH), mips)
+TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__
+ifdef CONFIG_MACH_LOONGSON64
+BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-loongson64
+BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic
+endif
+endif
+
+ifeq ($(ARCH), x86)
+BPF_EXTRA_CFLAGS += -fcf-protection
+endif
+
+COMMON_CFLAGS += -Wall -O2
+COMMON_CFLAGS += -Wmissing-prototypes
+COMMON_CFLAGS += -Wstrict-prototypes
+COMMON_CFLAGS += $(call try-run,\
+ printf "int main() { return 0; }" |\
+ $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,)
+
+TPROGS_CFLAGS += $(COMMON_CFLAGS)
+TPROGS_CFLAGS += -I$(objtree)/usr/include
+TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
+TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
+TPROGS_CFLAGS += -I$(srctree)/tools/include
+TPROGS_CFLAGS += -I$(srctree)/tools/perf
+TPROGS_CFLAGS += -I$(srctree)/tools/lib
+TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
+
+ifdef SYSROOT
+COMMON_CFLAGS += --sysroot=$(SYSROOT)
+TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
+endif
+
+TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
+TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread
+TPROGLDLIBS_tracex4 += -lrt
+TPROGLDLIBS_trace_output += -lrt
+TPROGLDLIBS_map_perf_test += -lrt
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+# make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
+OPT ?= opt
+LLVM_DIS ?= llvm-dis
+LLVM_OBJCOPY ?= llvm-objcopy
+LLVM_READELF ?= llvm-readelf
+BTF_PAHOLE ?= pahole
+
+# Detect that we're cross compiling and use the cross compiler
+ifdef CROSS_COMPILE
+CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+# Don't evaluate probes and warnings if we need to run make recursively
+ifneq ($(src),)
+HDR_PROBE := $(shell printf "$(pound)include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
+ $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \
+ -o /dev/null 2>/dev/null && echo okay)
+
+ifeq ($(HDR_PROBE),)
+$(warning WARNING: Detected possible issues with include path.)
+$(warning WARNING: Please install kernel headers locally (make headers_install).)
+endif
+
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ $(CLANG) --target=bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+ $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
+
+BPF_EXTRA_CFLAGS += -fno-stack-protector
+ifneq ($(BTF_LLVM_PROBE),)
+ BPF_EXTRA_CFLAGS += -g
+else
+ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
+ BPF_EXTRA_CFLAGS += -g
+ LLC_FLAGS += -mattr=dwarfris
+ DWARF2BTF = y
+endif
+endif
+endif
# Trick to allow make to be run from this directory
all:
- $(MAKE) -C ../../ $(CURDIR)/
+ $(MAKE) -C ../../ M=$(CURDIR) BPF_SAMPLES_PATH=$(CURDIR)
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
- @rm -f *~
+ @find $(CURDIR) -type f -name '*~' -delete
+ @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool
+
+$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
+# Fix up variables inherited from Kbuild that tools/ build system won't like
+ $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \
+ LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \
+ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
+ $@ install_headers
-$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
- $(call if_changed_dep,cc_s_c)
+BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
+BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT)
+ $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \
+ OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap
+
+$(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT):
+ $(call msg,MKDIR,$@)
+ $(Q)mkdir -p $@
$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
$(call filechk,offsets,__SYSCALL_NRS_H__)
+targets += syscall_nrs.s
clean-files += syscall_nrs.h
FORCE:
@@ -198,19 +274,119 @@ verify_target_bpf: verify_cmds
exit 2; \
else true; fi
-$(src)/*.c: verify_target_bpf
+$(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
+$(src)/*.c: verify_target_bpf $(LIBBPF)
+
+libbpf_hdrs: $(LIBBPF)
+$(obj)/$(TRACE_HELPERS) $(obj)/$(CGROUP_HELPERS) $(obj)/$(XDP_SAMPLE): | libbpf_hdrs
+
+.PHONY: libbpf_hdrs
+
+$(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h
+
+$(obj)/tracex5.bpf.o: $(obj)/syscall_nrs.h
+$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+$(obj)/hbm.o: $(src)/hbm.h
+$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+
+# Override includes for xdp_sample_user.o because $(srctree)/usr/include in
+# TPROGS_CFLAGS causes conflicts
+XDP_SAMPLE_CFLAGS += -Wall -O2 \
+ -I$(src)/../../tools/include \
+ -I$(src)/../../tools/include/uapi \
+ -I$(LIBBPF_INCLUDE) \
+ -I$(src)/../../tools/testing/selftests/bpf
+
+$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) $(TPROGS_USER_CFLAGS)
+$(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+# Override includes for trace_helpers.o because __must_check won't be defined
+# in our include path.
+$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check=
+
+-include $(BPF_SAMPLES_PATH)/Makefile.target
+
+VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \
+ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \
+ $(abspath $(objtree)/vmlinux)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+ifeq ($(VMLINUX_BTF),)
+ $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\
+ build the kernel or set VMLINUX_BTF like "VMLINUX_BTF=/sys/kernel/btf/vmlinux" or VMLINUX_H variable)
+endif
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+clean-files += vmlinux.h
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+endef
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+
+$(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o
+
+$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
+ @echo " CLANG-BPF " $@
+ $(Q)$(CLANG) -g -O2 --target=bpf -D__TARGET_ARCH_$(SRCARCH) \
+ -Wno-compare-distinct-pointer-types -I$(srctree)/include \
+ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \
+ -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $(filter %.bpf.c,$^) -o $@
+
+LINKED_SKELS := xdp_router_ipv4.skel.h
+clean-files += $(LINKED_SKELS)
+
+xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o
+
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
+BPF_SRCS_LINKED := $(notdir $(wildcard $(src)/*.bpf.c))
+BPF_OBJS_LINKED := $(patsubst %.bpf.c,$(obj)/%.bpf.o, $(BPF_SRCS_LINKED))
+BPF_SKELS_LINKED := $(addprefix $(obj)/,$(LINKED_SKELS))
-$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
+$(BPF_SKELS_LINKED): $(BPF_OBJS_LINKED) $(BPFTOOL)
+ @echo " BPF GEN-OBJ " $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen object $(@:.skel.h=.lbpf.o) $(addprefix $(obj)/,$($(@F)-deps))
+ @echo " BPF GEN-SKEL" $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=.lbpf.o) name $(notdir $(@:.skel.h=)) > $@
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
+# below we use long chain of commands, clang | opt | llvm-dis | llc,
+# to generate final object file. 'clang' compiles the source into IR
+# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin
+# processing (llvm12) and IR optimizations. 'llvm-dis' converts
+# 'opt' output to IR, and finally 'llc' generates bpf byte code.
$(obj)/%.o: $(src)/%.c
- $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
- -I$(srctree)/tools/testing/selftests/bpf/ \
- -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
- -Wno-compare-distinct-pointer-types \
+ @echo " CLANG-bpf " $@
+ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
+ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
+ -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
+ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
+ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
- -Wno-unknown-warning-option \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
+ -fno-asynchronous-unwind-tables \
+ -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
+ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
+ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+ $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target
new file mode 100644
index 000000000000..7621f55e2947
--- /dev/null
+++ b/samples/bpf/Makefile.target
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+# ==========================================================================
+# Building binaries on the host system
+# Binaries are not used during the compilation of the kernel, and intended
+# to be build for target board, target board can be host of course. Added to
+# build binaries to run not on host system.
+#
+# Sample syntax
+# tprogs-y := xsk_example
+# Will compile xsk_example.c and create an executable named xsk_example
+#
+# tprogs-y := xdpsock
+# xdpsock-objs := xdpsock_1.o xdpsock_2.o
+# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable
+# xdpsock, based on xdpsock_1.o and xdpsock_2.o
+#
+# Derived from scripts/Makefile.host
+#
+__tprogs := $(sort $(tprogs-y))
+
+# C code
+# Executables compiled from a single .c file
+tprog-csingle := $(foreach m,$(__tprogs), \
+ $(if $($(m)-objs),,$(m)))
+
+# C executables linked based on several .o files
+tprog-cmulti := $(foreach m,$(__tprogs),\
+ $(if $($(m)-objs),$(m)))
+
+# Object (.o) files compiled from .c files
+tprog-cobjs := $(sort $(foreach m,$(__tprogs),$($(m)-objs)))
+
+tprog-csingle := $(addprefix $(obj)/,$(tprog-csingle))
+tprog-cmulti := $(addprefix $(obj)/,$(tprog-cmulti))
+tprog-cobjs := $(addprefix $(obj)/,$(tprog-cobjs))
+
+#####
+# Handle options to gcc. Support building with separate output directory
+
+_tprogc_flags = $(TPROGS_CFLAGS) \
+ $(TPROGCFLAGS_$(basetarget).o)
+
+# $(objtree)/$(obj) for including generated headers from checkin source files
+ifeq ($(KBUILD_EXTMOD),)
+ifdef building_out_of_srctree
+_tprogc_flags += -I $(objtree)/$(obj)
+endif
+endif
+
+tprogc_flags = -Wp,-MD,$(depfile) $(_tprogc_flags)
+
+# Create executable from a single .c file
+# tprog-csingle -> Executable
+quiet_cmd_tprog-csingle = CC $@
+ cmd_tprog-csingle = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-csingle)
+
+# Link an executable based on list of .o files, all plain c
+# tprog-cmulti -> executable
+quiet_cmd_tprog-cmulti = LD $@
+ cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \
+ $(addprefix $(obj)/,$($(@F)-objs)) \
+ $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F))
+$(tprog-cmulti): $(tprog-cobjs) FORCE
+ $(call if_changed,tprog-cmulti)
+$(call multi_depend, $(tprog-cmulti), , -objs)
+
+# Create .o file from a single .c file
+# tprog-cobjs -> .o
+quiet_cmd_tprog-cobjs = CC $@
+ cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $<
+$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-cobjs)
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index 79f9a58f1872..cabe2d216997 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -4,15 +4,39 @@ eBPF sample programs
This directory contains a test stubs, verifier test-suite and examples
for using eBPF. The examples use libbpf from tools/lib/bpf.
+Note that the XDP-specific samples have been removed from this directory and
+moved to the xdp-tools repository: https://github.com/xdp-project/xdp-tools
+See the commit messages removing each tool from this directory for how to
+convert specific command invocations between the old samples and the utilities
+in xdp-tools.
+
Build dependencies
==================
Compiling requires having installed:
- * clang >= version 3.4.0
- * llvm >= version 3.7.1
+ * clang
+ * llvm
+ * pahole
+
+Consult :ref:`Documentation/process/changes.rst <changes>` for the minimum
+version numbers required and how to update them. Note that LLVM's tool
+'llc' must support target 'bpf', list version and supported targets with
+command: ``llc --version``
+
+Clean and configuration
+-----------------------
+
+It can be needed to clean tools, samples or kernel before trying new arch or
+after some changes (on demand)::
-Note that LLVM's tool 'llc' must support target 'bpf', list version
-and supported targets with command: ``llc --version``
+ make -C tools clean
+ make -C samples/bpf clean
+ make clean
+
+Configure kernel, defconfig for instance
+(see "tools/testing/selftests/bpf/config" for a reference config)::
+
+ make defconfig
Kernel headers
--------------
@@ -23,8 +47,8 @@ user, simply call::
make headers_install
-This will creates a local "usr/include" directory in the git/build top
-level directory, that the make system automatically pickup first.
+This will create a local "usr/include" directory in the git/build top
+level directory, that the make system will automatically pick up first.
Compiling
=========
@@ -32,12 +56,10 @@ Compiling
For building the BPF samples, issue the below command from the kernel
top level directory::
- make samples/bpf/
-
-Do notice the "/" slash after the directory name.
+ make M=samples/bpf
It is also possible to call make from this directory. This will just
-hide the the invocation of make as above with the appended "/".
+hide the invocation of make as above.
Manually compiling LLVM with 'bpf' support
------------------------------------------
@@ -50,17 +72,50 @@ To generate a smaller llc binary one can use::
-DLLVM_TARGETS_TO_BUILD="BPF"
+We recommend that developers who want the fastest incremental builds
+use the Ninja build system, you can find it in your system's package
+manager, usually the package is ninja or ninja-build.
+
Quick sniplet for manually compiling LLVM and clang
-(build dependencies are cmake and gcc-c++)::
+(build dependencies are ninja, cmake and gcc-c++)::
- $ git clone http://llvm.org/git/llvm.git
- $ cd llvm/tools
- $ git clone --depth 1 http://llvm.org/git/clang.git
- $ cd ..; mkdir build; cd build
- $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86"
- $ make -j $(getconf _NPROCESSORS_ONLN)
+ $ git clone https://github.com/llvm/llvm-project.git
+ $ mkdir -p llvm-project/llvm/build
+ $ cd llvm-project/llvm/build
+ $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+ -DLLVM_ENABLE_PROJECTS="clang" \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_BUILD_RUNTIME=OFF
+ $ ninja
It is also possible to point make to the newly compiled 'llc' or
'clang' command via redefining LLC or CLANG on the make command line::
- make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+ make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
+
+Cross compiling samples
+-----------------------
+In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH
+environment variables before calling make. But do this before clean,
+configuration and header install steps described above. This will direct make to
+build samples for the cross target::
+
+ export ARCH=arm64
+ export CROSS_COMPILE="aarch64-linux-gnu-"
+
+Headers can be also installed on RFS of target board if need to keep them in
+sync (not necessarily and it creates a local "usr/include" directory also)::
+
+ make INSTALL_HDR_PATH=~/some_sysroot/usr headers_install
+
+Pointing LLC and CLANG is not necessarily if it's installed on HOST and have
+in its targets appropriate arm64 arch (usually it has several arches).
+Build samples::
+
+ make M=samples/bpf
+
+Or build samples with SYSROOT if some header or library is absent in toolchain,
+say libelf, providing address to file system containing headers and libs,
+can be RFS of target board::
+
+ make M=samples/bpf SYSROOT=~/some_sysroot
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
new file mode 100644
index 000000000000..634e81d83efd
--- /dev/null
+++ b/samples/bpf/asm_goto_workaround.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Facebook */
+#ifndef __ASM_GOTO_WORKAROUND_H
+#define __ASM_GOTO_WORKAROUND_H
+
+/*
+ * This will bring in asm_goto_output and asm_inline macro definitions
+ * if enabled by compiler and config options.
+ */
+#include <linux/types.h>
+
+#ifdef asm_goto_output
+#undef asm_goto_output
+#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output")
+#endif
+
+/*
+ * asm_inline is defined as asm __inline in "include/linux/compiler_types.h"
+ * if supported by the kernel's CC (i.e CONFIG_CC_HAS_ASM_INLINE) which is not
+ * supported by CLANG.
+ */
+#ifdef asm_inline
+#undef asm_inline
+#define asm_inline asm
+#endif
+
+#define volatile(x...) volatile("")
+#endif
diff --git a/samples/bpf/libbpf.h b/samples/bpf/bpf_insn.h
index 8ab36a04c174..29c3bb6ad1cd 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/bpf_insn.h
@@ -1,8 +1,7 @@
-/* eBPF mini library */
-#ifndef __LIBBPF_H
-#define __LIBBPF_H
-
-#include <bpf/bpf.h>
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* eBPF instruction mini library */
+#ifndef __BPF_INSN_H
+#define __BPF_INSN_H
struct bpf_insn;
@@ -135,15 +134,31 @@ struct bpf_insn;
.off = OFF, \
.imm = 0 })
-/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
-
-#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
- ((struct bpf_insn) { \
- .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+/*
+ * Atomic operations:
+ *
+ * BPF_ADD *(uint *) (dst_reg + off16) += src_reg
+ * BPF_AND *(uint *) (dst_reg + off16) &= src_reg
+ * BPF_OR *(uint *) (dst_reg + off16) |= src_reg
+ * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg
+ * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg);
+ * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg);
+ * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg);
+ * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg);
+ * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
+ */
+
+#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
- .imm = 0 })
+ .imm = OP })
+
+/* Legacy alias */
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF)
/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
@@ -165,6 +180,16 @@ struct bpf_insn;
.off = OFF, \
.imm = 0 })
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
@@ -175,6 +200,16 @@ struct bpf_insn;
.off = OFF, \
.imm = IMM })
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
/* Raw code statement block */
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
deleted file mode 100644
index 899f40310bc3..000000000000
--- a/samples/bpf/bpf_load.c
+++ /dev/null
@@ -1,777 +0,0 @@
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <libelf.h>
-#include <gelf.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <linux/bpf.h>
-#include <linux/filter.h>
-#include <linux/perf_event.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/types.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <poll.h>
-#include <ctype.h>
-#include <assert.h>
-#include "libbpf.h"
-#include "bpf_load.h"
-#include "perf-sys.h"
-
-#define DEBUGFS "/sys/kernel/debug/tracing/"
-
-static char license[128];
-static int kern_version;
-static bool processed_sec[128];
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-int map_fd[MAX_MAPS];
-int prog_fd[MAX_PROGS];
-int event_fd[MAX_PROGS];
-int prog_cnt;
-int prog_array_fd = -1;
-
-struct bpf_map_data map_data[MAX_MAPS];
-int map_data_count = 0;
-
-static int populate_prog_array(const char *event, int prog_fd)
-{
- int ind = atoi(event), err;
-
- err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
- if (err < 0) {
- printf("failed to store prog_fd in prog_array\n");
- return -1;
- }
- return 0;
-}
-
-static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
-{
- bool is_socket = strncmp(event, "socket", 6) == 0;
- bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
- bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
- bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
- bool is_xdp = strncmp(event, "xdp", 3) == 0;
- bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
- bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
- bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
- bool is_sockops = strncmp(event, "sockops", 7) == 0;
- size_t insns_cnt = size / sizeof(struct bpf_insn);
- enum bpf_prog_type prog_type;
- char buf[256];
- int fd, efd, err, id;
- struct perf_event_attr attr = {};
-
- attr.type = PERF_TYPE_TRACEPOINT;
- attr.sample_type = PERF_SAMPLE_RAW;
- attr.sample_period = 1;
- attr.wakeup_events = 1;
-
- if (is_socket) {
- prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- } else if (is_kprobe || is_kretprobe) {
- prog_type = BPF_PROG_TYPE_KPROBE;
- } else if (is_tracepoint) {
- prog_type = BPF_PROG_TYPE_TRACEPOINT;
- } else if (is_xdp) {
- prog_type = BPF_PROG_TYPE_XDP;
- } else if (is_perf_event) {
- prog_type = BPF_PROG_TYPE_PERF_EVENT;
- } else if (is_cgroup_skb) {
- prog_type = BPF_PROG_TYPE_CGROUP_SKB;
- } else if (is_cgroup_sk) {
- prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
- } else if (is_sockops) {
- prog_type = BPF_PROG_TYPE_SOCK_OPS;
- } else {
- printf("Unknown event '%s'\n", event);
- return -1;
- }
-
- fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
- if (fd < 0) {
- printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
- return -1;
- }
-
- prog_fd[prog_cnt++] = fd;
-
- if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
- return 0;
-
- if (is_socket || is_sockops) {
- if (is_socket)
- event += 6;
- else
- event += 7;
- if (*event != '/')
- return 0;
- event++;
- if (!isdigit(*event)) {
- printf("invalid prog number\n");
- return -1;
- }
- return populate_prog_array(event, fd);
- }
-
- if (is_kprobe || is_kretprobe) {
- if (is_kprobe)
- event += 7;
- else
- event += 10;
-
- if (*event == 0) {
- printf("event name cannot be empty\n");
- return -1;
- }
-
- if (isdigit(*event))
- return populate_prog_array(event, fd);
-
- snprintf(buf, sizeof(buf),
- "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
- is_kprobe ? 'p' : 'r', event, event);
- err = system(buf);
- if (err < 0) {
- printf("failed to create kprobe '%s' error '%s'\n",
- event, strerror(errno));
- return -1;
- }
-
- strcpy(buf, DEBUGFS);
- strcat(buf, "events/kprobes/");
- strcat(buf, event);
- strcat(buf, "/id");
- } else if (is_tracepoint) {
- event += 11;
-
- if (*event == 0) {
- printf("event name cannot be empty\n");
- return -1;
- }
- strcpy(buf, DEBUGFS);
- strcat(buf, "events/");
- strcat(buf, event);
- strcat(buf, "/id");
- }
-
- efd = open(buf, O_RDONLY, 0);
- if (efd < 0) {
- printf("failed to open event %s\n", event);
- return -1;
- }
-
- err = read(efd, buf, sizeof(buf));
- if (err < 0 || err >= sizeof(buf)) {
- printf("read from '%s' failed '%s'\n", event, strerror(errno));
- return -1;
- }
-
- close(efd);
-
- buf[err] = 0;
- id = atoi(buf);
- attr.config = id;
-
- efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
- if (efd < 0) {
- printf("event %d fd %d err %s\n", id, efd, strerror(errno));
- return -1;
- }
- event_fd[prog_cnt - 1] = efd;
- ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
- ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
-
- return 0;
-}
-
-static int load_maps(struct bpf_map_data *maps, int nr_maps,
- fixup_map_cb fixup_map)
-{
- int i;
-
- for (i = 0; i < nr_maps; i++) {
- if (fixup_map) {
- fixup_map(&maps[i], i);
- /* Allow userspace to assign map FD prior to creation */
- if (maps[i].fd != -1) {
- map_fd[i] = maps[i].fd;
- continue;
- }
- }
-
- if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
- maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
-
- map_fd[i] = bpf_create_map_in_map(maps[i].def.type,
- maps[i].def.key_size,
- inner_map_fd,
- maps[i].def.max_entries,
- maps[i].def.map_flags);
- } else {
- map_fd[i] = bpf_create_map(maps[i].def.type,
- maps[i].def.key_size,
- maps[i].def.value_size,
- maps[i].def.max_entries,
- maps[i].def.map_flags);
- }
- if (map_fd[i] < 0) {
- printf("failed to create a map: %d %s\n",
- errno, strerror(errno));
- return 1;
- }
- maps[i].fd = map_fd[i];
-
- if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
- prog_array_fd = map_fd[i];
- }
- return 0;
-}
-
-static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
- GElf_Shdr *shdr, Elf_Data **data)
-{
- Elf_Scn *scn;
-
- scn = elf_getscn(elf, i);
- if (!scn)
- return 1;
-
- if (gelf_getshdr(scn, shdr) != shdr)
- return 2;
-
- *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
- if (!*shname || !shdr->sh_size)
- return 3;
-
- *data = elf_getdata(scn, 0);
- if (!*data || elf_getdata(scn, *data) != NULL)
- return 4;
-
- return 0;
-}
-
-static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
- GElf_Shdr *shdr, struct bpf_insn *insn,
- struct bpf_map_data *maps, int nr_maps)
-{
- int i, nrels;
-
- nrels = shdr->sh_size / shdr->sh_entsize;
-
- for (i = 0; i < nrels; i++) {
- GElf_Sym sym;
- GElf_Rel rel;
- unsigned int insn_idx;
- bool match = false;
- int j, map_idx;
-
- gelf_getrel(data, i, &rel);
-
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
-
- gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
-
- if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
- printf("invalid relo for insn[%d].code 0x%x\n",
- insn_idx, insn[insn_idx].code);
- return 1;
- }
- insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
-
- /* Match FD relocation against recorded map_data[] offset */
- for (map_idx = 0; map_idx < nr_maps; map_idx++) {
- if (maps[map_idx].elf_offset == sym.st_value) {
- match = true;
- break;
- }
- }
- if (match) {
- insn[insn_idx].imm = maps[map_idx].fd;
- } else {
- printf("invalid relo for insn[%d] no map_data match\n",
- insn_idx);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int cmp_symbols(const void *l, const void *r)
-{
- const GElf_Sym *lsym = (const GElf_Sym *)l;
- const GElf_Sym *rsym = (const GElf_Sym *)r;
-
- if (lsym->st_value < rsym->st_value)
- return -1;
- else if (lsym->st_value > rsym->st_value)
- return 1;
- else
- return 0;
-}
-
-static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
- Elf *elf, Elf_Data *symbols, int strtabidx)
-{
- int map_sz_elf, map_sz_copy;
- bool validate_zero = false;
- Elf_Data *data_maps;
- int i, nr_maps;
- GElf_Sym *sym;
- Elf_Scn *scn;
- int copy_sz;
-
- if (maps_shndx < 0)
- return -EINVAL;
- if (!symbols)
- return -EINVAL;
-
- /* Get data for maps section via elf index */
- scn = elf_getscn(elf, maps_shndx);
- if (scn)
- data_maps = elf_getdata(scn, NULL);
- if (!scn || !data_maps) {
- printf("Failed to get Elf_Data from maps section %d\n",
- maps_shndx);
- return -EINVAL;
- }
-
- /* For each map get corrosponding symbol table entry */
- sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
- for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
- assert(nr_maps < MAX_MAPS+1);
- if (!gelf_getsym(symbols, i, &sym[nr_maps]))
- continue;
- if (sym[nr_maps].st_shndx != maps_shndx)
- continue;
- /* Only increment iif maps section */
- nr_maps++;
- }
-
- /* Align to map_fd[] order, via sort on offset in sym.st_value */
- qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
-
- /* Keeping compatible with ELF maps section changes
- * ------------------------------------------------
- * The program size of struct bpf_map_def is known by loader
- * code, but struct stored in ELF file can be different.
- *
- * Unfortunately sym[i].st_size is zero. To calculate the
- * struct size stored in the ELF file, assume all struct have
- * the same size, and simply divide with number of map
- * symbols.
- */
- map_sz_elf = data_maps->d_size / nr_maps;
- map_sz_copy = sizeof(struct bpf_map_def);
- if (map_sz_elf < map_sz_copy) {
- /*
- * Backward compat, loading older ELF file with
- * smaller struct, keeping remaining bytes zero.
- */
- map_sz_copy = map_sz_elf;
- } else if (map_sz_elf > map_sz_copy) {
- /*
- * Forward compat, loading newer ELF file with larger
- * struct with unknown features. Assume zero means
- * feature not used. Thus, validate rest of struct
- * data is zero.
- */
- validate_zero = true;
- }
-
- /* Memcpy relevant part of ELF maps data to loader maps */
- for (i = 0; i < nr_maps; i++) {
- unsigned char *addr, *end;
- struct bpf_map_def *def;
- const char *map_name;
- size_t offset;
-
- map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
- maps[i].name = strdup(map_name);
- if (!maps[i].name) {
- printf("strdup(%s): %s(%d)\n", map_name,
- strerror(errno), errno);
- free(sym);
- return -errno;
- }
-
- /* Symbol value is offset into ELF maps section data area */
- offset = sym[i].st_value;
- def = (struct bpf_map_def *)(data_maps->d_buf + offset);
- maps[i].elf_offset = offset;
- memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
- memcpy(&maps[i].def, def, map_sz_copy);
-
- /* Verify no newer features were requested */
- if (validate_zero) {
- addr = (unsigned char*) def + map_sz_copy;
- end = (unsigned char*) def + map_sz_elf;
- for (; addr < end; addr++) {
- if (*addr != 0) {
- free(sym);
- return -EFBIG;
- }
- }
- }
- }
-
- free(sym);
- return nr_maps;
-}
-
-static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
-{
- int fd, i, ret, maps_shndx = -1, strtabidx = -1;
- Elf *elf;
- GElf_Ehdr ehdr;
- GElf_Shdr shdr, shdr_prog;
- Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
- char *shname, *shname_prog;
- int nr_maps = 0;
-
- /* reset global variables */
- kern_version = 0;
- memset(license, 0, sizeof(license));
- memset(processed_sec, 0, sizeof(processed_sec));
-
- if (elf_version(EV_CURRENT) == EV_NONE)
- return 1;
-
- fd = open(path, O_RDONLY, 0);
- if (fd < 0)
- return 1;
-
- elf = elf_begin(fd, ELF_C_READ, NULL);
-
- if (!elf)
- return 1;
-
- if (gelf_getehdr(elf, &ehdr) != &ehdr)
- return 1;
-
- /* clear all kprobes */
- i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
-
- /* scan over all elf sections to get license and map info */
- for (i = 1; i < ehdr.e_shnum; i++) {
-
- if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
- continue;
-
- if (0) /* helpful for llvm debugging */
- printf("section %d:%s data %p size %zd link %d flags %d\n",
- i, shname, data->d_buf, data->d_size,
- shdr.sh_link, (int) shdr.sh_flags);
-
- if (strcmp(shname, "license") == 0) {
- processed_sec[i] = true;
- memcpy(license, data->d_buf, data->d_size);
- } else if (strcmp(shname, "version") == 0) {
- processed_sec[i] = true;
- if (data->d_size != sizeof(int)) {
- printf("invalid size of version section %zd\n",
- data->d_size);
- return 1;
- }
- memcpy(&kern_version, data->d_buf, sizeof(int));
- } else if (strcmp(shname, "maps") == 0) {
- int j;
-
- maps_shndx = i;
- data_maps = data;
- for (j = 0; j < MAX_MAPS; j++)
- map_data[j].fd = -1;
- } else if (shdr.sh_type == SHT_SYMTAB) {
- strtabidx = shdr.sh_link;
- symbols = data;
- }
- }
-
- ret = 1;
-
- if (!symbols) {
- printf("missing SHT_SYMTAB section\n");
- goto done;
- }
-
- if (data_maps) {
- nr_maps = load_elf_maps_section(map_data, maps_shndx,
- elf, symbols, strtabidx);
- if (nr_maps < 0) {
- printf("Error: Failed loading ELF maps (errno:%d):%s\n",
- nr_maps, strerror(-nr_maps));
- ret = 1;
- goto done;
- }
- if (load_maps(map_data, nr_maps, fixup_map))
- goto done;
- map_data_count = nr_maps;
-
- processed_sec[maps_shndx] = true;
- }
-
- /* process all relo sections, and rewrite bpf insns for maps */
- for (i = 1; i < ehdr.e_shnum; i++) {
- if (processed_sec[i])
- continue;
-
- if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
- continue;
-
- if (shdr.sh_type == SHT_REL) {
- struct bpf_insn *insns;
-
- /* locate prog sec that need map fixup (relocations) */
- if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
- &shdr_prog, &data_prog))
- continue;
-
- if (shdr_prog.sh_type != SHT_PROGBITS ||
- !(shdr_prog.sh_flags & SHF_EXECINSTR))
- continue;
-
- insns = (struct bpf_insn *) data_prog->d_buf;
- processed_sec[i] = true; /* relo section */
-
- if (parse_relo_and_apply(data, symbols, &shdr, insns,
- map_data, nr_maps))
- continue;
- }
- }
-
- /* load programs */
- for (i = 1; i < ehdr.e_shnum; i++) {
-
- if (processed_sec[i])
- continue;
-
- if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
- continue;
-
- if (memcmp(shname, "kprobe/", 7) == 0 ||
- memcmp(shname, "kretprobe/", 10) == 0 ||
- memcmp(shname, "tracepoint/", 11) == 0 ||
- memcmp(shname, "xdp", 3) == 0 ||
- memcmp(shname, "perf_event", 10) == 0 ||
- memcmp(shname, "socket", 6) == 0 ||
- memcmp(shname, "cgroup/", 7) == 0 ||
- memcmp(shname, "sockops", 7) == 0) {
- ret = load_and_attach(shname, data->d_buf,
- data->d_size);
- if (ret != 0)
- goto done;
- }
- }
-
- ret = 0;
-done:
- close(fd);
- return ret;
-}
-
-int load_bpf_file(char *path)
-{
- return do_load_bpf_file(path, NULL);
-}
-
-int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
-{
- return do_load_bpf_file(path, fixup_map);
-}
-
-void read_trace_pipe(void)
-{
- int trace_fd;
-
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
- if (trace_fd < 0)
- return;
-
- while (1) {
- static char buf[4096];
- ssize_t sz;
-
- sz = read(trace_fd, buf, sizeof(buf));
- if (sz > 0) {
- buf[sz] = 0;
- puts(buf);
- }
- }
-}
-
-#define MAX_SYMS 300000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
-
-static int ksym_cmp(const void *p1, const void *p2)
-{
- return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
-}
-
-int load_kallsyms(void)
-{
- FILE *f = fopen("/proc/kallsyms", "r");
- char func[256], buf[256];
- char symbol;
- void *addr;
- int i = 0;
-
- if (!f)
- return -ENOENT;
-
- while (!feof(f)) {
- if (!fgets(buf, sizeof(buf), f))
- break;
- if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
- break;
- if (!addr)
- continue;
- syms[i].addr = (long) addr;
- syms[i].name = strdup(func);
- i++;
- }
- sym_cnt = i;
- qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
- return 0;
-}
-
-struct ksym *ksym_search(long key)
-{
- int start = 0, end = sym_cnt;
- int result;
-
- while (start < end) {
- size_t mid = start + (end - start) / 2;
-
- result = key - syms[mid].addr;
- if (result < 0)
- end = mid;
- else if (result > 0)
- start = mid + 1;
- else
- return &syms[mid];
- }
-
- if (start >= 1 && syms[start - 1].addr < key &&
- key < syms[start].addr)
- /* valid ksym */
- return &syms[start - 1];
-
- /* out of range. return _stext */
- return &syms[0];
-}
-
-int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
- struct sockaddr_nl sa;
- int sock, seq = 0, len, ret = -1;
- char buf[4096];
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
- struct nlmsghdr *nh;
- struct nlmsgerr *err;
-
- memset(&sa, 0, sizeof(sa));
- sa.nl_family = AF_NETLINK;
-
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
- }
-
- if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
- req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
-
- /* started nested attribute for XDP */
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
- nla->nla_len = NLA_HDRLEN;
-
- /* add XDP fd */
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len += nla_xdp->nla_len;
-
- /* if user passed in any flags, add those too */
- if (flags) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
- nla->nla_len += nla_xdp->nla_len;
- }
-
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- len = recv(sock, buf, sizeof(buf), 0);
- if (len < 0) {
- printf("recv from netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
- nh = NLMSG_NEXT(nh, len)) {
- if (nh->nlmsg_pid != getpid()) {
- printf("Wrong pid %d, expected %d\n",
- nh->nlmsg_pid, getpid());
- goto cleanup;
- }
- if (nh->nlmsg_seq != seq) {
- printf("Wrong seq %d, expected %d\n",
- nh->nlmsg_seq, seq);
- goto cleanup;
- }
- switch (nh->nlmsg_type) {
- case NLMSG_ERROR:
- err = (struct nlmsgerr *)NLMSG_DATA(nh);
- if (!err->error)
- continue;
- printf("nlmsg error %s\n", strerror(-err->error));
- goto cleanup;
- case NLMSG_DONE:
- break;
- }
- }
-
- ret = 0;
-
-cleanup:
- close(sock);
- return ret;
-}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
deleted file mode 100644
index ca0563d04744..000000000000
--- a/samples/bpf/bpf_load.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef __BPF_LOAD_H
-#define __BPF_LOAD_H
-
-#include "libbpf.h"
-
-#define MAX_MAPS 32
-#define MAX_PROGS 32
-
-struct bpf_map_def {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
- unsigned int inner_map_idx;
-};
-
-struct bpf_map_data {
- int fd;
- char *name;
- size_t elf_offset;
- struct bpf_map_def def;
-};
-
-typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
-
-extern int prog_fd[MAX_PROGS];
-extern int event_fd[MAX_PROGS];
-extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
-extern int prog_cnt;
-
-/* There is a one-to-one mapping between map_fd[] and map_data[].
- * The map_data[] just contains more rich info on the given map.
- */
-extern int map_fd[MAX_MAPS];
-extern struct bpf_map_data map_data[MAX_MAPS];
-extern int map_data_count;
-
-/* parses elf file compiled by llvm .c->.o
- * . parses 'maps' section and creates maps via BPF syscall
- * . parses 'license' section and passes it to syscall
- * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
- * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
- * . loads eBPF programs via BPF syscall
- *
- * One ELF file can contain multiple BPF programs which will be loaded
- * and their FDs stored stored in prog_fd array
- *
- * returns zero on success
- */
-int load_bpf_file(char *path);
-int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
-
-void read_trace_pipe(void);
-struct ksym {
- long addr;
- char *name;
-};
-
-int load_kallsyms(void);
-struct ksym *ksym_search(long key);
-int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
-#endif
diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c
deleted file mode 100644
index 9d1be9426401..000000000000
--- a/samples/bpf/cgroup_helpers.c
+++ /dev/null
@@ -1,177 +0,0 @@
-#define _GNU_SOURCE
-#include <sched.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <linux/limits.h>
-#include <stdio.h>
-#include <linux/sched.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <ftw.h>
-
-
-#include "cgroup_helpers.h"
-
-/*
- * To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
- *
- * We assume /mnt exists.
- */
-
-#define WALK_FD_LIMIT 16
-#define CGROUP_MOUNT_PATH "/mnt"
-#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
-#define format_cgroup_path(buf, path) \
- snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
- CGROUP_WORK_DIR, path)
-
-/**
- * setup_cgroup_environment() - Setup the cgroup environment
- *
- * After calling this function, cleanup_cgroup_environment should be called
- * once testing is complete.
- *
- * This function will print an error to stderr and return 1 if it is unable
- * to setup the cgroup environment. If setup is successful, 0 is returned.
- */
-int setup_cgroup_environment(void)
-{
- char cgroup_workdir[PATH_MAX + 1];
-
- format_cgroup_path(cgroup_workdir, "");
-
- if (unshare(CLONE_NEWNS)) {
- log_err("unshare");
- return 1;
- }
-
- if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
- log_err("mount fakeroot");
- return 1;
- }
-
- if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
- log_err("mount cgroup2");
- return 1;
- }
-
- /* Cleanup existing failed runs, now that the environment is setup */
- cleanup_cgroup_environment();
-
- if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
- log_err("mkdir cgroup work dir");
- return 1;
- }
-
- return 0;
-}
-
-static int nftwfunc(const char *filename, const struct stat *statptr,
- int fileflags, struct FTW *pfwt)
-{
- if ((fileflags & FTW_D) && rmdir(filename))
- log_err("Removing cgroup: %s", filename);
- return 0;
-}
-
-
-static int join_cgroup_from_top(char *cgroup_path)
-{
- char cgroup_procs_path[PATH_MAX + 1];
- pid_t pid = getpid();
- int fd, rc = 0;
-
- snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),
- "%s/cgroup.procs", cgroup_path);
-
- fd = open(cgroup_procs_path, O_WRONLY);
- if (fd < 0) {
- log_err("Opening Cgroup Procs: %s", cgroup_procs_path);
- return 1;
- }
-
- if (dprintf(fd, "%d\n", pid) < 0) {
- log_err("Joining Cgroup");
- rc = 1;
- }
-
- close(fd);
- return rc;
-}
-
-/**
- * join_cgroup() - Join a cgroup
- * @path: The cgroup path, relative to the workdir, to join
- *
- * This function expects a cgroup to already be created, relative to the cgroup
- * work dir, and it joins it. For example, passing "/my-cgroup" as the path
- * would actually put the calling process into the cgroup
- * "/cgroup-test-work-dir/my-cgroup"
- *
- * On success, it returns 0, otherwise on failure it returns 1.
- */
-int join_cgroup(char *path)
-{
- char cgroup_path[PATH_MAX + 1];
-
- format_cgroup_path(cgroup_path, path);
- return join_cgroup_from_top(cgroup_path);
-}
-
-/**
- * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
- *
- * This is an idempotent function to delete all temporary cgroups that
- * have been created during the test, including the cgroup testing work
- * directory.
- *
- * At call time, it moves the calling process to the root cgroup, and then
- * runs the deletion process. It is idempotent, and should not fail, unless
- * a process is lingering.
- *
- * On failure, it will print an error to stderr, and try to continue.
- */
-void cleanup_cgroup_environment(void)
-{
- char cgroup_workdir[PATH_MAX + 1];
-
- format_cgroup_path(cgroup_workdir, "");
- join_cgroup_from_top(CGROUP_MOUNT_PATH);
- nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
-}
-
-/**
- * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
- * @path: The cgroup path, relative to the workdir, to join
- *
- * This function creates a cgroup under the top level workdir and returns the
- * file descriptor. It is idempotent.
- *
- * On success, it returns the file descriptor. On failure it returns 0.
- * If there is a failure, it prints the error to stderr.
- */
-int create_and_get_cgroup(char *path)
-{
- char cgroup_path[PATH_MAX + 1];
- int fd;
-
- format_cgroup_path(cgroup_path, path);
- if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
- log_err("mkdiring cgroup");
- return 0;
- }
-
- fd = open(cgroup_path, O_RDONLY);
- if (fd < 0) {
- log_err("Opening Cgroup");
- return 0;
- }
-
- return fd;
-}
diff --git a/samples/bpf/cgroup_helpers.h b/samples/bpf/cgroup_helpers.h
deleted file mode 100644
index 78c55207b6bd..000000000000
--- a/samples/bpf/cgroup_helpers.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __CGROUP_HELPERS_H
-#define __CGROUP_HELPERS_H
-#include <errno.h>
-#include <string.h>
-
-#define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
- __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
-
-
-int create_and_get_cgroup(char *path);
-int join_cgroup(char *path);
-int setup_cgroup_environment(void);
-void cleanup_cgroup_environment(void);
-
-#endif
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index 9d751e209f31..f0df3dda4b1f 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -51,7 +51,7 @@
#include <sys/types.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "libbpf.h"
+#include "bpf_insn.h"
#define PORT 8888
@@ -67,8 +67,8 @@ static bool test_finish;
static void maps_create(void)
{
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t),
- sizeof(struct stats), 100, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(uint32_t),
+ sizeof(struct stats), 100, NULL);
if (map_fd < 0)
error(1, errno, "map create failed!\n");
}
@@ -147,19 +147,23 @@ static void prog_load(void)
*/
BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1,
- offsetof(struct stats, packets)),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_9, BPF_REG_1,
+ offsetof(struct stats, packets)),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
offsetof(struct __sk_buff, len)),
- BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1,
- offsetof(struct stats, bytes)),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_9, BPF_REG_1,
+ offsetof(struct stats, bytes)),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
offsetof(struct __sk_buff, len)),
BPF_EXIT_INSN(),
};
- prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
- ARRAY_SIZE(prog), "GPL", 0,
- log_buf, sizeof(log_buf));
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = log_buf,
+ .log_size = sizeof(log_buf),
+ );
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ prog, ARRAY_SIZE(prog), &opts);
if (prog_fd < 0)
error(1, errno, "failed to load prog\n%s\n", log_buf);
}
@@ -167,7 +171,7 @@ static void prog_load(void)
static void prog_attach_iptables(char *file)
{
int ret;
- char rules[100];
+ char rules[256];
if (bpf_obj_pin(prog_fd, file))
error(1, errno, "bpf_obj_pin");
@@ -175,8 +179,13 @@ static void prog_attach_iptables(char *file)
printf("file path too long: %s\n", file);
exit(1);
}
- sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
- file);
+ ret = snprintf(rules, sizeof(rules),
+ "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
+ file);
+ if (ret < 0 || ret >= sizeof(rules)) {
+ printf("error constructing iptables command\n");
+ exit(1);
+ }
ret = system(rules);
if (ret < 0) {
printf("iptables rule update failed: %d/n", WEXITSTATUS(ret));
@@ -246,7 +255,7 @@ static void udp_client(void)
recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0,
(struct sockaddr *)&si_me, &slen);
if (recv_len < 0)
- error(1, errno, "revieve\n");
+ error(1, errno, "receive\n");
res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr),
sizeof(si_me.sin_addr));
if (res != 0)
@@ -313,7 +322,7 @@ int main(int argc, char *argv[])
print_table();
printf("\n");
sleep(1);
- };
+ }
} else if (cfg_test_cookie) {
udp_client();
}
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644
index 000000000000..7ec7143e2757
--- /dev/null
+++ b/samples/bpf/cpustat_kern.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU have three idle states for cstate:
+ * WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU have 5 operating points:
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumption and other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts), when new event incoming we need to update
+ * combination for new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one below array for recording state index and
+ * timestamp, and record for cstate and pstate saperately:
+ *
+ * +--------------------------+
+ * | cstate timestamp |
+ * +--------------------------+
+ * | cstate index |
+ * +--------------------------+
+ * | pstate timestamp |
+ * +--------------------------+
+ * | pstate index |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME 0
+#define MAP_OFF_CSTATE_IDX 1
+#define MAP_OFF_PSTATE_TIME 2
+#define MAP_OFF_PSTATE_IDX 3
+#define MAP_OFF_NUM 4
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
+} my_map SEC(".maps");
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
+} cstate_duration SEC(".maps");
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
+} pstate_duration SEC(".maps");
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/tracing/events/power/cpu_idle/format
+ * /sys/kernel/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+ u64 pad;
+ u32 state;
+ u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+ if (frequency == cpu_opps[i])
+ return i;
+ }
+
+ return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ if (ctx->cpu_id > MAX_CPU)
+ return 0;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+ cts = bpf_map_lookup_elem(&my_map, &key);
+ if (!cts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ prev_state = *cstate;
+ *cstate = ctx->state;
+
+ if (!*cts) {
+ *cts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *cts;
+ *cts = cur_ts;
+
+ /*
+ * When state doesn't equal to (u32)-1, the cpu will enter
+ * one idle state; for this case we need to record interval
+ * for the pstate.
+ *
+ * OPP2
+ * +---------------------+
+ * OPP1 | |
+ * ---------+ |
+ * | Idle state
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ if (ctx->state != (u32)-1) {
+
+ /* record pstate after have first cpu_frequency event */
+ if (!*pts)
+ return 0;
+
+ delta = cur_ts - *pts;
+
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ /*
+ * When state equal to (u32)-1, the cpu just exits from one
+ * specific idle state; for this case we need to record
+ * interval for the pstate.
+ *
+ * OPP2
+ * -----------+
+ * | OPP1
+ * | +-----------
+ * | Idle state |
+ * +---------------------+
+ *
+ * |<- cstate duration ->|
+ * ^ ^
+ * cts cur_ts
+ */
+ } else {
+
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+ }
+
+ /* Update timestamp for pstate as new start time */
+ if (*pts)
+ *pts = cur_ts;
+
+ return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+ u64 *pts, *cstate, *pstate, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ *pstate = ctx->state;
+
+ if (!*pts) {
+ *pts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *pts;
+ *pts = cur_ts;
+
+ /* When CPU is in idle, bail out to skip pstate statistics */
+ if (*cstate != (u32)(-1))
+ return 0;
+
+ /*
+ * The cpu changes to another different OPP (in below diagram
+ * change frequency from OPP3 to OPP1), need recording interval
+ * for previous frequency OPP3 and update timestamp as start
+ * time for new frequency OPP1.
+ *
+ * OPP3
+ * +---------------------+
+ * OPP2 | |
+ * ---------+ |
+ * | OPP1
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644
index 000000000000..356f756cba0d
--- /dev/null
+++ b/samples/bpf/cpustat_user.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+static int cstate_map_fd, pstate_map_fd;
+
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+#define MAX_STARS 40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ "208000"
+#define CPUFREQ_HIGHEST_FREQ "12000000"
+
+struct cpu_stat_data {
+ unsigned long cstate[MAX_CSTATE_ENTRIES];
+ unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+ int i, j;
+ char state_str[sizeof("cstate-9")];
+ struct cpu_stat_data *data;
+
+ /* Clear screen */
+ printf("\033[2J");
+
+ /* Header */
+ printf("\nCPU states statistics:\n");
+ printf("%-10s ", "state(ms)");
+
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ sprintf(state_str, "cstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ sprintf(state_str, "pstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ printf("\n");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ data = &stat_data[j];
+
+ printf("CPU-%-6d ", j);
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+ printf("%-11lu ", data->cstate[i] / 1000000);
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+ printf("%-11lu ", data->pstate[i] / 1000000);
+
+ printf("\n");
+ }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+ unsigned long key, value;
+ int c, i;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ key = c * MAX_CSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(cstate_fd, &key, &value);
+ stat_data[c].cstate[i] = value;
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ key = c * MAX_PSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(pstate_fd, &key, &value);
+ stat_data[c].pstate[i] = value;
+ }
+ }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the self running task affinity to cpus one by one so can wake up
+ * the specific CPU to handle scheduling; this results in all cpus can be
+ * waken up once and produce ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+ int rcpu, i, ret;
+ cpu_set_t cpumask;
+ cpu_set_t original_cpumask;
+
+ ret = sysconf(_SC_NPROCESSORS_CONF);
+ if (ret < 0)
+ return -1;
+
+ rcpu = sched_getcpu();
+ if (rcpu < 0)
+ return -1;
+
+ /* Keep track of the CPUs we will run on */
+ sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+ for (i = 0; i < ret; i++) {
+
+ /* Pointless to wake up ourself */
+ if (i == rcpu)
+ continue;
+
+ /* Pointless to wake CPUs we will not run on */
+ if (!CPU_ISSET(i, &original_cpumask))
+ continue;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ sched_setaffinity(0, sizeof(cpumask), &cpumask);
+ }
+
+ /* Enable all the CPUs of the original mask */
+ sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+ return 0;
+}
+
+/*
+ * It's possible to have no any frequency change for long time and cannot
+ * get ftrace event 'trace_cpu_frequency' for long period, this introduces
+ * big deviation for pstate statistics.
+ *
+ * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
+ * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
+ * the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+ int len, fd;
+
+ fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+ if (fd < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ return fd;
+ }
+
+ len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+ len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+err:
+ close(fd);
+ return len;
+}
+
+static void int_exit(int sig)
+{
+ cpu_stat_inject_cpu_idle_event();
+ cpu_stat_inject_cpu_frequency_event();
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
+ cpu_stat_print();
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char filename[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration");
+ pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration");
+ if (cstate_map_fd < 0 || pstate_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
+ }
+
+ ret = cpu_stat_inject_cpu_idle_event();
+ if (ret < 0)
+ return 1;
+
+ ret = cpu_stat_inject_cpu_frequency_event();
+ if (ret < 0)
+ return 1;
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ while (1) {
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
+ cpu_stat_print();
+ sleep(5);
+ }
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+ return 0;
+}
diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
new file mode 100755
index 000000000000..7f4f722787d5
--- /dev/null
+++ b/samples/bpf/do_hbm_test.sh
@@ -0,0 +1,438 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 Facebook
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of version 2 of the GNU General Public
+# License as published by the Free Software Foundation.
+
+Usage() {
+ echo "Script for testing HBM (Host Bandwidth Manager) framework."
+ echo "It creates a cgroup to use for testing and load a BPF program to limit"
+ echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
+ echo "loads. The output is the goodput in Mbps (unless -D was used)."
+ echo ""
+ echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
+ echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
+ echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
+ echo " [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
+ echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
+ echo " [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]"
+ echo " Where:"
+ echo " out egress (default)"
+ echo " -b or --bpf BPF program filename to load and attach."
+ echo " Default is hbm_out_kern.o for egress,"
+ echo " -c or -cc TCP congestion control (cubic or dctcp)"
+ echo " --debug print BPF trace buffer"
+ echo " -d or --delay add a delay in ms using netem"
+ echo " -D In addition to the goodput in Mbps, it also outputs"
+ echo " other detailed information. This information is"
+ echo " test dependent (i.e. iperf3 or netperf)."
+ echo " -E enable ECN (not required for dctcp)"
+ echo " --edt use fq's Earliest Departure Time (requires fq)"
+ echo " -f or --flows number of concurrent flows (default=1)"
+ echo " -i or --id cgroup id (an integer, default is 1)"
+ echo " -N use netperf instead of iperf3"
+ echo " --no_cn Do not return CN notifications"
+ echo " -l do not limit flows using loopback"
+ echo " -h Help"
+ echo " -p or --port iperf3 port (default is 5201)"
+ echo " -P use an iperf3 instance for each flow"
+ echo " -q use the specified qdisc"
+ echo " -r or --rate rate in Mbps (default 1s 1Gbps)"
+ echo " -R Use TCP_RR for netperf. 1st flow has req"
+ echo " size of 10KB, rest of 1MB. Reply in all"
+ echo " cases is 1 byte."
+ echo " More detailed output for each flow can be found"
+ echo " in the files netperf.<cg>.<flow>, where <cg> is the"
+ echo " cgroup id as specified with the -i flag, and <flow>"
+ echo " is the flow id starting at 1 and increasing by 1 for"
+ echo " flow (as specified by -f)."
+ echo " -s or --server hostname of netperf server. Used to create netperf"
+ echo " test traffic between to hosts (default is within host)"
+ echo " netserver must be running on the host."
+ echo " -S or --stats whether to update hbm stats (default is yes)."
+ echo " -t or --time duration of iperf3 in seconds (default=5)"
+ echo " -w Work conserving flag. cgroup can increase its"
+ echo " bandwidth beyond the rate limit specified"
+ echo " while there is available bandwidth. Current"
+ echo " implementation assumes there is only one NIC"
+ echo " (eth0), but can be extended to support multiple"
+ echo " NICs."
+ echo " cubic or dctcp specify which TCP CC to use"
+ echo " "
+ exit
+}
+
+#set -x
+
+debug_flag=0
+args="$@"
+name="$0"
+netem=0
+cc=x
+dir="-o"
+dir_name="out"
+dur=5
+flows=1
+id=1
+prog=""
+port=5201
+rate=1000
+multi_iperf=0
+flow_cnt=1
+use_netperf=0
+rr=0
+ecn=0
+details=0
+server=""
+qdisc=""
+flags=""
+do_stats=0
+
+BPFFS=/sys/fs/bpf
+function config_bpffs () {
+ if mount | grep $BPFFS > /dev/null; then
+ echo "bpffs already mounted"
+ else
+ echo "bpffs not mounted. Mounting..."
+ mount -t bpf none $BPFFS
+ fi
+}
+
+function start_hbm () {
+ rm -f hbm.out
+ echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
+ echo " " >> hbm.out
+ ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
+ echo $!
+}
+
+processArgs () {
+ for i in $args ; do
+ case $i in
+ # Support for upcoming ingress rate limiting
+ #in) # support for upcoming ingress rate limiting
+ # dir="-i"
+ # dir_name="in"
+ # ;;
+ out)
+ dir="-o"
+ dir_name="out"
+ ;;
+ -b=*|--bpf=*)
+ prog="${i#*=}"
+ ;;
+ -c=*|--cc=*)
+ cc="${i#*=}"
+ ;;
+ --no_cn)
+ flags="$flags --no_cn"
+ ;;
+ --debug)
+ flags="$flags -d"
+ debug_flag=1
+ ;;
+ -d=*|--delay=*)
+ netem="${i#*=}"
+ ;;
+ -D)
+ details=1
+ ;;
+ -E)
+ ecn=1
+ ;;
+ --edt)
+ flags="$flags --edt"
+ qdisc="fq"
+ ;;
+ -f=*|--flows=*)
+ flows="${i#*=}"
+ ;;
+ -i=*|--id=*)
+ id="${i#*=}"
+ ;;
+ -l)
+ flags="$flags -l"
+ ;;
+ -N)
+ use_netperf=1
+ ;;
+ -p=*|--port=*)
+ port="${i#*=}"
+ ;;
+ -P)
+ multi_iperf=1
+ ;;
+ -q=*)
+ qdisc="${i#*=}"
+ ;;
+ -r=*|--rate=*)
+ rate="${i#*=}"
+ ;;
+ -R)
+ rr=1
+ ;;
+ -s=*|--server=*)
+ server="${i#*=}"
+ ;;
+ -S|--stats)
+ flags="$flags -s"
+ do_stats=1
+ ;;
+ -t=*|--time=*)
+ dur="${i#*=}"
+ ;;
+ -w)
+ flags="$flags -w"
+ ;;
+ cubic)
+ cc=cubic
+ ;;
+ dctcp)
+ cc=dctcp
+ ;;
+ *)
+ echo "Unknown arg:$i"
+ Usage
+ ;;
+ esac
+ done
+}
+
+processArgs
+config_bpffs
+
+if [ $debug_flag -eq 1 ] ; then
+ rm -f hbm_out.log
+fi
+
+hbm_pid=$(start_hbm)
+usleep 100000
+
+host=`hostname`
+cg_base_dir=/sys/fs/cgroup/unified
+cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"
+
+echo $$ >> $cg_dir/cgroup.procs
+
+ulimit -l unlimited
+
+rm -f ss.out
+rm -f hbm.[0-9]*.$dir_name
+if [ $ecn -ne 0 ] ; then
+ sysctl -w -q -n net.ipv4.tcp_ecn=1
+fi
+
+if [ $use_netperf -eq 0 ] ; then
+ cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
+ if [ "$cc" != "x" ] ; then
+ sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
+ fi
+fi
+
+if [ "$netem" -ne "0" ] ; then
+ if [ "$qdisc" != "" ] ; then
+ echo "WARNING: Ignoring -q options because -d option used"
+ fi
+ tc qdisc del dev lo root > /dev/null 2>&1
+ tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
+elif [ "$qdisc" != "" ] ; then
+ tc qdisc del dev eth0 root > /dev/null 2>&1
+ tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
+fi
+
+n=0
+m=$[$dur * 5]
+hn="::1"
+if [ $use_netperf -ne 0 ] ; then
+ if [ "$server" != "" ] ; then
+ hn=$server
+ fi
+fi
+
+( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &
+
+if [ $use_netperf -ne 0 ] ; then
+ begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
+ awk '{ print $1 }'`
+ if [ "$begNetserverPid" == "" ] ; then
+ if [ "$server" == "" ] ; then
+ ( ./netserver > /dev/null 2>&1) &
+ usleep 100000
+ fi
+ fi
+ flow_cnt=1
+ if [ "$server" == "" ] ; then
+ np_server=$host
+ else
+ np_server=$server
+ fi
+ if [ "$cc" == "x" ] ; then
+ np_cc=""
+ else
+ np_cc="-K $cc,$cc"
+ fi
+ replySize=1
+ while [ $flow_cnt -le $flows ] ; do
+ if [ $rr -ne 0 ] ; then
+ reqSize=1M
+ if [ $flow_cnt -eq 1 ] ; then
+ reqSize=10K
+ fi
+ if [ "$dir" == "-i" ] ; then
+ replySize=$reqSize
+ reqSize=1
+ fi
+ ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+ else
+ if [ "$dir" == "-i" ] ; then
+ ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+ else
+ ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+ fi
+ fi
+ flow_cnt=$[flow_cnt+1]
+ done
+
+# sleep for duration of test (plus some buffer)
+ n=$[dur+2]
+ sleep $n
+
+# force graceful termination of netperf
+ pids=`pgrep netperf`
+ for p in $pids ; do
+ kill -SIGALRM $p
+ done
+
+ flow_cnt=1
+ rate=0
+ if [ $details -ne 0 ] ; then
+ echo ""
+ echo "Details for HBM in cgroup $id"
+ if [ $do_stats -eq 1 ] ; then
+ if [ -e hbm.$id.$dir_name ] ; then
+ cat hbm.$id.$dir_name
+ fi
+ fi
+ fi
+ while [ $flow_cnt -le $flows ] ; do
+ if [ "$dir" == "-i" ] ; then
+ r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
+ else
+ r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
+ fi
+ echo "rate for flow $flow_cnt: $r"
+ rate=$[rate+r]
+ if [ $details -ne 0 ] ; then
+ echo "-----"
+ echo "Details for cgroup $id, flow $flow_cnt"
+ cat netperf.$id.$flow_cnt
+ fi
+ flow_cnt=$[flow_cnt+1]
+ done
+ if [ $details -ne 0 ] ; then
+ echo ""
+ delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+ echo "PING AVG DELAY:$delay"
+ echo "AGGREGATE_GOODPUT:$rate"
+ else
+ echo $rate
+ fi
+elif [ $multi_iperf -eq 0 ] ; then
+ (iperf3 -s -p $port -1 > /dev/null 2>&1) &
+ usleep 100000
+ iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
+ rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
+ rate=`echo $rates | grep -o "[0-9]*$"`
+
+ if [ $details -ne 0 ] ; then
+ echo ""
+ echo "Details for HBM in cgroup $id"
+ if [ $do_stats -eq 1 ] ; then
+ if [ -e hbm.$id.$dir_name ] ; then
+ cat hbm.$id.$dir_name
+ fi
+ fi
+ delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+ echo "PING AVG DELAY:$delay"
+ echo "AGGREGATE_GOODPUT:$rate"
+ else
+ echo $rate
+ fi
+else
+ flow_cnt=1
+ while [ $flow_cnt -le $flows ] ; do
+ (iperf3 -s -p $port -1 > /dev/null 2>&1) &
+ ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
+ port=$[port+1]
+ flow_cnt=$[flow_cnt+1]
+ done
+ n=$[dur+1]
+ sleep $n
+ flow_cnt=1
+ rate=0
+ if [ $details -ne 0 ] ; then
+ echo ""
+ echo "Details for HBM in cgroup $id"
+ if [ $do_stats -eq 1 ] ; then
+ if [ -e hbm.$id.$dir_name ] ; then
+ cat hbm.$id.$dir_name
+ fi
+ fi
+ fi
+
+ while [ $flow_cnt -le $flows ] ; do
+ r=`cat iperf3.$id.$flow_cnt`
+# echo "rate for flow $flow_cnt: $r"
+ if [ $details -ne 0 ] ; then
+ echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
+ fi
+ rate=$[rate+r]
+ flow_cnt=$[flow_cnt+1]
+ done
+ if [ $details -ne 0 ] ; then
+ delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+ echo "PING AVG DELAY:$delay"
+ echo "AGGREGATE_GOODPUT:$rate"
+ else
+ echo $rate
+ fi
+fi
+
+if [ $use_netperf -eq 0 ] ; then
+ sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
+fi
+if [ $ecn -ne 0 ] ; then
+ sysctl -w -q -n net.ipv4.tcp_ecn=0
+fi
+if [ "$netem" -ne "0" ] ; then
+ tc qdisc del dev lo root > /dev/null 2>&1
+fi
+if [ "$qdisc" != "" ] ; then
+ tc qdisc del dev eth0 root > /dev/null 2>&1
+fi
+sleep 2
+
+hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
+if [ "$hbmPid" == "$hbm_pid" ] ; then
+ kill $hbm_pid
+fi
+
+sleep 1
+
+# Detach any pinned BPF programs that may have lingered
+rm -rf $BPFFS/hbm*
+
+if [ $use_netperf -ne 0 ] ; then
+ if [ "$server" == "" ] ; then
+ if [ "$begNetserverPid" == "" ] ; then
+ netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
+ if [ "$netserverPid" != "" ] ; then
+ kill $netserverPid
+ fi
+ fi
+ fi
+fi
+exit
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index e29bd52ff9e8..88a26f3ce201 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -12,9 +12,12 @@
#include <sys/types.h>
#include <sys/socket.h>
-#include "bpf_load.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+
+#include <bpf/libbpf.h>
+#include "bpf_insn.h"
#include "sock_example.h"
+#include "bpf_util.h"
#define BPF_F_PIN (1 << 0)
#define BPF_F_GET (1 << 1)
@@ -28,6 +31,8 @@
#define BPF_M_MAP 1
#define BPF_M_PROG 2
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
static void usage(void)
{
printf("Usage: fds_example [...]\n");
@@ -42,27 +47,30 @@ static void usage(void)
printf(" -h Display this help.\n");
}
-static int bpf_map_create(void)
-{
- return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
- sizeof(uint32_t), 1024, 0);
-}
-
static int bpf_prog_create(const char *object)
{
static struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(insns);
+ struct bpf_object *obj;
+ int err;
if (object) {
- assert(!load_bpf_file((char *)object));
- return prog_fd[0];
+ obj = bpf_object__open_file(object, NULL);
+ assert(!libbpf_get_error(obj));
+ err = bpf_object__load(obj);
+ assert(!err);
+ return bpf_program__fd(bpf_object__next_program(obj, NULL));
} else {
- return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER,
- insns, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = bpf_log_buf,
+ .log_size = BPF_LOG_BUF_SIZE,
+ );
+
+ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ insns, insns_cnt, &opts);
}
}
@@ -72,7 +80,8 @@ static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
int fd, ret;
if (flags & BPF_F_PIN) {
- fd = bpf_map_create();
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(uint32_t),
+ sizeof(uint32_t), 1024, NULL);
printf("bpf: map fd:%d (%s)\n", fd, strerror(errno));
assert(fd > 0);
diff --git a/samples/bpf/gnu/stubs.h b/samples/bpf/gnu/stubs.h
new file mode 100644
index 000000000000..1c638d9dce1a
--- /dev/null
+++ b/samples/bpf/gnu/stubs.h
@@ -0,0 +1 @@
+/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */
diff --git a/samples/bpf/hash_func01.h b/samples/bpf/hash_func01.h
new file mode 100644
index 000000000000..38255812e376
--- /dev/null
+++ b/samples/bpf/hash_func01.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1
+ *
+ * Based on Paul Hsieh's (LGPG 2.1) hash function
+ * From: http://www.azillionmonkeys.com/qed/hash.html
+ */
+
+#define get16bits(d) (*((const __u16 *) (d)))
+
+static __always_inline
+__u32 SuperFastHash (const char *data, int len, __u32 initval) {
+ __u32 hash = initval;
+ __u32 tmp;
+ int rem;
+
+ if (len <= 0 || data == NULL) return 0;
+
+ rem = len & 3;
+ len >>= 2;
+
+ /* Main loop */
+#pragma clang loop unroll(full)
+ for (;len > 0; len--) {
+ hash += get16bits (data);
+ tmp = (get16bits (data+2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2*sizeof (__u16);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases */
+ switch (rem) {
+ case 3: hash += get16bits (data);
+ hash ^= hash << 16;
+ hash ^= ((signed char)data[sizeof (__u16)]) << 18;
+ hash += hash >> 11;
+ break;
+ case 2: hash += get16bits (data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1: hash += (signed char)*data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
new file mode 100644
index 000000000000..fc88d4dbdf48
--- /dev/null
+++ b/samples/bpf/hbm.c
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Example program for Host Bandwidth Management
+ *
+ * This program loads a cgroup skb BPF program to enforce cgroup output
+ * (egress) or input (ingress) bandwidth limits.
+ *
+ * USAGE: hbm [-d] [-l] [-n <id>] [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog]
+ * Where:
+ * -d Print BPF trace debug buffer
+ * -l Also limit flows doing loopback
+ * -n <#> To create cgroup \"/hbm#\" and attach prog
+ * Default is /hbm1
+ * --no_cn Do not return cn notifications
+ * -r <rate> Rate limit in Mbps
+ * -s Get HBM stats (marked, dropped, etc.)
+ * -t <time> Exit after specified seconds (default is 0)
+ * -w Work conserving flag. cgroup can increase its bandwidth
+ * beyond the rate limit specified while there is available
+ * bandwidth. Current implementation assumes there is only
+ * NIC (eth0), but can be extended to support multiple NICs.
+ * Currently only supported for egress.
+ * -h Print this info
+ * prog BPF program file name. Name defaults to hbm_out_kern.o
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <linux/compiler.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <getopt.h>
+
+#include "cgroup_helpers.h"
+#include "hbm.h"
+#include "bpf_util.h"
+#include <bpf/libbpf.h>
+
+bool outFlag = true;
+int minRate = 1000; /* cgroup rate limit in Mbps */
+int rate = 1000; /* can grow if rate conserving is enabled */
+int dur = 1;
+bool stats_flag;
+bool loopback_flag;
+bool debugFlag;
+bool work_conserving_flag;
+bool no_cn_flag;
+bool edt_flag;
+
+static void Usage(void);
+static void read_trace_pipe2(void);
+static void do_error(char *msg, bool errno_flag);
+
+#define TRACEFS "/sys/kernel/tracing/"
+
+static struct bpf_program *bpf_prog;
+static struct bpf_object *obj;
+static int queue_stats_fd;
+
+static void read_trace_pipe2(void)
+{
+ int trace_fd;
+ FILE *outf;
+ char *outFname = "hbm_out.log";
+
+ trace_fd = open(TRACEFS "trace_pipe", O_RDONLY, 0);
+ if (trace_fd < 0) {
+ printf("Error opening trace_pipe\n");
+ return;
+ }
+
+// Future support of ingress
+// if (!outFlag)
+// outFname = "hbm_in.log";
+ outf = fopen(outFname, "w");
+
+ if (outf == NULL)
+ printf("Error creating %s\n", outFname);
+
+ while (1) {
+ static char buf[4097];
+ ssize_t sz;
+
+ sz = read(trace_fd, buf, sizeof(buf) - 1);
+ if (sz > 0) {
+ buf[sz] = 0;
+ puts(buf);
+ if (outf != NULL) {
+ fprintf(outf, "%s\n", buf);
+ fflush(outf);
+ }
+ }
+ }
+}
+
+static void do_error(char *msg, bool errno_flag)
+{
+ if (errno_flag)
+ printf("ERROR: %s, errno: %d\n", msg, errno);
+ else
+ printf("ERROR: %s\n", msg);
+ exit(1);
+}
+
+static int prog_load(char *prog)
+{
+ struct bpf_program *pos;
+ const char *sec_name;
+
+ obj = bpf_object__open_file(prog, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("ERROR: opening BPF object file failed\n");
+ return 1;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("ERROR: loading BPF object file failed\n");
+ goto err;
+ }
+
+ bpf_object__for_each_program(pos, obj) {
+ sec_name = bpf_program__section_name(pos);
+ if (sec_name && !strcmp(sec_name, "cgroup_skb/egress")) {
+ bpf_prog = pos;
+ break;
+ }
+ }
+ if (!bpf_prog) {
+ printf("ERROR: finding a prog in obj file failed\n");
+ goto err;
+ }
+
+ queue_stats_fd = bpf_object__find_map_fd_by_name(obj, "queue_stats");
+ if (queue_stats_fd < 0) {
+ printf("ERROR: finding a map in obj file failed\n");
+ goto err;
+ }
+
+ return 0;
+
+err:
+ bpf_object__close(obj);
+ return 1;
+}
+
+static int run_bpf_prog(char *prog, int cg_id)
+{
+ struct hbm_queue_stats qstats = {0};
+ char cg_dir[100], cg_pin_path[100];
+ struct bpf_link *link = NULL;
+ int key = 0;
+ int cg1 = 0;
+ int rc = 0;
+
+ sprintf(cg_dir, "/hbm%d", cg_id);
+ rc = prog_load(prog);
+ if (rc != 0)
+ return rc;
+
+ if (setup_cgroup_environment()) {
+ printf("ERROR: setting cgroup environment\n");
+ goto err;
+ }
+ cg1 = create_and_get_cgroup(cg_dir);
+ if (!cg1) {
+ printf("ERROR: create_and_get_cgroup\n");
+ goto err;
+ }
+ if (join_cgroup(cg_dir)) {
+ printf("ERROR: join_cgroup\n");
+ goto err;
+ }
+
+ qstats.rate = rate;
+ qstats.stats = stats_flag ? 1 : 0;
+ qstats.loopback = loopback_flag ? 1 : 0;
+ qstats.no_cn = no_cn_flag ? 1 : 0;
+ if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) {
+ printf("ERROR: Could not update map element\n");
+ goto err;
+ }
+
+ if (!outFlag)
+ bpf_program__set_expected_attach_type(bpf_prog, BPF_CGROUP_INET_INGRESS);
+
+ link = bpf_program__attach_cgroup(bpf_prog, cg1);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach_cgroup failed\n");
+ goto err;
+ }
+
+ sprintf(cg_pin_path, "/sys/fs/bpf/hbm%d", cg_id);
+ rc = bpf_link__pin(link, cg_pin_path);
+ if (rc < 0) {
+ printf("ERROR: bpf_link__pin failed: %d\n", rc);
+ goto err;
+ }
+
+ if (work_conserving_flag) {
+ struct timeval t0, t_last, t_new;
+ FILE *fin;
+ unsigned long long last_eth_tx_bytes, new_eth_tx_bytes;
+ signed long long last_cg_tx_bytes, new_cg_tx_bytes;
+ signed long long delta_time, delta_bytes, delta_rate;
+ int delta_ms;
+#define DELTA_RATE_CHECK 10000 /* in us */
+#define RATE_THRESHOLD 9500000000 /* 9.5 Gbps */
+
+ bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
+ if (gettimeofday(&t0, NULL) < 0)
+ do_error("gettimeofday failed", true);
+ t_last = t0;
+ fin = fopen("/sys/class/net/eth0/statistics/tx_bytes", "r");
+ if (fscanf(fin, "%llu", &last_eth_tx_bytes) != 1)
+ do_error("fscanf fails", false);
+ fclose(fin);
+ last_cg_tx_bytes = qstats.bytes_total;
+ while (true) {
+ usleep(DELTA_RATE_CHECK);
+ if (gettimeofday(&t_new, NULL) < 0)
+ do_error("gettimeofday failed", true);
+ delta_ms = (t_new.tv_sec - t0.tv_sec) * 1000 +
+ (t_new.tv_usec - t0.tv_usec)/1000;
+ if (delta_ms > dur * 1000)
+ break;
+ delta_time = (t_new.tv_sec - t_last.tv_sec) * 1000000 +
+ (t_new.tv_usec - t_last.tv_usec);
+ if (delta_time == 0)
+ continue;
+ t_last = t_new;
+ fin = fopen("/sys/class/net/eth0/statistics/tx_bytes",
+ "r");
+ if (fscanf(fin, "%llu", &new_eth_tx_bytes) != 1)
+ do_error("fscanf fails", false);
+ fclose(fin);
+ printf(" new_eth_tx_bytes:%llu\n",
+ new_eth_tx_bytes);
+ bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
+ new_cg_tx_bytes = qstats.bytes_total;
+ delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
+ last_eth_tx_bytes = new_eth_tx_bytes;
+ delta_rate = (delta_bytes * 8000000) / delta_time;
+ printf("%5d - eth_rate:%.1fGbps cg_rate:%.3fGbps",
+ delta_ms, delta_rate/1000000000.0,
+ rate/1000.0);
+ if (delta_rate < RATE_THRESHOLD) {
+ /* can increase cgroup rate limit, but first
+ * check if we are using the current limit.
+ * Currently increasing by 6.25%, unknown
+ * if that is the optimal rate.
+ */
+ int rate_diff100;
+
+ delta_bytes = new_cg_tx_bytes -
+ last_cg_tx_bytes;
+ last_cg_tx_bytes = new_cg_tx_bytes;
+ delta_rate = (delta_bytes * 8000000) /
+ delta_time;
+ printf(" rate:%.3fGbps",
+ delta_rate/1000000000.0);
+ rate_diff100 = (((long long)rate)*1000000 -
+ delta_rate) * 100 /
+ (((long long) rate) * 1000000);
+ printf(" rdiff:%d", rate_diff100);
+ if (rate_diff100 <= 3) {
+ rate += (rate >> 4);
+ if (rate > RATE_THRESHOLD / 1000000)
+ rate = RATE_THRESHOLD / 1000000;
+ qstats.rate = rate;
+ printf(" INC\n");
+ } else {
+ printf("\n");
+ }
+ } else {
+ /* Need to decrease cgroup rate limit.
+ * Currently decreasing by 12.5%, unknown
+ * if that is optimal
+ */
+ printf(" DEC\n");
+ rate -= (rate >> 3);
+ if (rate < minRate)
+ rate = minRate;
+ qstats.rate = rate;
+ }
+ if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY))
+ do_error("update map element fails", false);
+ }
+ } else {
+ sleep(dur);
+ }
+ // Get stats!
+ if (stats_flag && bpf_map_lookup_elem(queue_stats_fd, &key, &qstats)) {
+ char fname[100];
+ FILE *fout;
+
+ if (!outFlag)
+ sprintf(fname, "hbm.%d.in", cg_id);
+ else
+ sprintf(fname, "hbm.%d.out", cg_id);
+ fout = fopen(fname, "w");
+ fprintf(fout, "id:%d\n", cg_id);
+ fprintf(fout, "ERROR: Could not lookup queue_stats\n");
+ fclose(fout);
+ } else if (stats_flag && qstats.lastPacketTime >
+ qstats.firstPacketTime) {
+ long long delta_us = (qstats.lastPacketTime -
+ qstats.firstPacketTime)/1000;
+ unsigned int rate_mbps = ((qstats.bytes_total -
+ qstats.bytes_dropped) * 8 /
+ delta_us);
+ double percent_pkts, percent_bytes;
+ char fname[100];
+ FILE *fout;
+ int k;
+ static const char *returnValNames[] = {
+ "DROP_PKT",
+ "ALLOW_PKT",
+ "DROP_PKT_CWR",
+ "ALLOW_PKT_CWR"
+ };
+#define RET_VAL_COUNT 4
+
+// Future support of ingress
+// if (!outFlag)
+// sprintf(fname, "hbm.%d.in", cg_id);
+// else
+ sprintf(fname, "hbm.%d.out", cg_id);
+ fout = fopen(fname, "w");
+ fprintf(fout, "id:%d\n", cg_id);
+ fprintf(fout, "rate_mbps:%d\n", rate_mbps);
+ fprintf(fout, "duration:%.1f secs\n",
+ (qstats.lastPacketTime - qstats.firstPacketTime) /
+ 1000000000.0);
+ fprintf(fout, "packets:%d\n", (int)qstats.pkts_total);
+ fprintf(fout, "bytes_MB:%d\n", (int)(qstats.bytes_total /
+ 1000000));
+ fprintf(fout, "pkts_dropped:%d\n", (int)qstats.pkts_dropped);
+ fprintf(fout, "bytes_dropped_MB:%d\n",
+ (int)(qstats.bytes_dropped /
+ 1000000));
+ // Marked Pkts and Bytes
+ percent_pkts = (qstats.pkts_marked * 100.0) /
+ (qstats.pkts_total + 1);
+ percent_bytes = (qstats.bytes_marked * 100.0) /
+ (qstats.bytes_total + 1);
+ fprintf(fout, "pkts_marked_percent:%6.2f\n", percent_pkts);
+ fprintf(fout, "bytes_marked_percent:%6.2f\n", percent_bytes);
+
+ // Dropped Pkts and Bytes
+ percent_pkts = (qstats.pkts_dropped * 100.0) /
+ (qstats.pkts_total + 1);
+ percent_bytes = (qstats.bytes_dropped * 100.0) /
+ (qstats.bytes_total + 1);
+ fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts);
+ fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes);
+
+ // ECN CE markings
+ percent_pkts = (qstats.pkts_ecn_ce * 100.0) /
+ (qstats.pkts_total + 1);
+ fprintf(fout, "pkts_ecn_ce:%6.2f (%d)\n", percent_pkts,
+ (int)qstats.pkts_ecn_ce);
+
+ // Average cwnd
+ fprintf(fout, "avg cwnd:%d\n",
+ (int)(qstats.sum_cwnd / (qstats.sum_cwnd_cnt + 1)));
+ // Average rtt
+ fprintf(fout, "avg rtt:%d\n",
+ (int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
+ // Average credit
+ if (edt_flag)
+ fprintf(fout, "avg credit_ms:%.03f\n",
+ (qstats.sum_credit /
+ (qstats.pkts_total + 1.0)) / 1000000.0);
+ else
+ fprintf(fout, "avg credit:%d\n",
+ (int)(qstats.sum_credit /
+ (1500 * ((int)qstats.pkts_total ) + 1)));
+
+ // Return values stats
+ for (k = 0; k < RET_VAL_COUNT; k++) {
+ percent_pkts = (qstats.returnValCount[k] * 100.0) /
+ (qstats.pkts_total + 1);
+ fprintf(fout, "%s:%6.2f (%d)\n", returnValNames[k],
+ percent_pkts, (int)qstats.returnValCount[k]);
+ }
+ fclose(fout);
+ }
+
+ if (debugFlag)
+ read_trace_pipe2();
+ goto cleanup;
+
+err:
+ rc = 1;
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+
+ if (cg1 != -1)
+ close(cg1);
+
+ if (rc != 0)
+ cleanup_cgroup_environment();
+ return rc;
+}
+
+static void Usage(void)
+{
+ printf("This program loads a cgroup skb BPF program to enforce\n"
+ "cgroup output (egress) bandwidth limits.\n\n"
+ "USAGE: hbm [-o] [-d] [-l] [-n <id>] [--no_cn] [-r <rate>]\n"
+ " [-s] [-t <secs>] [-w] [-h] [prog]\n"
+ " Where:\n"
+ " -o indicates egress direction (default)\n"
+ " -d print BPF trace debug buffer\n"
+ " --edt use fq's Earliest Departure Time\n"
+ " -l also limit flows using loopback\n"
+ " -n <#> to create cgroup \"/hbm#\" and attach prog\n"
+ " Default is /hbm1\n"
+ " --no_cn disable CN notifications\n"
+ " -r <rate> Rate in Mbps\n"
+ " -s Update HBM stats\n"
+ " -t <time> Exit after specified seconds (default is 0)\n"
+ " -w Work conserving flag. cgroup can increase\n"
+ " bandwidth beyond the rate limit specified\n"
+ " while there is available bandwidth. Current\n"
+ " implementation assumes there is only eth0\n"
+ " but can be extended to support multiple NICs\n"
+ " -h print this info\n"
+ " prog BPF program file name. Name defaults to\n"
+ " hbm_out_kern.o\n");
+}
+
+int main(int argc, char **argv)
+{
+ char *prog = "hbm_out_kern.o";
+ int k;
+ int cg_id = 1;
+ char *optstring = "iodln:r:st:wh";
+ struct option loptions[] = {
+ {"no_cn", 0, NULL, 1},
+ {"edt", 0, NULL, 2},
+ {NULL, 0, NULL, 0}
+ };
+
+ while ((k = getopt_long(argc, argv, optstring, loptions, NULL)) != -1) {
+ switch (k) {
+ case 1:
+ no_cn_flag = true;
+ break;
+ case 2:
+ prog = "hbm_edt_kern.o";
+ edt_flag = true;
+ break;
+ case'o':
+ break;
+ case 'd':
+ debugFlag = true;
+ break;
+ case 'l':
+ loopback_flag = true;
+ break;
+ case 'n':
+ cg_id = atoi(optarg);
+ break;
+ case 'r':
+ minRate = atoi(optarg) * 1.024;
+ rate = minRate;
+ break;
+ case 's':
+ stats_flag = true;
+ break;
+ case 't':
+ dur = atoi(optarg);
+ break;
+ case 'w':
+ work_conserving_flag = true;
+ break;
+ case '?':
+ if (optopt == 'n' || optopt == 'r' || optopt == 't')
+ fprintf(stderr,
+ "Option -%c requires an argument.\n\n",
+ optopt);
+ case 'h':
+ default:
+ Usage();
+ return 0;
+ }
+ }
+
+ if (optind < argc)
+ prog = argv[optind];
+ printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");
+
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ return run_bpf_prog(prog, cg_id);
+}
diff --git a/samples/bpf/hbm.h b/samples/bpf/hbm.h
new file mode 100644
index 000000000000..f0963ed6a562
--- /dev/null
+++ b/samples/bpf/hbm.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Include file for Host Bandwidth Management (HBM) programs
+ */
+struct hbm_vqueue {
+ struct bpf_spin_lock lock;
+ /* 4 byte hole */
+ unsigned long long lasttime; /* In ns */
+ int credit; /* In bytes */
+ unsigned int rate; /* In bytes per NS << 20 */
+};
+
+struct hbm_queue_stats {
+ unsigned long rate; /* in Mbps*/
+ unsigned long stats:1, /* get HBM stats (marked, dropped,..) */
+ loopback:1, /* also limit flows using loopback */
+ no_cn:1; /* do not use cn flags */
+ unsigned long long pkts_marked;
+ unsigned long long bytes_marked;
+ unsigned long long pkts_dropped;
+ unsigned long long bytes_dropped;
+ unsigned long long pkts_total;
+ unsigned long long bytes_total;
+ unsigned long long firstPacketTime;
+ unsigned long long lastPacketTime;
+ unsigned long long pkts_ecn_ce;
+ unsigned long long returnValCount[4];
+ unsigned long long sum_cwnd;
+ unsigned long long sum_rtt;
+ unsigned long long sum_cwnd_cnt;
+ long long sum_credit;
+};
diff --git a/samples/bpf/hbm_edt_kern.c b/samples/bpf/hbm_edt_kern.c
new file mode 100644
index 000000000000..6294f1d716c0
--- /dev/null
+++ b/samples/bpf/hbm_edt_kern.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample Host Bandwidth Manager (HBM) BPF program.
+ *
+ * A cgroup skb BPF egress program to limit cgroup output bandwidth.
+ * It uses a modified virtual token bucket queue to limit average
+ * egress bandwidth. The implementation uses credits instead of tokens.
+ * Negative credits imply that queueing would have happened (this is
+ * a virtual queue, so no queueing is done by it. However, queueing may
+ * occur at the actual qdisc (which is not used for rate limiting).
+ *
+ * This implementation uses 3 thresholds, one to start marking packets and
+ * the other two to drop packets:
+ * CREDIT
+ * - <--------------------------|------------------------> +
+ * | | | 0
+ * | Large pkt |
+ * | drop thresh |
+ * Small pkt drop Mark threshold
+ * thresh
+ *
+ * The effect of marking depends on the type of packet:
+ * a) If the packet is ECN enabled and it is a TCP packet, then the packet
+ * is ECN marked.
+ * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr
+ * to reduce the congestion window. The current implementation uses a linear
+ * distribution (0% probability at marking threshold, 100% probability
+ * at drop threshold).
+ * c) If the packet is not a TCP packet, then it is dropped.
+ *
+ * If the credit is below the drop threshold, the packet is dropped. If it
+ * is a TCP packet, then it also calls tcp_cwr since packets dropped by
+ * a cgroup skb BPF program do not automatically trigger a call to
+ * tcp_cwr in the current kernel code.
+ *
+ * This BPF program actually uses 2 drop thresholds, one threshold
+ * for larger packets (>= 120 bytes) and another for smaller packets. This
+ * protects smaller packets such as SYNs, ACKs, etc.
+ *
+ * The default bandwidth limit is set at 1Gbps but this can be changed by
+ * a user program through a shared BPF map. In addition, by default this BPF
+ * program does not limit connections using loopback. This behavior can be
+ * overwritten by the user program. There is also an option to calculate
+ * some statistics, such as percent of packets marked or dropped, which
+ * a user program, such as hbm, can access.
+ */
+
+#include "hbm_kern.h"
+
+SEC("cgroup_skb/egress")
+int _hbm_out_cg(struct __sk_buff *skb)
+{
+ long long delta = 0, delta_send;
+ unsigned long long curtime, sendtime;
+ struct hbm_queue_stats *qsp = NULL;
+ unsigned int queue_index = 0;
+ bool congestion_flag = false;
+ bool ecn_ce_flag = false;
+ struct hbm_pkt_info pkti = {};
+ struct hbm_vqueue *qdp;
+ bool drop_flag = false;
+ bool cwr_flag = false;
+ int len = skb->len;
+ int rv = ALLOW_PKT;
+
+ qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
+
+ // Check if we should ignore loopback traffic
+ if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
+ return ALLOW_PKT;
+
+ hbm_get_pkt_info(skb, &pkti);
+
+ // We may want to account for the length of headers in len
+ // calculation, like ETH header + overhead, specially if it
+ // is a gso packet. But I am not doing it right now.
+
+ qdp = bpf_get_local_storage(&queue_state, 0);
+ if (!qdp)
+ return ALLOW_PKT;
+ if (qdp->lasttime == 0)
+ hbm_init_edt_vqueue(qdp, 1024);
+
+ curtime = bpf_ktime_get_ns();
+
+ // Begin critical section
+ bpf_spin_lock(&qdp->lock);
+ delta = qdp->lasttime - curtime;
+ // bound bursts to 100us
+ if (delta < -BURST_SIZE_NS) {
+ // negative delta is a credit that allows bursts
+ qdp->lasttime = curtime - BURST_SIZE_NS;
+ delta = -BURST_SIZE_NS;
+ }
+ sendtime = qdp->lasttime;
+ delta_send = BYTES_TO_NS(len, qdp->rate);
+ __sync_add_and_fetch(&(qdp->lasttime), delta_send);
+ bpf_spin_unlock(&qdp->lock);
+ // End critical section
+
+ // Set EDT of packet
+ skb->tstamp = sendtime;
+
+ // Check if we should update rate
+ if (qsp != NULL && (qsp->rate * 128) != qdp->rate)
+ qdp->rate = qsp->rate * 128;
+
+ // Set flags (drop, congestion, cwr)
+ // last packet will be sent in the future, bound latency
+ if (delta > DROP_THRESH_NS || (delta > LARGE_PKT_DROP_THRESH_NS &&
+ len > LARGE_PKT_THRESH)) {
+ drop_flag = true;
+ if (pkti.is_tcp && pkti.ecn == 0)
+ cwr_flag = true;
+ } else if (delta > MARK_THRESH_NS) {
+ if (pkti.is_tcp)
+ congestion_flag = true;
+ else
+ drop_flag = true;
+ }
+
+ if (congestion_flag) {
+ if (bpf_skb_ecn_set_ce(skb)) {
+ ecn_ce_flag = true;
+ } else {
+ if (pkti.is_tcp) {
+ unsigned int rand = bpf_get_prandom_u32();
+
+ if (delta >= MARK_THRESH_NS +
+ (rand % MARK_REGION_SIZE_NS)) {
+ // Do congestion control
+ cwr_flag = true;
+ }
+ } else if (len > LARGE_PKT_THRESH) {
+ // Problem if too many small packets?
+ drop_flag = true;
+ congestion_flag = false;
+ }
+ }
+ }
+
+ if (pkti.is_tcp && drop_flag && pkti.packets_out <= 1) {
+ drop_flag = false;
+ cwr_flag = true;
+ congestion_flag = false;
+ }
+
+ if (qsp != NULL && qsp->no_cn)
+ cwr_flag = false;
+
+ hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
+ cwr_flag, ecn_ce_flag, &pkti, (int) delta);
+
+ if (drop_flag) {
+ __sync_add_and_fetch(&(qdp->lasttime), -delta_send);
+ rv = DROP_PKT;
+ }
+
+ if (cwr_flag)
+ rv |= CWR;
+ return rv;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
new file mode 100644
index 000000000000..1752a46a2b05
--- /dev/null
+++ b/samples/bpf/hbm_kern.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Include file for sample Host Bandwidth Manager (HBM) BPF programs
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include <net/ipv6.h>
+#include <net/inet_ecn.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include "hbm.h"
+
+#define DROP_PKT 0
+#define ALLOW_PKT 1
+#define TCP_ECN_OK 1
+#define CWR 2
+
+#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
+#undef bpf_printk
+#define bpf_printk(fmt, ...)
+#endif
+
+#define INITIAL_CREDIT_PACKETS 100
+#define MAX_BYTES_PER_PACKET 1500
+#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET)
+#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET)
+#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
+#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH)
+#define LARGE_PKT_THRESH 120
+#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET)
+#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
+
+// Time base accounting for fq's EDT
+#define BURST_SIZE_NS 100000 // 100us
+#define MARK_THRESH_NS 50000 // 50us
+#define DROP_THRESH_NS 500000 // 500us
+// Reserve 20us of queuing for small packets (less than 120 bytes)
+#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
+#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
+
+// rate in bytes per ns << 20
+#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct hbm_vqueue);
+} queue_state SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, struct hbm_queue_stats);
+} queue_stats SEC(".maps");
+
+struct hbm_pkt_info {
+ int cwnd;
+ int rtt;
+ int packets_out;
+ bool is_ip;
+ bool is_tcp;
+ short ecn;
+};
+
+static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
+{
+ struct bpf_sock *sk;
+ struct bpf_tcp_sock *tp;
+
+ sk = skb->sk;
+ if (sk) {
+ sk = bpf_sk_fullsock(sk);
+ if (sk) {
+ if (sk->protocol == IPPROTO_TCP) {
+ tp = bpf_tcp_sock(sk);
+ if (tp) {
+ pkti->cwnd = tp->snd_cwnd;
+ pkti->rtt = tp->srtt_us >> 3;
+ pkti->packets_out = tp->packets_out;
+ return 0;
+ }
+ }
+ }
+ }
+ pkti->cwnd = 0;
+ pkti->rtt = 0;
+ pkti->packets_out = 0;
+ return 1;
+}
+
+static void hbm_get_pkt_info(struct __sk_buff *skb,
+ struct hbm_pkt_info *pkti)
+{
+ struct iphdr iph;
+ struct ipv6hdr *ip6h;
+
+ pkti->cwnd = 0;
+ pkti->rtt = 0;
+ bpf_skb_load_bytes(skb, 0, &iph, 12);
+ if (iph.version == 6) {
+ ip6h = (struct ipv6hdr *)&iph;
+ pkti->is_ip = true;
+ pkti->is_tcp = (ip6h->nexthdr == 6);
+ pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
+ } else if (iph.version == 4) {
+ pkti->is_ip = true;
+ pkti->is_tcp = (iph.protocol == 6);
+ pkti->ecn = iph.tos & INET_ECN_MASK;
+ } else {
+ pkti->is_ip = false;
+ pkti->is_tcp = false;
+ pkti->ecn = 0;
+ }
+ if (pkti->is_tcp)
+ get_tcp_info(skb, pkti);
+}
+
+static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
+{
+ bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+ qdp->lasttime = bpf_ktime_get_ns();
+ qdp->credit = INIT_CREDIT;
+ qdp->rate = rate * 128;
+}
+
+static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
+ int rate)
+{
+ unsigned long long curtime;
+
+ curtime = bpf_ktime_get_ns();
+ bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+ qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst
+ qdp->credit = 0; // not used
+ qdp->rate = rate * 128;
+}
+
+static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
+ int len,
+ unsigned long long curtime,
+ bool congestion_flag,
+ bool drop_flag,
+ bool cwr_flag,
+ bool ecn_ce_flag,
+ struct hbm_pkt_info *pkti,
+ int credit)
+{
+ int rv = ALLOW_PKT;
+
+ if (qsp != NULL) {
+ // Following is needed for work conserving
+ __sync_add_and_fetch(&(qsp->bytes_total), len);
+ if (qsp->stats) {
+ // Optionally update statistics
+ if (qsp->firstPacketTime == 0)
+ qsp->firstPacketTime = curtime;
+ qsp->lastPacketTime = curtime;
+ __sync_add_and_fetch(&(qsp->pkts_total), 1);
+ if (congestion_flag) {
+ __sync_add_and_fetch(&(qsp->pkts_marked), 1);
+ __sync_add_and_fetch(&(qsp->bytes_marked), len);
+ }
+ if (drop_flag) {
+ __sync_add_and_fetch(&(qsp->pkts_dropped), 1);
+ __sync_add_and_fetch(&(qsp->bytes_dropped),
+ len);
+ }
+ if (ecn_ce_flag)
+ __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
+ if (pkti->cwnd) {
+ __sync_add_and_fetch(&(qsp->sum_cwnd),
+ pkti->cwnd);
+ __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
+ }
+ if (pkti->rtt)
+ __sync_add_and_fetch(&(qsp->sum_rtt),
+ pkti->rtt);
+ __sync_add_and_fetch(&(qsp->sum_credit), credit);
+
+ if (drop_flag)
+ rv = DROP_PKT;
+ if (cwr_flag)
+ rv |= 2;
+ if (rv == DROP_PKT)
+ __sync_add_and_fetch(&(qsp->returnValCount[0]),
+ 1);
+ else if (rv == ALLOW_PKT)
+ __sync_add_and_fetch(&(qsp->returnValCount[1]),
+ 1);
+ else if (rv == 2)
+ __sync_add_and_fetch(&(qsp->returnValCount[2]),
+ 1);
+ else if (rv == 3)
+ __sync_add_and_fetch(&(qsp->returnValCount[3]),
+ 1);
+ }
+ }
+}
diff --git a/samples/bpf/hbm_out_kern.c b/samples/bpf/hbm_out_kern.c
new file mode 100644
index 000000000000..829934bd43cb
--- /dev/null
+++ b/samples/bpf/hbm_out_kern.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample Host Bandwidth Manager (HBM) BPF program.
+ *
+ * A cgroup skb BPF egress program to limit cgroup output bandwidth.
+ * It uses a modified virtual token bucket queue to limit average
+ * egress bandwidth. The implementation uses credits instead of tokens.
+ * Negative credits imply that queueing would have happened (this is
+ * a virtual queue, so no queueing is done by it. However, queueing may
+ * occur at the actual qdisc (which is not used for rate limiting).
+ *
+ * This implementation uses 3 thresholds, one to start marking packets and
+ * the other two to drop packets:
+ * CREDIT
+ * - <--------------------------|------------------------> +
+ * | | | 0
+ * | Large pkt |
+ * | drop thresh |
+ * Small pkt drop Mark threshold
+ * thresh
+ *
+ * The effect of marking depends on the type of packet:
+ * a) If the packet is ECN enabled and it is a TCP packet, then the packet
+ * is ECN marked.
+ * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr
+ * to reduce the congestion window. The current implementation uses a linear
+ * distribution (0% probability at marking threshold, 100% probability
+ * at drop threshold).
+ * c) If the packet is not a TCP packet, then it is dropped.
+ *
+ * If the credit is below the drop threshold, the packet is dropped. If it
+ * is a TCP packet, then it also calls tcp_cwr since packets dropped by
+ * by a cgroup skb BPF program do not automatically trigger a call to
+ * tcp_cwr in the current kernel code.
+ *
+ * This BPF program actually uses 2 drop thresholds, one threshold
+ * for larger packets (>= 120 bytes) and another for smaller packets. This
+ * protects smaller packets such as SYNs, ACKs, etc.
+ *
+ * The default bandwidth limit is set at 1Gbps but this can be changed by
+ * a user program through a shared BPF map. In addition, by default this BPF
+ * program does not limit connections using loopback. This behavior can be
+ * overwritten by the user program. There is also an option to calculate
+ * some statistics, such as percent of packets marked or dropped, which
+ * the user program can access.
+ *
+ * A latter patch provides such a program (hbm.c)
+ */
+
+#include "hbm_kern.h"
+
+SEC("cgroup_skb/egress")
+int _hbm_out_cg(struct __sk_buff *skb)
+{
+ struct hbm_pkt_info pkti;
+ int len = skb->len;
+ unsigned int queue_index = 0;
+ unsigned long long curtime;
+ int credit;
+ signed long long delta = 0, new_credit;
+ int max_credit = MAX_CREDIT;
+ bool congestion_flag = false;
+ bool drop_flag = false;
+ bool cwr_flag = false;
+ bool ecn_ce_flag = false;
+ struct hbm_vqueue *qdp;
+ struct hbm_queue_stats *qsp = NULL;
+ int rv = ALLOW_PKT;
+
+ qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
+ if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
+ return ALLOW_PKT;
+
+ hbm_get_pkt_info(skb, &pkti);
+
+ // We may want to account for the length of headers in len
+ // calculation, like ETH header + overhead, specially if it
+ // is a gso packet. But I am not doing it right now.
+
+ qdp = bpf_get_local_storage(&queue_state, 0);
+ if (!qdp)
+ return ALLOW_PKT;
+ else if (qdp->lasttime == 0)
+ hbm_init_vqueue(qdp, 1024);
+
+ curtime = bpf_ktime_get_ns();
+
+ // Begin critical section
+ bpf_spin_lock(&qdp->lock);
+ credit = qdp->credit;
+ delta = curtime - qdp->lasttime;
+ /* delta < 0 implies that another process with a curtime greater
+ * than ours beat us to the critical section and already added
+ * the new credit, so we should not add it ourselves
+ */
+ if (delta > 0) {
+ qdp->lasttime = curtime;
+ new_credit = credit + CREDIT_PER_NS(delta, qdp->rate);
+ if (new_credit > MAX_CREDIT)
+ credit = MAX_CREDIT;
+ else
+ credit = new_credit;
+ }
+ credit -= len;
+ qdp->credit = credit;
+ bpf_spin_unlock(&qdp->lock);
+ // End critical section
+
+ // Check if we should update rate
+ if (qsp != NULL && (qsp->rate * 128) != qdp->rate) {
+ qdp->rate = qsp->rate * 128;
+ bpf_printk("Updating rate: %d (1sec:%llu bits)\n",
+ (int)qdp->rate,
+ CREDIT_PER_NS(1000000000, qdp->rate) * 8);
+ }
+
+ // Set flags (drop, congestion, cwr)
+ // Dropping => we are congested, so ignore congestion flag
+ if (credit < -DROP_THRESH ||
+ (len > LARGE_PKT_THRESH && credit < -LARGE_PKT_DROP_THRESH)) {
+ // Very congested, set drop packet
+ drop_flag = true;
+ if (pkti.ecn)
+ congestion_flag = true;
+ else if (pkti.is_tcp)
+ cwr_flag = true;
+ } else if (credit < 0) {
+ // Congested, set congestion flag
+ if (pkti.ecn || pkti.is_tcp) {
+ if (credit < -MARK_THRESH)
+ congestion_flag = true;
+ else
+ congestion_flag = false;
+ } else {
+ congestion_flag = true;
+ }
+ }
+
+ if (congestion_flag) {
+ if (bpf_skb_ecn_set_ce(skb)) {
+ ecn_ce_flag = true;
+ } else {
+ if (pkti.is_tcp) {
+ unsigned int rand = bpf_get_prandom_u32();
+
+ if (-credit >= MARK_THRESH +
+ (rand % MARK_REGION_SIZE)) {
+ // Do congestion control
+ cwr_flag = true;
+ }
+ } else if (len > LARGE_PKT_THRESH) {
+ // Problem if too many small packets?
+ drop_flag = true;
+ }
+ }
+ }
+
+ if (qsp != NULL)
+ if (qsp->no_cn)
+ cwr_flag = false;
+
+ hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
+ cwr_flag, ecn_ce_flag, &pkti, credit);
+
+ if (drop_flag) {
+ __sync_add_and_fetch(&(qdp->credit), len);
+ rv = DROP_PKT;
+ }
+
+ if (cwr_flag)
+ rv |= 2;
+ return rv;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
new file mode 100644
index 000000000000..f07474c72525
--- /dev/null
+++ b/samples/bpf/ibumad_kern.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+/*
+ * ibumad BPF sample kernel side
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Copyright(c) 2018 Ira Weiny, Intel Corporation
+ */
+
+#define KBUILD_MODNAME "ibumad_count_pkts_by_class"
+#include <uapi/linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32); /* class; u32 required */
+ __type(value, u64); /* count of mads read */
+ __uint(max_entries, 256); /* Room for all Classes */
+} read_count SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32); /* class; u32 required */
+ __type(value, u64); /* count of mads written */
+ __uint(max_entries, 256); /* Room for all Classes */
+} write_count SEC(".maps");
+
+#undef DEBUG
+#ifndef DEBUG
+#undef bpf_printk
+#define bpf_printk(fmt, ...)
+#endif
+
+/* Taken from the current format defined in
+ * include/trace/events/ib_umad.h
+ * and
+ * /sys/kernel/tracing/events/ib_umad/ib_umad_read/format
+ * /sys/kernel/tracing/events/ib_umad/ib_umad_write/format
+ */
+struct ib_umad_rw_args {
+ u64 pad;
+ u8 port_num;
+ u8 sl;
+ u8 path_bits;
+ u8 grh_present;
+ u32 id;
+ u32 status;
+ u32 timeout_ms;
+ u32 retires;
+ u32 length;
+ u32 qpn;
+ u32 qkey;
+ u8 gid_index;
+ u8 hop_limit;
+ u16 lid;
+ u16 attr_id;
+ u16 pkey_index;
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ u32 flow_label;
+ u16 mad_status;
+ u16 class_specific;
+ u32 attr_mod;
+ u64 tid;
+ u8 gid[16];
+ u32 dev_index;
+ u8 traffic_class;
+};
+
+SEC("tracepoint/ib_umad/ib_umad_read_recv")
+int on_ib_umad_read_recv(struct ib_umad_rw_args *ctx)
+{
+ u64 zero = 0, *val;
+ u8 class = ctx->mgmt_class;
+
+ bpf_printk("ib_umad read recv : class 0x%x\n", class);
+
+ val = bpf_map_lookup_elem(&read_count, &class);
+ if (!val) {
+ bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST);
+ val = bpf_map_lookup_elem(&read_count, &class);
+ if (!val)
+ return 0;
+ }
+
+ (*val) += 1;
+
+ return 0;
+}
+SEC("tracepoint/ib_umad/ib_umad_read_send")
+int on_ib_umad_read_send(struct ib_umad_rw_args *ctx)
+{
+ u64 zero = 0, *val;
+ u8 class = ctx->mgmt_class;
+
+ bpf_printk("ib_umad read send : class 0x%x\n", class);
+
+ val = bpf_map_lookup_elem(&read_count, &class);
+ if (!val) {
+ bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST);
+ val = bpf_map_lookup_elem(&read_count, &class);
+ if (!val)
+ return 0;
+ }
+
+ (*val) += 1;
+
+ return 0;
+}
+SEC("tracepoint/ib_umad/ib_umad_write")
+int on_ib_umad_write(struct ib_umad_rw_args *ctx)
+{
+ u64 zero = 0, *val;
+ u8 class = ctx->mgmt_class;
+
+ bpf_printk("ib_umad write : class 0x%x\n", class);
+
+ val = bpf_map_lookup_elem(&write_count, &class);
+ if (!val) {
+ bpf_map_update_elem(&write_count, &class, &zero, BPF_NOEXIST);
+ val = bpf_map_lookup_elem(&write_count, &class);
+ if (!val)
+ return 0;
+ }
+
+ (*val) += 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c
new file mode 100644
index 000000000000..d074c978aac7
--- /dev/null
+++ b/samples/bpf/ibumad_user.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+/*
+ * ibumad BPF sample user side
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Copyright(c) 2018 Ira Weiny, Intel Corporation
+ */
+
+#include <linux/bpf.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <limits.h>
+
+#include <getopt.h>
+#include <net/if.h>
+
+#include <bpf/bpf.h>
+#include "bpf_util.h"
+#include <bpf/libbpf.h>
+
+static struct bpf_link *tp_links[3];
+static struct bpf_object *obj;
+static int map_fd[2];
+static int tp_cnt;
+
+static void dump_counts(int fd)
+{
+ __u32 key;
+ __u64 value;
+
+ for (key = 0; key < 256; key++) {
+ if (bpf_map_lookup_elem(fd, &key, &value)) {
+ printf("failed to read key %u\n", key);
+ continue;
+ }
+ if (value)
+ printf("0x%02x : %llu\n", key, value);
+ }
+}
+
+static void dump_all_counts(void)
+{
+ printf("Read 'Class : count'\n");
+ dump_counts(map_fd[0]);
+ printf("Write 'Class : count'\n");
+ dump_counts(map_fd[1]);
+}
+
+static void dump_exit(int sig)
+{
+ dump_all_counts();
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
+ bpf_object__close(obj);
+ exit(0);
+}
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h'},
+ {"delay", required_argument, NULL, 'd'},
+};
+
+static void usage(char *cmd)
+{
+ printf("eBPF test program to count packets from various IP addresses\n"
+ "Usage: %s <options>\n"
+ " --help, -h this menu\n"
+ " --delay, -d <delay> wait <delay> sec between prints [1 - 1000000]\n"
+ , cmd
+ );
+}
+
+int main(int argc, char **argv)
+{
+ struct bpf_program *prog;
+ unsigned long delay = 5;
+ char filename[256];
+ int longindex = 0;
+ int opt, err = -1;
+
+ while ((opt = getopt_long(argc, argv, "hd:rSw",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'd':
+ delay = strtoul(optarg, NULL, 0);
+ if (delay == ULONG_MAX || delay < 0 ||
+ delay > 1000000) {
+ fprintf(stderr, "ERROR: invalid delay : %s\n",
+ optarg);
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+ default:
+ case 'h':
+ usage(argv[0]);
+ return 1;
+ }
+ }
+
+ /* Do one final dump when exiting */
+ signal(SIGINT, dump_exit);
+ signal(SIGTERM, dump_exit);
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return err;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "read_count");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "write_count");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ tp_links[tp_cnt] = bpf_program__attach(prog);
+ if (libbpf_get_error(tp_links[tp_cnt])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ tp_links[tp_cnt] = NULL;
+ goto cleanup;
+ }
+ tp_cnt++;
+ }
+
+ while (1) {
+ sleep(delay);
+ dump_all_counts();
+ }
+ err = 0;
+
+cleanup:
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
+ bpf_object__close(obj);
+ return err;
+}
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
index 18fa088473cd..4adfcbbe6ef4 100644
--- a/samples/bpf/lathist_kern.c
+++ b/samples/bpf/lathist_kern.c
@@ -8,7 +8,7 @@
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -18,12 +18,12 @@
* trace_preempt_[on|off] tracepoints hooks is not supported.
*/
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU);
+} my_map SEC(".maps");
SEC("kprobe/trace_preempt_off")
int bpf_prog1(struct pt_regs *ctx)
@@ -61,12 +61,12 @@ static unsigned int log2l(unsigned long v)
return log2(v);
}
-struct bpf_map_def SEC("maps") my_lat = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long),
- .max_entries = MAX_CPU * MAX_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, long);
+ __uint(max_entries, MAX_CPU * MAX_ENTRIES);
+} my_lat SEC(".maps");
SEC("kprobe/trace_preempt_on")
int bpf_prog2(struct pt_regs *ctx)
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
index 6477bad5b4e2..7d8ff2418303 100644
--- a/samples/bpf/lathist_user.c
+++ b/samples/bpf/lathist_user.c
@@ -1,17 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
* Copyright (c) 2015 BMW Car IT GmbH
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -84,20 +80,51 @@ static void get_data(int fd)
int main(int argc, char **argv)
{
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd, i = 0;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
while (1) {
- get_data(map_fd[1]);
+ get_data(map_fd);
print_hist();
sleep(5);
}
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c
deleted file mode 100644
index e5da6cf71a3e..000000000000
--- a/samples/bpf/load_sock_ops.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include "libbpf.h"
-#include "bpf_load.h"
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/unistd.h>
-
-static void usage(char *pname)
-{
- printf("USAGE:\n %s [-l] <cg-path> <prog filename>\n", pname);
- printf("\tLoad and attach a sock_ops program to the specified "
- "cgroup\n");
- printf("\tIf \"-l\" is used, the program will continue to run\n");
- printf("\tprinting the BPF log buffer\n");
- printf("\tIf the specified filename does not end in \".o\", it\n");
- printf("\tappends \"_kern.o\" to the name\n");
- printf("\n");
- printf(" %s -r <cg-path>\n", pname);
- printf("\tDetaches the currently attached sock_ops program\n");
- printf("\tfrom the specified cgroup\n");
- printf("\n");
- exit(1);
-}
-
-int main(int argc, char **argv)
-{
- int logFlag = 0;
- int error = 0;
- char *cg_path;
- char fn[500];
- char *prog;
- int cg_fd;
-
- if (argc < 3)
- usage(argv[0]);
-
- if (!strcmp(argv[1], "-r")) {
- cg_path = argv[2];
- cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
- error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
- if (error) {
- printf("ERROR: bpf_prog_detach: %d (%s)\n",
- error, strerror(errno));
- return 2;
- }
- return 0;
- } else if (!strcmp(argv[1], "-h")) {
- usage(argv[0]);
- } else if (!strcmp(argv[1], "-l")) {
- logFlag = 1;
- if (argc < 4)
- usage(argv[0]);
- }
-
- prog = argv[argc - 1];
- cg_path = argv[argc - 2];
- if (strlen(prog) > 480) {
- fprintf(stderr, "ERROR: program name too long (> 480 chars)\n");
- return 3;
- }
- cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
-
- if (!strcmp(prog + strlen(prog)-2, ".o"))
- strcpy(fn, prog);
- else
- sprintf(fn, "%s_kern.o", prog);
- if (logFlag)
- printf("loading bpf file:%s\n", fn);
- if (load_bpf_file(fn)) {
- printf("ERROR: load_bpf_file failed for: %s\n", fn);
- printf("%s", bpf_log_buf);
- return 4;
- }
- if (logFlag)
- printf("TCP BPF Loaded %s\n", fn);
-
- error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
- if (error) {
- printf("ERROR: bpf_prog_attach: %d (%s)\n",
- error, strerror(errno));
- return 5;
- } else if (logFlag) {
- read_trace_pipe();
- }
-
- return error;
-}
diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist.bpf.c
index df75383280f9..dbab80e813fe 100644
--- a/samples/bpf/lwt_len_hist_kern.c
+++ b/samples/bpf/lwt_len_hist.bpf.c
@@ -10,36 +10,16 @@
* General Public License for more details.
*/
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/in.h>
-#include "bpf_helpers.h"
-
-# define printk(fmt, ...) \
- ({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
- })
-
-struct bpf_elf_map {
- __u32 type;
- __u32 size_key;
- __u32 size_value;
- __u32 max_elem;
- __u32 flags;
- __u32 id;
- __u32 pinning;
-};
-
-struct bpf_elf_map SEC("maps") lwt_len_hist_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .size_key = sizeof(__u64),
- .size_value = sizeof(__u64),
- .pinning = 2,
- .max_elem = 1024,
-};
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, u64);
+ __type(value, u64);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+ __uint(max_entries, 1024);
+} lwt_len_hist_map SEC(".maps");
static unsigned int log2(unsigned int v)
{
diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh
index 7d567744c7fa..381b2c634784 100644..100755
--- a/samples/bpf/lwt_len_hist.sh
+++ b/samples/bpf/lwt_len_hist.sh
@@ -1,12 +1,15 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
NS1=lwt_ns1
VETH0=tst_lwt1a
VETH1=tst_lwt1b
-
-TRACE_ROOT=/sys/kernel/debug/tracing
+BPF_PROG=lwt_len_hist.bpf.o
+TRACE_ROOT=/sys/kernel/tracing
function cleanup {
+ # To reset saved histogram, remove pinned map
+ rm /sys/fs/bpf/tc/globals/lwt_len_hist_map
ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null
ip link del $VETH0 2> /dev/null
ip link del $VETH1 2> /dev/null
@@ -27,7 +30,7 @@ ip netns exec $NS1 netserver
echo 1 > ${TRACE_ROOT}/tracing_on
cp /dev/null ${TRACE_ROOT}/trace
-ip route add 192.168.253.2/32 encap bpf out obj lwt_len_hist_kern.o section len_hist dev $VETH0
+ip route add 192.168.253.2/32 encap bpf out obj $BPF_PROG section len_hist dev $VETH0
netperf -H 192.168.253.2 -t TCP_STREAM
cat ${TRACE_ROOT}/trace | grep -v '^#'
./lwt_len_hist
diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c
index ec8f3bbcbef3..430a4b7e353e 100644
--- a/samples/bpf/lwt_len_hist_user.c
+++ b/samples/bpf/lwt_len_hist_user.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/unistd.h>
#include <linux/bpf.h>
@@ -8,14 +9,12 @@
#include <errno.h>
#include <arpa/inet.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
#include "bpf_util.h"
#define MAX_INDEX 64
#define MAX_STARS 38
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
static void stars(char *str, long val, long max, int width)
{
int i;
diff --git a/samples/bpf/map_perf_test.bpf.c b/samples/bpf/map_perf_test.bpf.c
new file mode 100644
index 000000000000..3cdeba2afe12
--- /dev/null
+++ b/samples/bpf/map_perf_test.bpf.c
@@ -0,0 +1,297 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include "vmlinux.h"
+#include <errno.h>
+#include <linux/version.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_ENTRIES 1000
+#define MAX_NR_CPUS 1024
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+} lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_COMMON_LRU);
+} nocommon_lru_hash_map SEC(".maps");
+
+struct inner_lru {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NUMA_NODE);
+ __uint(numa_node, 0);
+} inner_lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_CPUS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_lru); /* use inner_lru as inner map */
+} array_of_lru_hashs SEC(".maps") = {
+ /* statically initialize the first element */
+ .values = { &inner_lru_hash_map },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+} percpu_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_map_alloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} percpu_hash_map_alloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(key_size, 8);
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} lpm_trie_map_alloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} lru_hash_lookup_map SEC(".maps");
+
+SEC("ksyscall/getuid")
+int BPF_KSYSCALL(stress_hmap)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map, &key);
+ }
+
+ return 0;
+}
+
+SEC("ksyscall/geteuid")
+int BPF_KSYSCALL(stress_percpu_hmap)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map, &key);
+ }
+ return 0;
+}
+
+SEC("ksyscall/getgid")
+int BPF_KSYSCALL(stress_hmap_alloc)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map_alloc, &key);
+ }
+ return 0;
+}
+
+SEC("ksyscall/getegid")
+int BPF_KSYSCALL(stress_percpu_hmap_alloc)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
+ }
+ return 0;
+}
+SEC("ksyscall/connect")
+int BPF_KSYSCALL(stress_lru_hmap_alloc, int fd, struct sockaddr_in *uservaddr,
+ int addrlen)
+{
+ char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn";
+ union {
+ u16 dst6[8];
+ struct {
+ u16 magic0;
+ u16 magic1;
+ u16 tcase;
+ u16 unused16;
+ u32 unused32;
+ u32 key;
+ };
+ } test_params;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)uservaddr;
+ u16 test_case;
+ long val = 1;
+ u32 key = 0;
+ int ret;
+
+ if (addrlen != sizeof(*in6))
+ return 0;
+
+ ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
+ &in6->sin6_addr);
+ if (ret)
+ goto done;
+
+ if (test_params.magic0 != 0xdead ||
+ test_params.magic1 != 0xbeef)
+ return 0;
+
+ test_case = test_params.tcase;
+ if (test_case != 3)
+ key = bpf_get_prandom_u32();
+
+ if (test_case == 0) {
+ ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
+ } else if (test_case == 1) {
+ ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val,
+ BPF_ANY);
+ } else if (test_case == 2) {
+ void *nolocal_lru_map;
+ int cpu = bpf_get_smp_processor_id();
+
+ nolocal_lru_map = bpf_map_lookup_elem(&array_of_lru_hashs,
+ &cpu);
+ if (!nolocal_lru_map) {
+ ret = -ENOENT;
+ goto done;
+ }
+
+ ret = bpf_map_update_elem(nolocal_lru_map, &key, &val,
+ BPF_ANY);
+ } else if (test_case == 3) {
+ u32 i;
+
+ key = test_params.key;
+
+#pragma clang loop unroll(full)
+ for (i = 0; i < 32; i++) {
+ bpf_map_lookup_elem(&lru_hash_lookup_map, &key);
+ key++;
+ }
+ } else {
+ ret = -EINVAL;
+ }
+
+done:
+ if (ret)
+ bpf_trace_printk(fmt, sizeof(fmt), ret);
+
+ return 0;
+}
+
+SEC("ksyscall/gettid")
+int BPF_KSYSCALL(stress_lpm_trie_map_alloc)
+{
+ union {
+ u32 b32[2];
+ u8 b8[8];
+ } key;
+ unsigned int i;
+
+ key.b32[0] = 32;
+ key.b8[4] = 192;
+ key.b8[5] = 168;
+ key.b8[6] = 0;
+ key.b8[7] = 1;
+
+#pragma clang loop unroll(full)
+ for (i = 0; i < 32; ++i)
+ bpf_map_lookup_elem(&lpm_trie_map_alloc, &key);
+
+ return 0;
+}
+
+SEC("ksyscall/getpgid")
+int BPF_KSYSCALL(stress_hash_map_lookup)
+{
+ u32 key = 1, i;
+ long *value;
+
+#pragma clang loop unroll(full)
+ for (i = 0; i < 64; ++i)
+ value = bpf_map_lookup_elem(&hash_map, &key);
+
+ return 0;
+}
+
+SEC("ksyscall/getppid")
+int BPF_KSYSCALL(stress_array_map_lookup)
+{
+ u32 key = 1, i;
+ long *value;
+
+#pragma clang loop unroll(full)
+ for (i = 0; i < 64; ++i)
+ value = bpf_map_lookup_elem(&array_map, &key);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
deleted file mode 100644
index 245165817fbe..000000000000
--- a/samples/bpf/map_perf_test_kern.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-#define MAX_ENTRIES 1000
-#define MAX_NR_CPUS 1024
-
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-struct bpf_map_def SEC("maps") lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
-};
-
-struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_COMMON_LRU,
-};
-
-struct bpf_map_def SEC("maps") inner_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-struct bpf_map_def SEC("maps") array_of_lru_hashs = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .max_entries = MAX_NR_CPUS,
-};
-
-struct bpf_map_def SEC("maps") percpu_hash_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-struct bpf_map_def SEC("maps") hash_map_alloc = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
- .type = BPF_MAP_TYPE_LPM_TRIE,
- .key_size = 8,
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def SEC("maps") array_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-SEC("kprobe/sys_getuid")
-int stress_hmap(struct pt_regs *ctx)
-{
- u32 key = bpf_get_current_pid_tgid();
- long init_val = 1;
- long *value;
-
- bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&hash_map, &key);
- if (value)
- bpf_map_delete_elem(&hash_map, &key);
-
- return 0;
-}
-
-SEC("kprobe/sys_geteuid")
-int stress_percpu_hmap(struct pt_regs *ctx)
-{
- u32 key = bpf_get_current_pid_tgid();
- long init_val = 1;
- long *value;
-
- bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&percpu_hash_map, &key);
- if (value)
- bpf_map_delete_elem(&percpu_hash_map, &key);
- return 0;
-}
-
-SEC("kprobe/sys_getgid")
-int stress_hmap_alloc(struct pt_regs *ctx)
-{
- u32 key = bpf_get_current_pid_tgid();
- long init_val = 1;
- long *value;
-
- bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&hash_map_alloc, &key);
- if (value)
- bpf_map_delete_elem(&hash_map_alloc, &key);
- return 0;
-}
-
-SEC("kprobe/sys_getegid")
-int stress_percpu_hmap_alloc(struct pt_regs *ctx)
-{
- u32 key = bpf_get_current_pid_tgid();
- long init_val = 1;
- long *value;
-
- bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
- if (value)
- bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
- return 0;
-}
-
-SEC("kprobe/sys_connect")
-int stress_lru_hmap_alloc(struct pt_regs *ctx)
-{
- struct sockaddr_in6 *in6;
- u16 test_case, dst6[8];
- int addrlen, ret;
- char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
- long val = 1;
- u32 key = bpf_get_prandom_u32();
-
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
-
- if (addrlen != sizeof(*in6))
- return 0;
-
- ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
- if (ret)
- goto done;
-
- if (dst6[0] != 0xdead || dst6[1] != 0xbeef)
- return 0;
-
- test_case = dst6[7];
-
- if (test_case == 0) {
- ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
- } else if (test_case == 1) {
- ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val,
- BPF_ANY);
- } else if (test_case == 2) {
- void *nolocal_lru_map;
- int cpu = bpf_get_smp_processor_id();
-
- nolocal_lru_map = bpf_map_lookup_elem(&array_of_lru_hashs,
- &cpu);
- if (!nolocal_lru_map) {
- ret = -ENOENT;
- goto done;
- }
-
- ret = bpf_map_update_elem(nolocal_lru_map, &key, &val,
- BPF_ANY);
- } else {
- ret = -EINVAL;
- }
-
-done:
- if (ret)
- bpf_trace_printk(fmt, sizeof(fmt), ret);
-
- return 0;
-}
-
-SEC("kprobe/sys_gettid")
-int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
-{
- union {
- u32 b32[2];
- u8 b8[8];
- } key;
- unsigned int i;
-
- key.b32[0] = 32;
- key.b8[4] = 192;
- key.b8[5] = 168;
- key.b8[6] = 0;
- key.b8[7] = 1;
-
-#pragma clang loop unroll(full)
- for (i = 0; i < 32; ++i)
- bpf_map_lookup_elem(&lpm_trie_map_alloc, &key);
-
- return 0;
-}
-
-SEC("kprobe/sys_getpgid")
-int stress_hash_map_lookup(struct pt_regs *ctx)
-{
- u32 key = 1, i;
- long *value;
-
-#pragma clang loop unroll(full)
- for (i = 0; i < 64; ++i)
- value = bpf_map_lookup_elem(&hash_map, &key);
-
- return 0;
-}
-
-SEC("kprobe/sys_getpgrp")
-int stress_array_map_lookup(struct pt_regs *ctx)
-{
- u32 key = 1, i;
- long *value;
-
-#pragma clang loop unroll(full)
- for (i = 0; i < 64; ++i)
- value = bpf_map_lookup_elem(&array_map, &key);
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 1a8894b5ac51..07ff471ed6ae 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#define _GNU_SOURCE
#include <sched.h>
@@ -14,15 +11,13 @@
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <time.h>
-#include <sys/resource.h>
#include <arpa/inet.h>
#include <errno.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#define TEST_BIT(t) (1U << (t))
#define MAX_NR_CPUS 1024
@@ -46,6 +41,7 @@ enum test_type {
HASH_LOOKUP,
ARRAY_LOOKUP,
INNER_LRU_HASH_PREALLOC,
+ LRU_HASH_LOOKUP,
NR_TESTS,
};
@@ -60,14 +56,23 @@ const char *test_map_names[NR_TESTS] = {
[HASH_LOOKUP] = "hash_map",
[ARRAY_LOOKUP] = "array_map",
[INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map",
+ [LRU_HASH_LOOKUP] = "lru_hash_lookup_map",
};
+enum map_idx {
+ array_of_lru_hashs_idx,
+ hash_map_alloc_idx,
+ lru_hash_lookup_idx,
+ NR_IDXES,
+};
+
+static int map_fd[NR_IDXES];
+
static int test_flags = ~0;
static uint32_t num_map_entries;
static uint32_t inner_lru_hash_size;
-static int inner_lru_hash_idx = -1;
-static int array_of_lru_hashs_idx = -1;
-static uint32_t max_cnt = 1000000;
+static int lru_hash_lookup_test_entries = 32;
+static uint32_t max_cnt = 10000;
static int check_test_flags(enum test_type t)
{
@@ -86,6 +91,32 @@ static void test_hash_prealloc(int cpu)
cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
}
+static int pre_test_lru_hash_lookup(int tasks)
+{
+ int fd = map_fd[lru_hash_lookup_idx];
+ uint32_t key;
+ long val = 1;
+ int ret;
+
+ if (num_map_entries > lru_hash_lookup_test_entries)
+ lru_hash_lookup_test_entries = num_map_entries;
+
+ /* Populate the lru_hash_map for LRU_HASH_LOOKUP perf test.
+ *
+ * It is fine that the user requests for a map with
+ * num_map_entries < 32 and some of the later lru hash lookup
+ * may return not found. For LRU map, we are not interested
+ * in such small map performance.
+ */
+ for (key = 0; key < lru_hash_lookup_test_entries; key++) {
+ ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static void do_test_lru(enum test_type test, int cpu)
{
static int inner_lru_map_fds[MAX_NR_CPUS];
@@ -95,23 +126,33 @@ static void do_test_lru(enum test_type test, int cpu)
__u64 start_time;
int i, ret;
- if (test == INNER_LRU_HASH_PREALLOC) {
+ if (test == INNER_LRU_HASH_PREALLOC && cpu) {
+ /* If CPU is not 0, create inner_lru hash map and insert the fd
+ * value into the array_of_lru_hash map. In case of CPU 0,
+ * 'inner_lru_hash_map' was statically inserted on the map init
+ */
int outer_fd = map_fd[array_of_lru_hashs_idx];
+ unsigned int mycpu, mynode;
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = BPF_F_NUMA_NODE,
+ );
assert(cpu < MAX_NR_CPUS);
- if (cpu) {
- inner_lru_map_fds[cpu] =
- bpf_create_map(BPF_MAP_TYPE_LRU_HASH,
- sizeof(uint32_t), sizeof(long),
- inner_lru_hash_size, 0);
- if (inner_lru_map_fds[cpu] == -1) {
- printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
- strerror(errno), errno);
- exit(1);
- }
- } else {
- inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx];
+ ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
+ assert(!ret);
+
+ opts.numa_node = mynode;
+ inner_lru_map_fds[cpu] =
+ bpf_map_create(BPF_MAP_TYPE_LRU_HASH,
+ test_map_names[INNER_LRU_HASH_PREALLOC],
+ sizeof(uint32_t),
+ sizeof(long),
+ inner_lru_hash_size, &opts);
+ if (inner_lru_map_fds[cpu] == -1) {
+ printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
+ strerror(errno), errno);
+ exit(1);
}
ret = bpf_map_update_elem(outer_fd, &cpu,
@@ -129,13 +170,17 @@ static void do_test_lru(enum test_type test, int cpu)
if (test == LRU_HASH_PREALLOC) {
test_name = "lru_hash_map_perf";
- in6.sin6_addr.s6_addr16[7] = 0;
+ in6.sin6_addr.s6_addr16[2] = 0;
} else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
test_name = "nocommon_lru_hash_map_perf";
- in6.sin6_addr.s6_addr16[7] = 1;
+ in6.sin6_addr.s6_addr16[2] = 1;
} else if (test == INNER_LRU_HASH_PREALLOC) {
test_name = "inner_lru_hash_map_perf";
- in6.sin6_addr.s6_addr16[7] = 2;
+ in6.sin6_addr.s6_addr16[2] = 2;
+ } else if (test == LRU_HASH_LOOKUP) {
+ test_name = "lru_hash_lookup_perf";
+ in6.sin6_addr.s6_addr16[2] = 3;
+ in6.sin6_addr.s6_addr32[3] = 0;
} else {
assert(0);
}
@@ -144,6 +189,11 @@ static void do_test_lru(enum test_type test, int cpu)
for (i = 0; i < max_cnt; i++) {
ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
assert(ret == -1 && errno == EBADF);
+ if (in6.sin6_addr.s6_addr32[3] <
+ lru_hash_lookup_test_entries - 32)
+ in6.sin6_addr.s6_addr32[3] += 32;
+ else
+ in6.sin6_addr.s6_addr32[3] = 0;
}
printf("%d:%s pre-alloc %lld events per sec\n",
cpu, test_name,
@@ -165,6 +215,11 @@ static void test_inner_lru_hash_prealloc(int cpu)
do_test_lru(INNER_LRU_HASH_PREALLOC, cpu);
}
+static void test_lru_hash_lookup(int cpu)
+{
+ do_test_lru(LRU_HASH_LOOKUP, cpu);
+}
+
static void test_percpu_hash_prealloc(int cpu)
{
__u64 start_time;
@@ -232,11 +287,16 @@ static void test_array_lookup(int cpu)
start_time = time_get_ns();
for (i = 0; i < max_cnt; i++)
- syscall(__NR_getpgrp, 0);
+ syscall(__NR_getppid, 0);
printf("%d:array_lookup %lld lookups per sec\n",
cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
}
+typedef int (*pre_test_func)(int tasks);
+const pre_test_func pre_test_funcs[] = {
+ [LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup,
+};
+
typedef void (*test_func)(int cpu);
const test_func test_funcs[] = {
[HASH_PREALLOC] = test_hash_prealloc,
@@ -249,8 +309,25 @@ const test_func test_funcs[] = {
[HASH_LOOKUP] = test_hash_lookup,
[ARRAY_LOOKUP] = test_array_lookup,
[INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc,
+ [LRU_HASH_LOOKUP] = test_lru_hash_lookup,
};
+static int pre_test(int tasks)
+{
+ int i;
+
+ for (i = 0; i < NR_TESTS; i++) {
+ if (pre_test_funcs[i] && check_test_flags(i)) {
+ int ret = pre_test_funcs[i](tasks);
+
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static void loop(int cpu)
{
cpu_set_t cpuset;
@@ -271,6 +348,8 @@ static void run_perf_test(int tasks)
pid_t pid[tasks];
int i;
+ assert(!pre_test(tasks));
+
for (i = 0; i < tasks; i++) {
pid[i] = fork();
if (pid[i] == 0) {
@@ -291,7 +370,7 @@ static void run_perf_test(int tasks)
static void fill_lpm_trie(void)
{
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
unsigned long value = 0;
unsigned int i;
int r;
@@ -305,7 +384,8 @@ static void fill_lpm_trie(void)
key->data[1] = rand() & 0xff;
key->data[2] = rand() & 0xff;
key->data[3] = rand() & 0xff;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx],
+ key, &value, 0);
assert(!r);
}
@@ -316,56 +396,46 @@ static void fill_lpm_trie(void)
key->data[3] = 1;
value = 128;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], key, &value, 0);
assert(!r);
}
-static void fixup_map(struct bpf_map_data *map, int idx)
+static void fixup_map(struct bpf_object *obj)
{
+ struct bpf_map *map;
int i;
- if (!strcmp("inner_lru_hash_map", map->name)) {
- inner_lru_hash_idx = idx;
- inner_lru_hash_size = map->def.max_entries;
- }
+ bpf_object__for_each_map(map, obj) {
+ const char *name = bpf_map__name(map);
- if (!strcmp("array_of_lru_hashs", map->name)) {
- if (inner_lru_hash_idx == -1) {
- printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
- exit(1);
+ /* Only change the max_entries for the enabled test(s) */
+ for (i = 0; i < NR_TESTS; i++) {
+ if (!strcmp(test_map_names[i], name) &&
+ (check_test_flags(i))) {
+ bpf_map__set_max_entries(map, num_map_entries);
+ continue;
+ }
}
- map->def.inner_map_idx = inner_lru_hash_idx;
- array_of_lru_hashs_idx = idx;
}
- if (num_map_entries <= 0)
- return;
-
inner_lru_hash_size = num_map_entries;
-
- /* Only change the max_entries for the enabled test(s) */
- for (i = 0; i < NR_TESTS; i++) {
- if (!strcmp(test_map_names[i], map->name) &&
- (check_test_flags(i))) {
- map->def.max_entries = num_map_entries;
- }
- }
}
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link *links[8];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_map *map;
char filename[256];
- int num_cpu = 8;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ int i = 0;
if (argc > 1)
test_flags = atoi(argv[1]) ? : test_flags;
if (argc > 2)
- num_cpu = atoi(argv[2]) ? : num_cpu;
+ nr_cpus = atoi(argv[2]) ? : nr_cpus;
if (argc > 3)
num_map_entries = atoi(argv[3]);
@@ -373,14 +443,61 @@ int main(int argc, char **argv)
if (argc > 4)
max_cnt = atoi(argv[4]);
- if (load_bpf_file_fixup_map(filename, fixup_map)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "inner_lru_hash_map");
+ if (libbpf_get_error(map)) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ inner_lru_hash_size = bpf_map__max_entries(map);
+ if (!inner_lru_hash_size) {
+ fprintf(stderr, "ERROR: failed to get map attribute\n");
+ goto cleanup;
+ }
+
+ /* resize BPF map prior to loading */
+ if (num_map_entries > 0)
+ fixup_map(obj);
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "array_of_lru_hashs");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "hash_map_alloc");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "lru_hash_lookup_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
fill_lpm_trie();
- run_perf_test(num_cpu);
+ run_perf_test(nr_cpus);
+
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/net_shared.h b/samples/bpf/net_shared.h
new file mode 100644
index 000000000000..88cc52461c98
--- /dev/null
+++ b/samples/bpf/net_shared.h
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _NET_SHARED_H
+#define _NET_SHARED_H
+
+#define AF_INET 2
+#define AF_INET6 10
+
+#define ETH_ALEN 6
+#define ETH_P_802_3_MIN 0x0600
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define ETH_P_ARP 0x0806
+#define IPPROTO_ICMPV6 58
+
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define IFNAMSIZ 16
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_ntohs(x) __builtin_bswap16(x)
+#define bpf_htons(x) __builtin_bswap16(x)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define bpf_ntohs(x) (x)
+#define bpf_htons(x) (x)
+#else
+# error "Endianness detection needs to be set up for your compiler?!"
+#endif
+
+#endif
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime.bpf.c
index e7d9a0a3d45b..4a65ba76c1b1 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime.bpf.c
@@ -4,16 +4,18 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-#include <uapi/linux/ptrace.h>
-#include <uapi/linux/perf_event.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <linux/sched.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
-#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
#define MINBLOCK_US 1
+#define MAX_ENTRIES 10000
struct key_t {
char waker[TASK_COMM_LEN];
@@ -22,49 +24,47 @@ struct key_t {
u32 tret;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") start = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} start SEC(".maps");
struct wokeby_t {
char name[TASK_COMM_LEN];
u32 ret;
};
-struct bpf_map_def SEC("maps") wokeby = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct wokeby_t),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, struct wokeby_t);
+ __uint(max_entries, MAX_ENTRIES);
+} wokeby SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, MAX_ENTRIES);
+} stackmap SEC(".maps");
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
- struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
+ struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
+ u32 pid = BPF_CORE_READ(p, pid);
struct wokeby_t woke;
- u32 pid;
-
- pid = _(p->pid);
bpf_get_current_comm(&woke.name, sizeof(woke.name));
woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
@@ -103,29 +103,19 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
}
#if 1
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
-struct sched_switch_args {
- unsigned long long pad;
- char prev_comm[16];
- int prev_pid;
- int prev_prio;
- long long prev_state;
- char next_comm[16];
- int next_pid;
- int next_prio;
-};
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
SEC("tracepoint/sched/sched_switch")
-int oncpu(struct sched_switch_args *ctx)
+int oncpu(struct trace_event_raw_sched_switch *ctx)
{
/* record previous thread sleep time */
u32 pid = ctx->prev_pid;
#else
-SEC("kprobe/finish_task_switch")
+SEC("kprobe.multi/finish_task_switch*")
int oncpu(struct pt_regs *ctx)
{
- struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
+ struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
/* record previous thread sleep time */
- u32 pid = _(p->pid);
+ u32 pid = BPF_CORE_READ(p, pid);
#endif
u64 delta, ts, *tsp;
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 512f87a5fd20..5557b5393642 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -1,25 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
#include <stdbool.h>
-#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "trace_helpers.h"
#define PRINT_RAW_ADDR 0
+/* counts, stackmap */
+static int map_fd[2];
+
static void print_ksym(__u64 addr)
{
struct ksym *sym;
@@ -27,6 +24,11 @@ static void print_ksym(__u64 addr)
if (!addr)
return;
sym = ksym_search(addr);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ return;
+ }
+
if (PRINT_RAW_ADDR)
printf("%s/%llx;", sym->name, addr);
else
@@ -49,14 +51,14 @@ static void print_stack(struct key_t *key, __u64 count)
int i;
printf("%s;", key->target);
- if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) {
printf("---;");
} else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
print_ksym(ip[i]);
}
printf("-;");
- if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) {
printf("---;");
} else {
for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
@@ -92,24 +94,49 @@ static void int_exit(int sig)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ int delay = 1, i = 0;
char filename[256];
- int delay = 1;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
if (argc > 1)
@@ -117,5 +144,10 @@ int main(int argc, char **argv)
sleep(delay);
print_stacks(map_fd[0]);
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c
index 6db6b21fdc6d..c6f65f90a097 100644
--- a/samples/bpf/parse_ldabs.c
+++ b/samples/bpf/parse_ldabs.c
@@ -11,7 +11,8 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define DEFAULT_PKTGEN_UDP_PORT 9
#define IP_MF 0x2000
diff --git a/samples/bpf/parse_simple.c b/samples/bpf/parse_simple.c
index 10af53d33cc2..4a486cb1e0df 100644
--- a/samples/bpf/parse_simple.c
+++ b/samples/bpf/parse_simple.c
@@ -12,7 +12,7 @@
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
#include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_PKTGEN_UDP_PORT 9
diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c
index 95c16324760c..d8623846e810 100644
--- a/samples/bpf/parse_varlen.c
+++ b/samples/bpf/parse_varlen.c
@@ -6,6 +6,7 @@
*/
#define KBUILD_MODNAME "foo"
#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
@@ -13,7 +14,7 @@
#include <linux/udp.h>
#include <uapi/linux/bpf.h>
#include <net/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define DEFAULT_PKTGEN_UDP_PORT 9
#define DEBUG 0
@@ -108,11 +109,6 @@ static int parse_ipv6(void *data, uint64_t nh_off, void *data_end)
return 0;
}
-struct vlan_hdr {
- uint16_t h_vlan_TCI;
- uint16_t h_vlan_encapsulated_proto;
-};
-
SEC("varlen")
int handle_ingress(struct __sk_buff *skb)
{
diff --git a/samples/bpf/run_cookie_uid_helper_example.sh b/samples/bpf/run_cookie_uid_helper_example.sh
index f898cfa2b1aa..fc6bc0451ab4 100755
--- a/samples/bpf/run_cookie_uid_helper_example.sh
+++ b/samples/bpf/run_cookie_uid_helper_example.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
local_dir="$(pwd)"
root_dir=$local_dir/../..
mnt_dir=$(mktemp -d --tmp)
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index ceabf31079cf..a3f8a3998e0a 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -4,20 +4,20 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#define MAX_IPS 8192
-struct bpf_map_def SEC("maps") ip_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u64),
- .value_size = sizeof(u32),
- .max_entries = MAX_IPS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, u32);
+ __uint(max_entries, MAX_IPS);
+} ip_map SEC(".maps");
SEC("perf_event")
int do_sample(struct bpf_perf_event_data *ctx)
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 4ed690b907ff..9283f47844fb 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -1,34 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* sampleip: sample instruction pointer and frequency count in a BPF map.
*
* Copyright 2016 Netflix, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <stdlib.h>
-#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
-#include <assert.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
-#include <sys/ioctl.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
+#include "trace_helpers.h"
#define DEFAULT_FREQ 99
#define DEFAULT_SECS 5
#define MAX_IPS 8192
-#define PAGE_OFFSET 0xffff880000000000
+static int map_fd;
static int nr_cpus;
+static long _text_addr;
static void usage(void)
{
@@ -37,9 +33,10 @@ static void usage(void)
printf(" duration # sampling duration (seconds), default 5\n");
}
-static int sampling_start(int *pmu_fd, int freq)
+static int sampling_start(int freq, struct bpf_program *prog,
+ struct bpf_link *links[])
{
- int i;
+ int i, pmu_fd;
struct perf_event_attr pe_sample_attr = {
.type = PERF_TYPE_SOFTWARE,
@@ -50,26 +47,30 @@ static int sampling_start(int *pmu_fd, int freq)
};
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
+ pmu_fd = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
-1 /* group_fd */, 0 /* flags */);
- if (pmu_fd[i] < 0) {
+ if (pmu_fd < 0) {
fprintf(stderr, "ERROR: Initializing perf sampling\n");
return 1;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF,
- prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: Attach perf event\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ return 1;
+ }
}
return 0;
}
-static void sampling_end(int *pmu_fd)
+static void sampling_end(struct bpf_link *links[])
{
int i;
for (i = 0; i < nr_cpus; i++)
- close(pmu_fd[i]);
+ bpf_link__destroy(links[i]);
}
struct ipcount {
@@ -107,8 +108,13 @@ static void print_ip_map(int fd)
/* sort and print */
qsort(counts, max, sizeof(struct ipcount), count_cmp);
for (i = 0; i < max; i++) {
- if (counts[i].ip > PAGE_OFFSET) {
+ if (counts[i].ip > _text_addr) {
sym = ksym_search(counts[i].ip);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ continue;
+ }
+
printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name,
counts[i].count);
} else {
@@ -126,14 +132,17 @@ static void print_ip_map(int fd)
static void int_exit(int sig)
{
printf("\n");
- print_ip_map(map_fd[0]);
+ print_ip_map(map_fd);
exit(0);
}
int main(int argc, char **argv)
{
+ int opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS, error = 1;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ struct bpf_link **links;
char filename[256];
- int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS;
/* process arguments */
while ((opt = getopt(argc, argv, "F:h")) != -1) {
@@ -160,39 +169,66 @@ int main(int argc, char **argv)
return 2;
}
+ /* used to determine whether the address is kernel space */
+ _text_addr = ksym_get_addr("_text");
+ if (!_text_addr) {
+ fprintf(stderr, "ERROR: no '_text' in /proc/kallsyms\n");
+ return 3;
+ }
+
/* create perf FDs for each CPU */
- nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- pmu_fd = malloc(nr_cpus * sizeof(int));
- if (pmu_fd == NULL) {
- fprintf(stderr, "ERROR: malloc of pmu_fd\n");
- return 1;
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ if (!links) {
+ fprintf(stderr, "ERROR: malloc of links\n");
+ goto cleanup;
}
- /* load BPF program */
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- fprintf(stderr, "ERROR: loading BPF program (errno %d):\n",
- errno);
- if (strcmp(bpf_log_buf, "") == 0)
- fprintf(stderr, "Try: ulimit -l unlimited\n");
- else
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
}
+
+ prog = bpf_object__find_program_by_name(obj, "do_sample");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "ip_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
/* do sampling */
printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
freq, secs);
- if (sampling_start(pmu_fd, freq) != 0)
- return 1;
+ if (sampling_start(freq, prog, links) != 0)
+ goto cleanup;
+
sleep(secs);
- sampling_end(pmu_fd);
- free(pmu_fd);
+ error = 0;
+cleanup:
+ sampling_end(links);
/* output sample counts */
- print_ip_map(map_fd[0]);
+ if (!error)
+ print_ip_map(map_fd);
- return 0;
+ free(links);
+ bpf_object__close(obj);
+ return error;
}
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index 6fc6e193ef1b..5b66f2401b96 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -9,10 +9,10 @@
* if (value)
* (*(u64*)value) += 1;
*
- * - attaches this program to eth0 raw socket
+ * - attaches this program to loopback interface "lo" raw socket
*
* - every second user space reads map[tcp], map[udp], map[icmp] to see
- * how many packets of given protocol were seen on eth0
+ * how many packets of given protocol were seen on "lo"
*/
#include <stdio.h>
#include <unistd.h>
@@ -26,8 +26,10 @@
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <stddef.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include "bpf_insn.h"
#include "sock_example.h"
+#include "bpf_util.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
@@ -36,8 +38,8 @@ static int test_sock(void)
int sock = -1, map_fd, prog_fd, i, key;
long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 256, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value),
+ 256, NULL);
if (map_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
goto cleanup;
@@ -53,14 +55,18 @@ static int test_sock(void)
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = bpf_log_buf,
+ .log_size = BPF_LOG_BUF_SIZE,
+ );
- prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, insns_cnt,
- "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ prog, insns_cnt, &opts);
if (prog_fd < 0) {
printf("failed to load prog '%s'\n", strerror(errno));
goto cleanup;
@@ -98,7 +104,7 @@ int main(void)
{
FILE *f;
- f = popen("ping -c5 localhost", "r");
+ f = popen("ping -4 -c5 localhost", "r");
(void)f;
return test_sock();
diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h
index d8014065d479..a27d7579bc73 100644
--- a/samples/bpf/sock_example.h
+++ b/samples/bpf/sock_example.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <stdlib.h>
#include <stdio.h>
#include <linux/unistd.h>
@@ -8,7 +9,6 @@
#include <net/if.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
-#include "libbpf.h"
static inline int open_raw_sock(const char *name)
{
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
deleted file mode 100644
index 533dd11a6baa..000000000000
--- a/samples/bpf/sock_flags_kern.c
+++ /dev/null
@@ -1,44 +0,0 @@
-#include <uapi/linux/bpf.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
-
-SEC("cgroup/sock1")
-int bpf_prog1(struct bpf_sock *sk)
-{
- char fmt[] = "socket: family %d type %d protocol %d\n";
-
- bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
-
- /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
- * ie., make ping6 fail
- */
- if (sk->family == PF_INET6 &&
- sk->type == SOCK_RAW &&
- sk->protocol == IPPROTO_ICMPV6)
- return 0;
-
- return 1;
-}
-
-SEC("cgroup/sock2")
-int bpf_prog2(struct bpf_sock *sk)
-{
- char fmt[] = "socket: family %d type %d protocol %d\n";
-
- bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
-
- /* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets
- * ie., make ping fail
- */
- if (sk->family == PF_INET &&
- sk->type == SOCK_RAW &&
- sk->protocol == IPPROTO_ICMP)
- return 0;
-
- return 1;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c
index ed18e9a4909c..431c956460ad 100644
--- a/samples/bpf/sockex1_kern.c
+++ b/samples/bpf/sockex1_kern.c
@@ -2,14 +2,15 @@
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 256,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 256);
+} my_map SEC(".maps");
SEC("socket1")
int bpf_prog1(struct __sk_buff *skb)
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
index 6cd2feb3e9b3..9e8d39e245c1 100644
--- a/samples/bpf/sockex1_user.c
+++ b/samples/bpf/sockex1_user.c
@@ -1,31 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
#include <linux/bpf.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
int main(int ac, char **argv)
{
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ int map_fd, prog_fd;
char filename[256];
+ int i, sock, err;
FILE *f;
- int i, sock;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
- }
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER);
+
+ err = bpf_object__load(obj);
+ if (err)
+ return 1;
+
+ prog_fd = bpf_program__fd(prog);
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
sock = open_raw_sock("lo");
- assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
- sizeof(prog_fd[0])) == 0);
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+ sizeof(prog_fd)) == 0);
- f = popen("ping -c5 localhost", "r");
+ f = popen("ping -4 -c5 localhost", "r");
(void) f;
for (i = 0; i < 5; i++) {
@@ -33,13 +46,13 @@ int main(int ac, char **argv)
int key;
key = IPPROTO_TCP;
- assert(bpf_map_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0);
+ assert(bpf_map_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
key = IPPROTO_UDP;
- assert(bpf_map_lookup_elem(map_fd[0], &key, &udp_cnt) == 0);
+ assert(bpf_map_lookup_elem(map_fd, &key, &udp_cnt) == 0);
key = IPPROTO_ICMP;
- assert(bpf_map_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
+ assert(bpf_map_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
printf("TCP %lld UDP %lld ICMP %lld bytes\n",
tcp_cnt, udp_cnt, icmp_cnt);
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index f58acfc92556..f93d9145ab8a 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -1,11 +1,12 @@
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
@@ -14,7 +15,7 @@ struct vlan_hdr {
__be16 h_vlan_encapsulated_proto;
};
-struct bpf_flow_keys {
+struct flow_key_record {
__be32 src;
__be32 dst;
union {
@@ -30,7 +31,6 @@ static inline int proto_ports_offset(__u64 proto)
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
- case IPPROTO_DCCP:
case IPPROTO_ESP:
case IPPROTO_SCTP:
case IPPROTO_UDPLITE:
@@ -59,7 +59,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
}
static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
- struct bpf_flow_keys *flow)
+ struct flow_key_record *flow)
{
__u64 verlen;
@@ -83,7 +83,7 @@ static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto
}
static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
- struct bpf_flow_keys *flow)
+ struct flow_key_record *flow)
{
*ip_proto = load_byte(skb,
nhoff + offsetof(struct ipv6hdr, nexthdr));
@@ -96,7 +96,8 @@ static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_pro
return nhoff;
}
-static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow)
+static inline bool flow_dissector(struct __sk_buff *skb,
+ struct flow_key_record *flow)
{
__u64 nhoff = ETH_HLEN;
__u64 ip_proto;
@@ -188,17 +189,17 @@ struct pair {
long bytes;
};
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__be32),
- .value_size = sizeof(struct pair),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __be32);
+ __type(value, struct pair);
+ __uint(max_entries, 1024);
+} hash_map SEC(".maps");
SEC("socket2")
int bpf_prog2(struct __sk_buff *skb)
{
- struct bpf_flow_keys flow = {};
+ struct flow_key_record flow = {};
struct pair *value;
u32 key;
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index 0e0207c90841..2c18471336f0 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -1,12 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
#include <linux/bpf.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
-#include <sys/resource.h>
struct pair {
__u64 packets;
@@ -15,33 +15,42 @@ struct pair {
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int map_fd, prog_fd;
char filename[256];
+ int i, sock, err;
FILE *f;
- int i, sock;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
+ err = bpf_object__load(obj);
+ if (err)
return 1;
- }
+
+ prog_fd = bpf_program__fd(prog);
+ map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
sock = open_raw_sock("lo");
- assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
- sizeof(prog_fd[0])) == 0);
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+ sizeof(prog_fd)) == 0);
- f = popen("ping -c5 localhost", "r");
+ f = popen("ping -4 -c5 localhost", "r");
(void) f;
for (i = 0; i < 5; i++) {
int key = 0, next_key;
struct pair value;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
printf("ip %s bytes %lld packets %lld\n",
inet_ntoa((struct in_addr){htonl(next_key)}),
value.bytes, value.packets);
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 95907f8d2b17..822c13242251 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -5,7 +5,6 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
@@ -13,55 +12,22 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
-#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
-
-struct bpf_map_def SEC("maps") jmp_table = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 8,
-};
-
#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4
-/* protocol dispatch routine.
- * It tail-calls next BPF program depending on eth proto
- * Note, we could have used:
- * bpf_tail_call(skb, &jmp_table, proto);
- * but it would need large prog_array
- */
-static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
-{
- switch (proto) {
- case ETH_P_8021Q:
- case ETH_P_8021AD:
- bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
- break;
- case ETH_P_MPLS_UC:
- case ETH_P_MPLS_MC:
- bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
- break;
- case ETH_P_IP:
- bpf_tail_call(skb, &jmp_table, PARSE_IP);
- break;
- case ETH_P_IPV6:
- bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
- break;
- }
-}
-
struct vlan_hdr {
__be16 h_vlan_TCI;
__be16 h_vlan_encapsulated_proto;
};
-struct bpf_flow_keys {
+struct flow_key_record {
__be32 src;
__be32 dst;
union {
@@ -71,6 +37,8 @@ struct bpf_flow_keys {
__u32 ip_proto;
};
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);
+
static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
@@ -88,15 +56,15 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
}
struct globals {
- struct bpf_flow_keys flow;
+ struct flow_key_record flow;
};
-struct bpf_map_def SEC("maps") percpu_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct globals),
- .max_entries = 32,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct globals);
+ __uint(max_entries, 32);
+} percpu_map SEC(".maps");
/* user poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
@@ -112,16 +80,16 @@ struct pair {
__u64 bytes;
};
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct bpf_flow_keys),
- .value_size = sizeof(struct pair),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct flow_key_record);
+ __type(value, struct pair);
+ __uint(max_entries, 1024);
+} hash_map SEC(".maps");
static void update_stats(struct __sk_buff *skb, struct globals *g)
{
- struct bpf_flow_keys key = g->flow;
+ struct flow_key_record key = g->flow;
struct pair *value;
value = bpf_map_lookup_elem(&hash_map, &key);
@@ -186,7 +154,8 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb,
}
}
-PROG(PARSE_IP)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_ip(struct __sk_buff *skb)
{
struct globals *g = this_cpu_globals();
__u32 nhoff, verlen, ip_proto;
@@ -214,7 +183,8 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
return 0;
}
-PROG(PARSE_IPV6)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_ipv6(struct __sk_buff *skb)
{
struct globals *g = this_cpu_globals();
__u32 nhoff, ip_proto;
@@ -237,7 +207,8 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
return 0;
}
-PROG(PARSE_VLAN)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_vlan(struct __sk_buff *skb)
{
__u32 nhoff, proto;
@@ -253,7 +224,8 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb)
return 0;
}
-PROG(PARSE_MPLS)(struct __sk_buff *skb)
+SEC("socket")
+int bpf_func_mpls(struct __sk_buff *skb)
{
__u32 nhoff, label;
@@ -276,7 +248,49 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb)
return 0;
}
-SEC("socket/0")
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(max_entries, 8);
+ __array(values, u32 (void *));
+} prog_array_init SEC(".maps") = {
+ .values = {
+ [PARSE_VLAN] = (void *)&bpf_func_vlan,
+ [PARSE_IP] = (void *)&bpf_func_ip,
+ [PARSE_IPV6] = (void *)&bpf_func_ipv6,
+ [PARSE_MPLS] = (void *)&bpf_func_mpls,
+ },
+};
+
+/* Protocol dispatch routine. It tail-calls next BPF program depending
+ * on eth proto. Note, we could have used ...
+ *
+ * bpf_tail_call(skb, &prog_array_init, proto);
+ *
+ * ... but it would need large prog_array and cannot be optimised given
+ * the map key is not static.
+ */
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
+{
+ switch (proto) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
+ break;
+ case ETH_P_MPLS_UC:
+ case ETH_P_MPLS_MC:
+ bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
+ break;
+ case ETH_P_IP:
+ bpf_tail_call(skb, &prog_array_init, PARSE_IP);
+ break;
+ case ETH_P_IPV6:
+ bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
+ break;
+ }
+}
+
+SEC("socket")
int main_prog(struct __sk_buff *skb)
{
__u32 nhoff = ETH_HLEN;
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index 877ecf8fc5ac..56044acbd25d 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -1,18 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
-#include <sys/resource.h>
-#define PARSE_IP 3
-#define PARSE_IP_PROG_FD (prog_fd[0])
-#define PROG_ARRAY_FD (map_fd[0])
-
-struct bpf_flow_keys {
+struct flow_key_record {
__be32 src;
__be32 dst;
union {
@@ -29,47 +24,66 @@ struct pair {
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int i, sock, fd, main_prog_fd, hash_map_fd;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
FILE *f;
- int i, sock, err, id, key = PARSE_IP;
- struct bpf_prog_info info = {};
- uint32_t info_len = sizeof(info);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ hash_map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
+ if (hash_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
}
- /* Test fd array lookup which returns the id of the bpf_prog */
- err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
- assert(!err);
- err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
- assert(!err);
- assert(id == info.id);
+ /* find BPF main program */
+ main_prog_fd = 0;
+ bpf_object__for_each_program(prog, obj) {
+ fd = bpf_program__fd(prog);
+
+ if (!strcmp(bpf_program__name(prog), "main_prog"))
+ main_prog_fd = fd;
+ }
+
+ if (main_prog_fd == 0) {
+ fprintf(stderr, "ERROR: can't find main_prog\n");
+ goto cleanup;
+ }
sock = open_raw_sock("lo");
- assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ /* attach BPF program to socket */
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &main_prog_fd,
sizeof(__u32)) == 0);
if (argc > 1)
- f = popen("ping -c5 localhost", "r");
+ f = popen("ping -4 -c5 localhost", "r");
else
f = popen("netperf -l 4 localhost", "r");
(void) f;
for (i = 0; i < 5; i++) {
- struct bpf_flow_keys key = {}, next_key;
+ struct flow_key_record key = {}, next_key;
struct pair value;
sleep(1);
printf("IP src.port -> dst.port bytes packets\n");
- while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[2], &next_key, &value);
+ while (bpf_map_get_next_key(hash_map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(hash_map_fd, &next_key, &value);
printf("%s.%05d -> %s.%05d %12lld %12lld\n",
inet_ntoa((struct in_addr){htonl(next_key.src)}),
next_key.port16[0],
@@ -79,5 +93,8 @@ int main(int argc, char **argv)
key = next_key;
}
}
+
+cleanup:
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/spintest.bpf.c b/samples/bpf/spintest.bpf.c
new file mode 100644
index 000000000000..cba5a9d50783
--- /dev/null
+++ b/samples/bpf/spintest.bpf.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2016, Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include "vmlinux.h"
+#include <linux/version.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(long));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_map2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
+
+#define PROG(foo) \
+int foo(struct pt_regs *ctx) \
+{ \
+ long v = PT_REGS_IP(ctx), *val; \
+\
+ val = bpf_map_lookup_elem(&my_map, &v); \
+ bpf_map_update_elem(&my_map, &v, &v, BPF_ANY); \
+ bpf_map_update_elem(&my_map2, &v, &v, BPF_ANY); \
+ bpf_map_delete_elem(&my_map2, &v); \
+ bpf_get_stackid(ctx, &stackmap, BPF_F_REUSE_STACKID); \
+ return 0; \
+}
+
+/* add kprobes to all possible *spin* functions */
+SEC("kprobe.multi/spin_*lock*")PROG(spin_lock)
+SEC("kprobe.multi/*_spin_on_owner")PROG(spin_on_owner)
+SEC("kprobe.multi/_raw_spin_*lock*")PROG(raw_spin_lock)
+
+/* and to inner bpf helpers */
+SEC("kprobe/htab_map_update_elem")PROG(p15)
+SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
+SEC("kprobe/htab_map_alloc")PROG(p17)
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
deleted file mode 100644
index ce0167d09cdc..000000000000
--- a/samples/bpf/spintest_kern.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Copyright (c) 2016, Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-struct bpf_map_def SEC("maps") my_map2 = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
-
-#define PROG(foo) \
-int foo(struct pt_regs *ctx) \
-{ \
- long v = PT_REGS_IP(ctx), *val; \
-\
- val = bpf_map_lookup_elem(&my_map, &v); \
- bpf_map_update_elem(&my_map, &v, &v, BPF_ANY); \
- bpf_map_update_elem(&my_map2, &v, &v, BPF_ANY); \
- bpf_map_delete_elem(&my_map2, &v); \
- bpf_get_stackid(ctx, &stackmap, BPF_F_REUSE_STACKID); \
- return 0; \
-}
-
-/* add kprobes to all possible *spin* functions */
-SEC("kprobe/spin_unlock")PROG(p1)
-SEC("kprobe/spin_lock")PROG(p2)
-SEC("kprobe/mutex_spin_on_owner")PROG(p3)
-SEC("kprobe/rwsem_spin_on_owner")PROG(p4)
-SEC("kprobe/spin_unlock_irqrestore")PROG(p5)
-SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6)
-SEC("kprobe/_raw_spin_unlock_bh")PROG(p7)
-SEC("kprobe/_raw_spin_unlock")PROG(p8)
-SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9)
-SEC("kprobe/_raw_spin_trylock_bh")PROG(p10)
-SEC("kprobe/_raw_spin_lock_irq")PROG(p11)
-SEC("kprobe/_raw_spin_trylock")PROG(p12)
-SEC("kprobe/_raw_spin_lock")PROG(p13)
-SEC("kprobe/_raw_spin_lock_bh")PROG(p14)
-/* and to inner bpf helpers */
-SEC("kprobe/htab_map_update_elem")PROG(p15)
-SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
-SEC("kprobe/htab_map_alloc")PROG(p17)
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index 80676c25fa50..55971edb1088 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -1,50 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <unistd.h>
-#include <linux/bpf.h>
#include <string.h>
#include <assert.h>
-#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "trace_helpers.h"
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[20];
long key, next_key, value;
+ struct bpf_program *prog;
+ int map_fd, i, j = 0;
char filename[256];
struct ksym *sym;
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
for (i = 0; i < 5; i++) {
key = 0;
printf("kprobing funcs:");
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
assert(next_key == value);
sym = ksym_search(value);
- printf(" %s", sym->name);
key = next_key;
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ continue;
+ }
+
+ printf(" %s", sym->name);
}
if (key)
printf("\n");
key = 0;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0)
- bpf_map_delete_elem(map_fd[0], &next_key);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0)
+ bpf_map_delete_elem(map_fd, &next_key);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c
index ce2a30b11248..a6e600f3d477 100644
--- a/samples/bpf/syscall_nrs.c
+++ b/samples/bpf/syscall_nrs.c
@@ -1,6 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
#include <uapi/linux/unistd.h>
#include <linux/kbuild.h>
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+
#define SYSNR(_NR) DEFINE(SYS ## _NR, _NR)
void syscall_defines(void)
@@ -8,5 +12,13 @@ void syscall_defines(void)
COMMENT("Linux system call numbers.");
SYSNR(__NR_write);
SYSNR(__NR_read);
+#ifdef __NR_mmap2
+ SYSNR(__NR_mmap2);
+#endif
+#ifdef __NR_mmap
SYSNR(__NR_mmap);
+#endif
+
}
+
+#pragma GCC diagnostic pop
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
new file mode 100644
index 000000000000..58fef969a60e
--- /dev/null
+++ b/samples/bpf/syscall_tp_kern.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2017 Facebook
+ */
+#include <uapi/linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#if !defined(__aarch64__)
+struct syscalls_enter_open_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long filename_ptr;
+ long flags;
+ long mode;
+};
+#endif
+
+struct syscalls_exit_open_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long ret;
+};
+
+struct syscalls_enter_open_at_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long long dfd;
+ long filename_ptr;
+ long flags;
+ long mode;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} enter_open_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} exit_open_map SEC(".maps");
+
+static __always_inline void count(void *map)
+{
+ u32 key = 0;
+ u32 *value, init_val = 1;
+
+ value = bpf_map_lookup_elem(map, &key);
+ if (value)
+ *value += 1;
+ else
+ bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST);
+}
+
+#if !defined(__aarch64__)
+SEC("tracepoint/syscalls/sys_enter_open")
+int trace_enter_open(struct syscalls_enter_open_args *ctx)
+{
+ count(&enter_open_map);
+ return 0;
+}
+#endif
+
+SEC("tracepoint/syscalls/sys_enter_openat")
+int trace_enter_open_at(struct syscalls_enter_open_at_args *ctx)
+{
+ count(&enter_open_map);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_enter_openat2")
+int trace_enter_open_at2(struct syscalls_enter_open_at_args *ctx)
+{
+ count(&enter_open_map);
+ return 0;
+}
+
+#if !defined(__aarch64__)
+SEC("tracepoint/syscalls/sys_exit_open")
+int trace_enter_exit(struct syscalls_exit_open_args *ctx)
+{
+ count(&exit_open_map);
+ return 0;
+}
+#endif
+
+SEC("tracepoint/syscalls/sys_exit_openat")
+int trace_enter_exit_at(struct syscalls_exit_open_args *ctx)
+{
+ count(&exit_open_map);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_exit_openat2")
+int trace_enter_exit_at2(struct syscalls_exit_open_args *ctx)
+{
+ count(&exit_open_map);
+ return 0;
+}
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
new file mode 100644
index 000000000000..7a09ac74fac0
--- /dev/null
+++ b/samples/bpf/syscall_tp_user.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2017 Facebook
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/perf_event.h>
+#include <errno.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
+ * This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
+ */
+
+static void usage(const char *cmd)
+{
+ printf("USAGE: %s [-i nr_tests] [-h]\n", cmd);
+ printf(" -i nr_tests # rounds of test to run\n");
+ printf(" -h # help\n");
+}
+
+static void verify_map(int map_id)
+{
+ __u32 key = 0;
+ __u32 val;
+
+ if (bpf_map_lookup_elem(map_id, &key, &val) != 0) {
+ fprintf(stderr, "map_lookup failed: %s\n", strerror(errno));
+ return;
+ }
+ if (val == 0) {
+ fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
+ return;
+ }
+
+ printf("verify map:%d val: %d\n", map_id, val);
+
+ val = 0;
+ if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) {
+ fprintf(stderr, "map_update failed: %s\n", strerror(errno));
+ return;
+ }
+}
+
+static int test(char *filename, int nr_tests)
+{
+ int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0;
+ struct bpf_link **links = NULL;
+ struct bpf_object *objs[nr_tests];
+ struct bpf_program *prog;
+
+ for (i = 0; i < nr_tests; i++) {
+ objs[i] = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(objs[i])) {
+ fprintf(stderr, "opening BPF object file failed\n");
+ objs[i] = NULL;
+ goto cleanup;
+ }
+
+ /* One-time initialization */
+ if (!links) {
+ int nr_progs = 0;
+
+ bpf_object__for_each_program(prog, objs[i])
+ nr_progs += 1;
+
+ links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *));
+
+ if (!links)
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(objs[i])) {
+ fprintf(stderr, "loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "enter_open_map");
+ map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "exit_open_map");
+ if (map0_fds[i] < 0 || map1_fds[i] < 0) {
+ fprintf(stderr, "finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, objs[i]) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+ printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]);
+ }
+
+ /* current load_bpf_file has perf_event_open default pid = -1
+ * and cpu = 0, which permits attached bpf execution on
+ * all cpus for all pid's. bpf program execution ignores
+ * cpu affinity.
+ */
+ /* trigger some "open" operations */
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "open failed: %s\n", strerror(errno));
+ return 1;
+ }
+ close(fd);
+
+ /* verify the map */
+ for (i = 0; i < nr_tests; i++) {
+ verify_map(map0_fds[i]);
+ verify_map(map1_fds[i]);
+ }
+
+cleanup:
+ if (links) {
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ free(links);
+ }
+
+ for (i--; i >= 0; i--)
+ bpf_object__close(objs[i]);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int opt, nr_tests = 1;
+ char filename[256];
+
+ while ((opt = getopt(argc, argv, "i:h")) != -1) {
+ switch (opt) {
+ case 'i':
+ nr_tests = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ return 0;
+ }
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ return test(filename, nr_tests);
+}
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
new file mode 100644
index 000000000000..186ac0a79c0a
--- /dev/null
+++ b/samples/bpf/task_fd_query_kern.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("kprobe/blk_mq_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kretprobe/__blk_account_io_done")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
new file mode 100644
index 000000000000..1e61f2180470
--- /dev/null
+++ b/samples/bpf/task_fd_query_user.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/perf_event.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+static struct bpf_program *progs[2];
+static struct bpf_link *links[2];
+
+#define CHECK_PERROR_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("FAIL: %s:\n", __func__); \
+ perror(" "); \
+ return -1; \
+ } \
+})
+
+#define CHECK_AND_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) \
+ return -1; \
+})
+
+static __u64 ptr_to_u64(void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ errno = 0;
+ ret = (int)strtol(buf, NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
+static int bpf_get_retprobe_bit(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+ CHECK_PERROR_RET(strlen(buf) < strlen("config:"));
+
+ errno = 0;
+ ret = (int)strtol(buf + strlen("config:"), NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+static int test_debug_fs_kprobe(int link_idx, const char *fn_name,
+ __u32 expected_fd_type)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err, event_fd;
+ char buf[256];
+
+ len = sizeof(buf);
+ event_fd = bpf_link__fd(links[link_idx]);
+ err = bpf_task_fd_query(getpid(), event_fd, 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
+ __func__, link_idx, fn_name);
+ perror(" :");
+ return -1;
+ }
+ if (strcmp(buf, fn_name) != 0 ||
+ fd_type != expected_fd_type ||
+ probe_offset != 0x0 || probe_addr != 0x0) {
+ printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
+ link_idx);
+ printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
+ " probe_addr: 0x%llx\n",
+ buf, fd_type, probe_offset, probe_addr);
+ return -1;
+ }
+ return 0;
+}
+
+static int test_nondebug_fs_kuprobe_common(const char *event_type,
+ const char *name, __u64 offset, __u64 addr, bool is_return,
+ char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+ __u64 *probe_offset, __u64 *probe_addr)
+{
+ int is_return_bit = bpf_get_retprobe_bit(event_type);
+ int type = bpf_find_probe_type(event_type);
+ struct perf_event_attr attr = {};
+ struct bpf_link *link;
+ int fd, err = -1;
+
+ if (type < 0 || is_return_bit < 0) {
+ printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
+ __func__, type, is_return_bit);
+ return err;
+ }
+
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ if (is_return)
+ attr.config |= 1 << is_return_bit;
+
+ if (name) {
+ attr.config1 = ptr_to_u64((void *)name);
+ attr.config2 = offset;
+ } else {
+ attr.config1 = 0;
+ attr.config2 = addr;
+ }
+ attr.size = sizeof(attr);
+ attr.type = type;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ link = bpf_program__attach_perf_event(progs[0], fd);
+ if (libbpf_get_error(link)) {
+ printf("ERROR: bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(fd);
+ goto cleanup;
+ }
+
+ CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
+ prog_id, fd_type, probe_offset, probe_addr) < 0);
+ err = 0;
+
+cleanup:
+ bpf_link__destroy(link);
+ return err;
+}
+
+static int test_nondebug_fs_probe(const char *event_type, const char *name,
+ __u64 offset, __u64 addr, bool is_return,
+ __u32 expected_fd_type,
+ __u32 expected_ret_fd_type,
+ char *buf, __u32 buf_len)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 prog_id, fd_type;
+ int err;
+
+ err = test_nondebug_fs_kuprobe_common(event_type, name,
+ offset, addr, is_return,
+ buf, &buf_len, &prog_id,
+ &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, "
+ "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
+ __func__, name ? name : "", offset, addr, is_return);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != expected_ret_fd_type) ||
+ (!is_return && fd_type != expected_fd_type)) {
+ printf("FAIL: %s, incorrect fd_type %u\n",
+ __func__, fd_type);
+ return -1;
+ }
+ if (name) {
+ if (strcmp(name, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
+ __func__, probe_offset);
+ return -1;
+ }
+ } else {
+ if (buf_len != 0) {
+ printf("FAIL: %s, incorrect buf %p\n",
+ __func__, buf);
+ return -1;
+ }
+
+ if (probe_addr != addr) {
+ printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
+ __func__, probe_addr);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
+{
+ char buf[256], event_alias[sizeof("test_1234567890")];
+ const char *event_type = "uprobe";
+ struct perf_event_attr attr = {};
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err = -1, res, kfd, efd;
+ struct bpf_link *link;
+ ssize_t bytes;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events",
+ event_type);
+ kfd = open(buf, O_WRONLY | O_TRUNC, 0);
+ CHECK_PERROR_RET(kfd < 0);
+
+ res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));
+
+ res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
+ is_return ? 'r' : 'p', event_type, event_alias,
+ binary_path, offset);
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
+ CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);
+
+ close(kfd);
+ kfd = -1;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/tracing/events/%ss/%s/id",
+ event_type, event_alias);
+ efd = open(buf, O_RDONLY, 0);
+ CHECK_PERROR_RET(efd < 0);
+
+ bytes = read(efd, buf, sizeof(buf));
+ CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
+ close(efd);
+ buf[bytes] = '\0';
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ link = bpf_program__attach_perf_event(progs[0], kfd);
+ if (libbpf_get_error(link)) {
+ printf("ERROR: bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(kfd);
+ goto cleanup;
+ }
+
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) ||
+ (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) {
+ printf("FAIL: %s, incorrect fd_type %u\n", __func__,
+ fd_type);
+ return -1;
+ }
+ if (strcmp(binary_path, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
+ probe_offset);
+ return -1;
+ }
+ err = 0;
+
+cleanup:
+ bpf_link__destroy(link);
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ extern char __executable_start;
+ char filename[256], buf[256];
+ __u64 uprobe_file_offset;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int i = 0, err = -1;
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return err;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return err;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ progs[i] = prog;
+ links[i] = bpf_program__attach(progs[i]);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
+ }
+
+ /* test two functions in the corresponding *_kern.c file */
+ CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
+ BPF_FD_TYPE_KPROBE));
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "__blk_account_io_done",
+ BPF_FD_TYPE_KRETPROBE));
+
+ /* test nondebug fs kprobe */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#ifdef __x86_64__
+ /* set a kprobe on "bpf_check + 0x5", which is x64 specific */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#endif
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ true, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ NULL, 0));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ 0, 0));
+
+ /* test nondebug fs uprobe */
+ /* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
+ * and the default linker script, which defines __executable_start as
+ * the start of the .text section. The calculation could be different
+ * on different systems with different compilers. The right way is
+ * to parse the ELF file. We took a shortcut here.
+ */
+ uprobe_file_offset = (unsigned long)main - (unsigned long)&__executable_start;
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, false,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, true,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+
+ /* test debug fs uprobe */
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ false));
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ true));
+ err = 0;
+
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
+ return err;
+}
diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh
index 80a05591a140..a28a8fc99dbe 100755
--- a/samples/bpf/tc_l2_redirect.sh
+++ b/samples/bpf/tc_l2_redirect.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
[[ -z $TC ]] && TC='tc'
[[ -z $IP ]] && IP='ip'
@@ -7,6 +8,7 @@ REDIRECT_USER='./tc_l2_redirect'
REDIRECT_BPF='./tc_l2_redirect_kern.o'
RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
+IPV6_DISABLED=$(< /proc/sys/net/ipv6/conf/all/disable_ipv6)
IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)
function config_common {
@@ -63,6 +65,7 @@ function config_common {
sysctl -q -w net.ipv4.conf.all.rp_filter=0
sysctl -q -w net.ipv6.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.disable_ipv6=0
}
function cleanup {
@@ -76,6 +79,7 @@ function cleanup {
$IP link del ip6t >& /dev/null
sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
+ sysctl -q -w net.ipv6.conf.all.disable_ipv6=$IPV6_DISABLED
rm -f /sys/fs/bpf/tc/globals/tun_iface
[[ -z $DEBUG ]] || set -x
set -e
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
index 7ef2a12b25b2..b19fa9b88fe0 100644
--- a/samples/bpf/tc_l2_redirect_kern.c
+++ b/samples/bpf/tc_l2_redirect_kern.c
@@ -15,7 +15,7 @@
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define _htonl __builtin_bswap32
@@ -58,14 +58,11 @@ static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr)
SEC("l2_to_iptun_ingress_forward")
int _l2_to_iptun_ingress_forward(struct __sk_buff *skb)
{
- struct bpf_tunnel_key tkey = {};
void *data = (void *)(long)skb->data;
struct eth_hdr *eth = data;
void *data_end = (void *)(long)skb->data_end;
int key = 0, *ifindex;
- int ret;
-
if (data + sizeof(*eth) > data_end)
return TC_ACT_OK;
@@ -115,8 +112,6 @@ int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb)
void *data_end = (void *)(long)skb->data_end;
int key = 0, *ifindex;
- int ret;
-
if (data + sizeof(*eth) > data_end)
return TC_ACT_OK;
@@ -205,7 +200,6 @@ int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb)
SEC("drop_non_tun_vip")
int _drop_non_tun_vip(struct __sk_buff *skb)
{
- struct bpf_tunnel_key tkey = {};
void *data = (void *)(long)skb->data;
struct eth_hdr *eth = data;
void *data_end = (void *)(long)skb->data_end;
diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c
index 28995a776560..d11a6e1e9912 100644
--- a/samples/bpf/tc_l2_redirect_user.c
+++ b/samples/bpf/tc_l2_redirect_user.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <linux/unistd.h>
#include <linux/bpf.h>
@@ -13,7 +10,7 @@
#include <string.h>
#include <errno.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
static void usage(void)
{
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 274c884c87fe..e9356130f84e 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -7,7 +7,8 @@
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
/* compiler workaround */
#define _htonl __builtin_bswap32
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
deleted file mode 100644
index 270edcc149a1..000000000000
--- a/samples/bpf/tcbpf2_kern.c
+++ /dev/null
@@ -1,382 +0,0 @@
-/* Copyright (c) 2016 VMware
- * Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/filter.h>
-#include <uapi/linux/pkt_cls.h>
-#include <net/ipv6.h>
-#include "bpf_helpers.h"
-
-#define _htonl __builtin_bswap32
-#define ERROR(ret) do {\
- char fmt[] = "ERROR line:%d ret:%d\n";\
- bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
- } while(0)
-
-struct geneve_opt {
- __be16 opt_class;
- u8 type;
- u8 length:5;
- u8 r3:1;
- u8 r2:1;
- u8 r1:1;
- u8 opt_data[8]; /* hard-coded to 8 byte */
-};
-
-struct vxlan_metadata {
- u32 gbp;
-};
-
-SEC("gre_set_tunnel")
-int _gre_set_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("gre_get_tunnel")
-int _gre_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "key %d remote ip 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
- return TC_ACT_OK;
-}
-
-SEC("vxlan_set_tunnel")
-int _vxlan_set_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- struct vxlan_metadata md;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
- ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("vxlan_get_tunnel")
-int _vxlan_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- struct vxlan_metadata md;
- char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.gbp);
-
- return TC_ACT_OK;
-}
-
-SEC("geneve_set_tunnel")
-int _geneve_set_tunnel(struct __sk_buff *skb)
-{
- int ret, ret2;
- struct bpf_tunnel_key key;
- struct geneve_opt gopt;
-
- __builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- key.tunnel_id = 2;
- key.tunnel_tos = 0;
- key.tunnel_ttl = 64;
-
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = 0xdeadbeef;
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("geneve_get_tunnel")
-int _geneve_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- struct geneve_opt gopt;
- char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, gopt.opt_class);
- return TC_ACT_OK;
-}
-
-SEC("ipip_set_tunnel")
-int _ipip_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key = {};
- void *data = (void *)(long)skb->data;
- struct iphdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
- void *data_end = (void *)(long)skb->data_end;
- int ret;
-
- /* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
- return TC_ACT_SHOT;
- }
-
- key.tunnel_ttl = 64;
- if (iph->protocol == IPPROTO_ICMP) {
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- } else {
- if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
- return TC_ACT_SHOT;
-
- if (tcp->dest == htons(5200))
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- else if (tcp->dest == htons(5201))
- key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
- else
- return TC_ACT_SHOT;
- }
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ipip_get_tunnel")
-int _ipip_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "remote ip 0x%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
- return TC_ACT_OK;
-}
-
-SEC("ipip6_set_tunnel")
-int _ipip6_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key = {};
- void *data = (void *)(long)skb->data;
- struct iphdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
- void *data_end = (void *)(long)skb->data_end;
- int ret;
-
- /* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
- return TC_ACT_SHOT;
- }
-
- key.remote_ipv6[0] = _htonl(0x2401db00);
- key.tunnel_ttl = 64;
-
- if (iph->protocol == IPPROTO_ICMP) {
- key.remote_ipv6[3] = _htonl(1);
- } else {
- if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) {
- ERROR(iph->protocol);
- return TC_ACT_SHOT;
- }
-
- if (tcp->dest == htons(5200)) {
- key.remote_ipv6[3] = _htonl(1);
- } else if (tcp->dest == htons(5201)) {
- key.remote_ipv6[3] = _htonl(2);
- } else {
- ERROR(tcp->dest);
- return TC_ACT_SHOT;
- }
- }
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ipip6_get_tunnel")
-int _ipip6_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
- _htonl(key.remote_ipv6[3]));
- return TC_ACT_OK;
-}
-
-SEC("ip6ip6_set_tunnel")
-int _ip6ip6_set_tunnel(struct __sk_buff *skb)
-{
- struct bpf_tunnel_key key = {};
- void *data = (void *)(long)skb->data;
- struct ipv6hdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
- void *data_end = (void *)(long)skb->data_end;
- int ret;
-
- /* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
- return TC_ACT_SHOT;
- }
-
- key.remote_ipv6[0] = _htonl(0x2401db00);
- key.tunnel_ttl = 64;
-
- if (iph->nexthdr == NEXTHDR_ICMP) {
- key.remote_ipv6[3] = _htonl(1);
- } else {
- if (iph->nexthdr != NEXTHDR_TCP) {
- ERROR(iph->nexthdr);
- return TC_ACT_SHOT;
- }
-
- if (tcp->dest == htons(5200)) {
- key.remote_ipv6[3] = _htonl(1);
- } else if (tcp->dest == htons(5201)) {
- key.remote_ipv6[3] = _htonl(2);
- } else {
- ERROR(tcp->dest);
- return TC_ACT_SHOT;
- }
- }
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- return TC_ACT_OK;
-}
-
-SEC("ip6ip6_get_tunnel")
-int _ip6ip6_get_tunnel(struct __sk_buff *skb)
-{
- int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
-
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
-
- bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
- _htonl(key.remote_ipv6[3]));
- return TC_ACT_OK;
-}
-
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
new file mode 100644
index 000000000000..822b0742b815
--- /dev/null
+++ b/samples/bpf/tcp_basertt_kern.c
@@ -0,0 +1,71 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set base_rtt to 80us when host is running TCP-NV and
+ * both hosts are in the same datacenter (as determined by IPv6 prefix).
+ *
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define DEBUG 1
+
+SEC("sockops")
+int bpf_basertt(struct bpf_sock_ops *skops)
+{
+ char cong[20];
+ char nv[] = "nv";
+ int rv = 0, n;
+ int op;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Check if both hosts are in the same datacenter. For this
+ * example they are if the 1st 5.5 bytes in the IPv6 address
+ * are the same.
+ */
+ if (skops->family == AF_INET6 &&
+ skops->local_ip6[0] == skops->remote_ip6[0] &&
+ (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+ (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+ switch (op) {
+ case BPF_SOCK_OPS_BASE_RTT:
+ n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION,
+ cong, sizeof(cong));
+ if (!n && !__builtin_memcmp(cong, nv, sizeof(nv))) {
+ /* Set base_rtt to 80us */
+ rv = 80;
+ } else if (n) {
+ rv = n;
+ } else {
+ rv = -1;
+ }
+ break;
+ default:
+ rv = -1;
+ }
+ } else {
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme
new file mode 100644
index 000000000000..78e247f62108
--- /dev/null
+++ b/samples/bpf/tcp_bpf.readme
@@ -0,0 +1,28 @@
+This file describes how to run the tcp_*_kern.o tcp_bpf (or socket_ops)
+programs. These programs attach to a cgroupv2. The following commands create
+a cgroupv2 and attach a bash shell to the group.
+
+ mkdir -p /tmp/cgroupv2
+ mount -t cgroup2 none /tmp/cgroupv2
+ mkdir -p /tmp/cgroupv2/foo
+ bash
+ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+
+Anything that runs under this shell belongs to the foo cgroupv2. To load
+(attach) one of the tcp_*_kern.o programs:
+
+ bpftool prog load tcp_basertt_kern.o /sys/fs/bpf/tcp_prog
+ bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
+ bpftool prog tracelog
+
+"bpftool prog tracelog" will continue to run printing the BPF log buffer.
+The tcp_*_kern.o programs use special print functions to print logging
+information (if enabled by the ifdef).
+
+If using netperf/netserver to create traffic, you need to run them under the
+cgroupv2 to which the BPF programs are attached (i.e. under bash shell
+attached to the cgroupv2).
+
+To remove (unattach) a socket_ops BPF program from a cgroupv2:
+
+ bpftool cgroup detach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
index ee83bbabd17c..6a80d08952ad 100644
--- a/samples/bpf/tcp_bufs_kern.c
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -9,7 +9,7 @@
* doing appropriate checks that indicate the hosts are far enough
* away (i.e. large RTT).
*
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>
@@ -17,18 +17,11 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
SEC("sockops")
int bpf_bufs(struct bpf_sock_ops *skops)
{
@@ -41,8 +34,10 @@ int bpf_bufs(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
@@ -61,8 +56,8 @@ int bpf_bufs(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of active connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
/* Nothing to do */
@@ -71,8 +66,8 @@ int bpf_bufs(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of passive connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
default:
rv = -1;
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
index d68eadd9ca2d..e88bd9ab0695 100644
--- a/samples/bpf/tcp_clamp_kern.c
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -9,7 +9,7 @@
* the same datacenter. For his example, we assume they are within the same
* datacenter when the first 5.5 bytes of their IPv6 addresses are the same.
*
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>
@@ -17,18 +17,11 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
SEC("sockops")
int bpf_clamp(struct bpf_sock_ops *skops)
{
@@ -41,8 +34,10 @@ int bpf_clamp(struct bpf_sock_ops *skops)
/* For testing purposes, only execute rest of BPF program
* if neither port numberis 55601
*/
- if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601)
- return -1;
+ if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) {
+ skops->reply = -1;
+ return 0;
+ }
op = (int) skops->op;
@@ -66,9 +61,9 @@ int bpf_clamp(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of active connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
&bufsize, sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
- SO_RCVBUF, &bufsize,
- sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET,
+ SO_RCVBUF, &bufsize,
+ sizeof(bufsize));
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
rv = bpf_setsockopt(skops, SOL_TCP,
@@ -80,12 +75,12 @@ int bpf_clamp(struct bpf_sock_ops *skops)
rv = bpf_setsockopt(skops, SOL_TCP,
TCP_BPF_SNDCWND_CLAMP,
&clamp, sizeof(clamp));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
- SO_SNDBUF, &bufsize,
- sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
- SO_RCVBUF, &bufsize,
- sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET,
+ SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET,
+ SO_RCVBUF, &bufsize,
+ sizeof(bufsize));
break;
default:
rv = -1;
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
index dac15bce1fa9..339415eac477 100644
--- a/samples/bpf/tcp_cong_kern.c
+++ b/samples/bpf/tcp_cong_kern.c
@@ -5,9 +5,9 @@
* License as published by the Free Software Foundation.
*
* BPF program to set congestion control to dctcp when both hosts are
- * in the same datacenter (as deteremined by IPv6 prefix).
+ * in the same datacenter (as determined by IPv6 prefix).
*
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>
@@ -16,18 +16,11 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
SEC("sockops")
int bpf_cong(struct bpf_sock_ops *skops)
{
@@ -39,8 +32,10 @@ int bpf_cong(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
new file mode 100644
index 000000000000..e80d3afd24bd
--- /dev/null
+++ b/samples/bpf/tcp_dumpstats_kern.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Refer to samples/bpf/tcp_bpf.readme for the instructions on
+ * how to run this sample program.
+ */
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define INTERVAL 1000000000ULL
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __u32 type;
+ __u32 map_flags;
+ int *key;
+ __u64 *value;
+} bpf_next_dump SEC(".maps") = {
+ .type = BPF_MAP_TYPE_SK_STORAGE,
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_tcp_sock *tcp_sk;
+ struct bpf_sock *sk;
+ __u64 *next_dump;
+ __u64 now;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+ return 1;
+ case BPF_SOCK_OPS_RTT_CB:
+ break;
+ default:
+ return 1;
+ }
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!next_dump)
+ return 1;
+
+ now = bpf_ktime_get_ns();
+ if (now < *next_dump)
+ return 1;
+
+ tcp_sk = bpf_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ *next_dump = now + INTERVAL;
+
+ bpf_printk("dsack_dups=%u delivered=%u\n",
+ tcp_sk->dsack_dups, tcp_sk->delivered);
+ bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
+ tcp_sk->delivered_ce, tcp_sk->icsk_retransmits);
+
+ return 1;
+}
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
index 23c5122ef819..d1444557358e 100644
--- a/samples/bpf/tcp_iw_kern.c
+++ b/samples/bpf/tcp_iw_kern.c
@@ -9,7 +9,7 @@
* would usually be done after doing appropriate checks that indicate
* the hosts are far enough away (i.e. large RTT).
*
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>
@@ -17,18 +17,11 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
SEC("sockops")
int bpf_iw(struct bpf_sock_ops *skops)
{
@@ -42,8 +35,10 @@ int bpf_iw(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
@@ -62,8 +57,8 @@ int bpf_iw(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of active connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw,
@@ -73,8 +68,8 @@ int bpf_iw(struct bpf_sock_ops *skops)
/* Set sndbuf and rcvbuf of passive connections */
rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
sizeof(bufsize));
- rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
- &bufsize, sizeof(bufsize));
+ rv += bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
break;
default:
rv = -1;
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
index 3f2a228f81ce..223d9c23b10c 100644
--- a/samples/bpf/tcp_rwnd_kern.c
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -8,7 +8,7 @@
* and the first 5.5 bytes of the IPv6 addresses are not the same (in this
* example that means both hosts are not the same datacenter).
*
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>
@@ -16,18 +16,11 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
SEC("sockops")
int bpf_rwnd(struct bpf_sock_ops *skops)
{
@@ -38,8 +31,10 @@ int bpf_rwnd(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) !=
- 55601 && skops->local_port != 55601)
- return -1;
+ 55601 && skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
index 3c3fc83d81cb..d58004eef124 100644
--- a/samples/bpf/tcp_synrto_kern.c
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -8,7 +8,7 @@
* and the first 5.5 bytes of the IPv6 addresses are the same (in this example
* that means both hosts are in the same datacenter).
*
- * Use load_sock_ops to load this BPF program.
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>
@@ -16,18 +16,11 @@
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <linux/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define DEBUG 1
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
SEC("sockops")
int bpf_synrto(struct bpf_sock_ops *skops)
{
@@ -38,8 +31,10 @@ int bpf_synrto(struct bpf_sock_ops *skops)
* if neither port numberis 55601
*/
if (bpf_ntohl(skops->remote_port) != 55601 &&
- skops->local_port != 55601)
- return -1;
+ skops->local_port != 55601) {
+ skops->reply = -1;
+ return 1;
+ }
op = (int) skops->op;
diff --git a/samples/bpf/tcp_tos_reflect_kern.c b/samples/bpf/tcp_tos_reflect_kern.c
new file mode 100644
index 000000000000..953fedc79ce1
--- /dev/null
+++ b/samples/bpf/tcp_tos_reflect_kern.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Facebook
+ *
+ * BPF program to automatically reflect TOS option from received syn packet
+ *
+ * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <linux/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define DEBUG 1
+
+SEC("sockops")
+int bpf_basertt(struct bpf_sock_ops *skops)
+{
+ char header[sizeof(struct ipv6hdr)];
+ struct ipv6hdr *hdr6;
+ struct iphdr *hdr;
+ int hdr_size = 0;
+ int save_syn = 1;
+ int tos = 0;
+ int rv = 0;
+ int op;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+ switch (op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ rv = bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &save_syn, sizeof(save_syn));
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ if (skops->family == AF_INET)
+ hdr_size = sizeof(struct iphdr);
+ else
+ hdr_size = sizeof(struct ipv6hdr);
+ rv = bpf_getsockopt(skops, SOL_TCP, TCP_SAVED_SYN,
+ header, hdr_size);
+ if (!rv) {
+ if (skops->family == AF_INET) {
+ hdr = (struct iphdr *) header;
+ tos = hdr->tos;
+ if (tos != 0)
+ bpf_setsockopt(skops, SOL_IP, IP_TOS,
+ &tos, sizeof(tos));
+ } else {
+ hdr6 = (struct ipv6hdr *) header;
+ tos = ((hdr6->priority) << 4 |
+ (hdr6->flow_lbl[0]) >> 4);
+ if (tos)
+ bpf_setsockopt(skops, SOL_IPV6,
+ IPV6_TCLASS,
+ &tos, sizeof(tos));
+ }
+ rv = 0;
+ }
+ break;
+ default:
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c
deleted file mode 100644
index 8a1b8b5d8def..000000000000
--- a/samples/bpf/test_cgrp2_array_pin.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/unistd.h>
-#include <linux/bpf.h>
-
-#include <stdio.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-#include <fcntl.h>
-
-#include "libbpf.h"
-
-static void usage(void)
-{
- printf("Usage: test_cgrp2_array_pin [...]\n");
- printf(" -F <file> File to pin an BPF cgroup array\n");
- printf(" -U <file> Update an already pinned BPF cgroup array\n");
- printf(" -v <value> Full path of the cgroup2\n");
- printf(" -h Display this help\n");
-}
-
-int main(int argc, char **argv)
-{
- const char *pinned_file = NULL, *cg2 = NULL;
- int create_array = 1;
- int array_key = 0;
- int array_fd = -1;
- int cg2_fd = -1;
- int ret = -1;
- int opt;
-
- while ((opt = getopt(argc, argv, "F:U:v:")) != -1) {
- switch (opt) {
- /* General args */
- case 'F':
- pinned_file = optarg;
- break;
- case 'U':
- pinned_file = optarg;
- create_array = 0;
- break;
- case 'v':
- cg2 = optarg;
- break;
- default:
- usage();
- goto out;
- }
- }
-
- if (!cg2 || !pinned_file) {
- usage();
- goto out;
- }
-
- cg2_fd = open(cg2, O_RDONLY);
- if (cg2_fd < 0) {
- fprintf(stderr, "open(%s,...): %s(%d)\n",
- cg2, strerror(errno), errno);
- goto out;
- }
-
- if (create_array) {
- array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,
- sizeof(uint32_t), sizeof(uint32_t),
- 1, 0);
- if (array_fd < 0) {
- fprintf(stderr,
- "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
- strerror(errno), errno);
- goto out;
- }
- } else {
- array_fd = bpf_obj_get(pinned_file);
- if (array_fd < 0) {
- fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
- pinned_file, strerror(errno), errno);
- goto out;
- }
- }
-
- ret = bpf_map_update_elem(array_fd, &array_key, &cg2_fd, 0);
- if (ret) {
- perror("bpf_map_update_elem");
- goto out;
- }
-
- if (create_array) {
- ret = bpf_obj_pin(array_fd, pinned_file);
- if (ret) {
- fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n",
- pinned_file, strerror(errno), errno);
- goto out;
- }
- }
-
-out:
- if (array_fd != -1)
- close(array_fd);
- if (cg2_fd != -1)
- close(cg2_fd);
- return ret;
-}
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
deleted file mode 100644
index 4bfcaf93fcf3..000000000000
--- a/samples/bpf/test_cgrp2_attach.c
+++ /dev/null
@@ -1,171 +0,0 @@
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- * The eBPF program accesses the map passed in to store two pieces of
- * information. The number of invocations of the program, which maps
- * to the number of packets received, is stored to key 0. Key 1 is
- * incremented on each iteration by the number of bytes stored in
- * the skb.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- * packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-
-#include <linux/bpf.h>
-
-#include "libbpf.h"
-
-enum {
- MAP_KEY_PACKETS,
- MAP_KEY_BYTES,
-};
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-static int prog_load(int map_fd, int verdict)
-{
- struct bpf_insn prog[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */
-
- /* Count packets */
- BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
- BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-
- /* Count bytes */
- BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
- BPF_LD_MAP_FD(BPF_REG_1, map_fd),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-
- BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
-}
-
-static int usage(const char *argv0)
-{
- printf("Usage: %s [-d] [-D] <cg-path> <egress|ingress>\n", argv0);
- printf(" -d Drop Traffic\n");
- printf(" -D Detach filter, and exit\n");
- return EXIT_FAILURE;
-}
-
-static int attach_filter(int cg_fd, int type, int verdict)
-{
- int prog_fd, map_fd, ret, key;
- long long pkt_cnt, byte_cnt;
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
- sizeof(key), sizeof(byte_cnt),
- 256, 0);
- if (map_fd < 0) {
- printf("Failed to create map: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- prog_fd = prog_load(map_fd, verdict);
- printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
-
- if (prog_fd < 0) {
- printf("Failed to load prog: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- ret = bpf_prog_attach(prog_fd, cg_fd, type, 0);
- if (ret < 0) {
- printf("Failed to attach prog to cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
- }
- while (1) {
- key = MAP_KEY_PACKETS;
- assert(bpf_map_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
-
- key = MAP_KEY_BYTES;
- assert(bpf_map_lookup_elem(map_fd, &key, &byte_cnt) == 0);
-
- printf("cgroup received %lld packets, %lld bytes\n",
- pkt_cnt, byte_cnt);
- sleep(1);
- }
-
- return EXIT_SUCCESS;
-}
-
-int main(int argc, char **argv)
-{
- int detach_only = 0, verdict = 1;
- enum bpf_attach_type type;
- int opt, cg_fd, ret;
-
- while ((opt = getopt(argc, argv, "Dd")) != -1) {
- switch (opt) {
- case 'd':
- verdict = 0;
- break;
- case 'D':
- detach_only = 1;
- break;
- default:
- return usage(argv[0]);
- }
- }
-
- if (argc - optind < 2)
- return usage(argv[0]);
-
- if (strcmp(argv[optind + 1], "ingress") == 0)
- type = BPF_CGROUP_INET_INGRESS;
- else if (strcmp(argv[optind + 1], "egress") == 0)
- type = BPF_CGROUP_INET_EGRESS;
- else
- return usage(argv[0]);
-
- cg_fd = open(argv[optind], O_DIRECTORY | O_RDONLY);
- if (cg_fd < 0) {
- printf("Failed to open cgroup path: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- if (detach_only) {
- ret = bpf_prog_detach(cg_fd, type);
- printf("bpf_prog_detach() returned '%s' (%d)\n",
- strerror(errno), errno);
- } else
- ret = attach_filter(cg_fd, type, verdict);
-
- return ret;
-}
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
deleted file mode 100644
index 3049b1f26267..000000000000
--- a/samples/bpf/test_cgrp2_attach2.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- * The eBPF program accesses the map passed in to store two pieces of
- * information. The number of invocations of the program, which maps
- * to the number of packets received, is stored to key 0. Key 1 is
- * incremented on each iteration by the number of bytes stored in
- * the skb.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- * packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <unistd.h>
-
-#include <linux/bpf.h>
-
-#include "libbpf.h"
-#include "cgroup_helpers.h"
-
-#define FOO "/foo"
-#define BAR "/foo/bar/"
-#define PING_CMD "ping -c1 -w1 127.0.0.1"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-static int prog_load(int verdict)
-{
- int ret;
- struct bpf_insn prog[] = {
- BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
-
- if (ret < 0) {
- log_err("Loading program");
- printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
- return 0;
- }
- return ret;
-}
-
-
-int main(int argc, char **argv)
-{
- int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
-
- allow_prog = prog_load(1);
- if (!allow_prog)
- goto err;
-
- drop_prog = prog_load(0);
- if (!drop_prog)
- goto err;
-
- if (setup_cgroup_environment())
- goto err;
-
- /* Create cgroup /foo, get fd, and join it */
- foo = create_and_get_cgroup(FOO);
- if (!foo)
- goto err;
-
- if (join_cgroup(FOO))
- goto err;
-
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
- log_err("Attaching prog to /foo");
- goto err;
- }
-
- printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
- assert(system(PING_CMD) != 0);
-
- /* Create cgroup /foo/bar, get fd, and join it */
- bar = create_and_get_cgroup(BAR);
- if (!bar)
- goto err;
-
- if (join_cgroup(BAR))
- goto err;
-
- printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
- assert(system(PING_CMD) != 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
- assert(system(PING_CMD) == 0);
-
- if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo/bar");
- goto err;
- }
-
- printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
- "This ping in cgroup /foo/bar should fail...\n");
- assert(system(PING_CMD) != 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo");
- goto err;
- }
-
- printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
- "This ping in cgroup /foo/bar should pass...\n");
- assert(system(PING_CMD) == 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
- errno = 0;
- log_err("Unexpected success attaching prog to /foo/bar");
- goto err;
- }
-
- if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
- errno = 0;
- log_err("Unexpected success in double detach from /foo");
- goto err;
- }
-
- if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching non-overridable prog to /foo");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
- errno = 0;
- log_err("Unexpected success attaching non-overridable prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
- errno = 0;
- log_err("Unexpected success attaching overridable prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
- errno = 0;
- log_err("Unexpected success attaching overridable prog to /foo");
- goto err;
- }
-
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching different non-overridable prog to /foo");
- goto err;
- }
-
- goto out;
-
-err:
- rc = 1;
-
-out:
- close(foo);
- close(bar);
- cleanup_cgroup_environment();
- if (!rc)
- printf("PASS\n");
- else
- printf("FAIL\n");
- return rc;
-}
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
deleted file mode 100644
index c3cfb23e23b5..000000000000
--- a/samples/bpf/test_cgrp2_sock.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/* eBPF example program:
- *
- * - Loads eBPF program
- *
- * The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
- * sockets opened by processes in the cgroup.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <net/if.h>
-#include <linux/bpf.h>
-
-#include "libbpf.h"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-static int prog_load(int idx)
-{
- struct bpf_insn prog[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_3, idx),
- BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
- BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
- "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
-}
-
-static int usage(const char *argv0)
-{
- printf("Usage: %s cg-path device-index\n", argv0);
- return EXIT_FAILURE;
-}
-
-int main(int argc, char **argv)
-{
- int cg_fd, prog_fd, ret;
- unsigned int idx;
-
- if (argc < 2)
- return usage(argv[0]);
-
- idx = if_nametoindex(argv[2]);
- if (!idx) {
- printf("Invalid device name\n");
- return EXIT_FAILURE;
- }
-
- cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
- if (cg_fd < 0) {
- printf("Failed to open cgroup path: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- prog_fd = prog_load(idx);
- printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
-
- if (prog_fd < 0) {
- printf("Failed to load prog: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0);
- if (ret < 0) {
- printf("Failed to attach prog to cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
deleted file mode 100755
index 925fd467c7cc..000000000000
--- a/samples/bpf/test_cgrp2_sock.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-function config_device {
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth0b
- ip link set veth0b up
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link add foo type vrf table 1234
- ip link set foo up
- ip addr add 172.16.1.101/24 dev veth0b
- ip addr add 2401:db00::2/64 dev veth0b nodad
- ip link set veth0b master foo
-}
-
-function attach_bpf {
- rm -rf /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2
- mount -t cgroup2 none /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2/foo
- test_cgrp2_sock /tmp/cgroupv2/foo foo
- echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
-}
-
-function cleanup {
- set +ex
- ip netns delete at_ns0
- ip link del veth0
- ip link del foo
- umount /tmp/cgroupv2
- rm -rf /tmp/cgroupv2
- set -ex
-}
-
-function do_test {
- ping -c1 -w1 172.16.1.100
- ping6 -c1 -w1 2401:db00::1
-}
-
-cleanup 2>/dev/null
-config_device
-attach_bpf
-do_test
-cleanup
-echo "*** PASS ***"
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
deleted file mode 100644
index db036077b644..000000000000
--- a/samples/bpf/test_cgrp2_sock2.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/* eBPF example program:
- *
- * - Loads eBPF program
- *
- * The eBPF program loads a filter from file and attaches the
- * program to a cgroup using BPF_PROG_ATTACH
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <net/if.h>
-#include <linux/bpf.h>
-
-#include "libbpf.h"
-#include "bpf_load.h"
-
-static int usage(const char *argv0)
-{
- printf("Usage: %s cg-path filter-path [filter-id]\n", argv0);
- return EXIT_FAILURE;
-}
-
-int main(int argc, char **argv)
-{
- int cg_fd, ret, filter_id = 0;
-
- if (argc < 3)
- return usage(argv[0]);
-
- cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
- if (cg_fd < 0) {
- printf("Failed to open cgroup path: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- if (load_bpf_file(argv[2]))
- return EXIT_FAILURE;
-
- printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
-
- if (argc > 3)
- filter_id = atoi(argv[3]);
-
- if (filter_id > prog_cnt) {
- printf("Invalid program id; program not found in file\n");
- return EXIT_FAILURE;
- }
-
- ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
- BPF_CGROUP_INET_SOCK_CREATE, 0);
- if (ret < 0) {
- printf("Failed to attach prog to cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
- }
-
- return EXIT_SUCCESS;
-}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
deleted file mode 100755
index 891f12a0e26f..000000000000
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/bin/bash
-
-function config_device {
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth0b
- ip link set veth0b up
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add 172.16.1.101/24 dev veth0b
- ip addr add 2401:db00::2/64 dev veth0b nodad
-}
-
-function config_cgroup {
- rm -rf /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2
- mount -t cgroup2 none /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2/foo
- echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
-}
-
-
-function attach_bpf {
- test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
- [ $? -ne 0 ] && exit 1
-}
-
-function cleanup {
- ip link del veth0b
- ip netns delete at_ns0
- umount /tmp/cgroupv2
- rm -rf /tmp/cgroupv2
-}
-
-cleanup 2>/dev/null
-
-set -e
-config_device
-config_cgroup
-set +e
-
-#
-# Test 1 - fail ping6
-#
-attach_bpf 0
-ping -c1 -w1 172.16.1.100
-if [ $? -ne 0 ]; then
- echo "ping failed when it should succeed"
- cleanup
- exit 1
-fi
-
-ping6 -c1 -w1 2401:db00::1
-if [ $? -eq 0 ]; then
- echo "ping6 succeeded when it should not"
- cleanup
- exit 1
-fi
-
-#
-# Test 2 - fail ping
-#
-attach_bpf 1
-ping6 -c1 -w1 2401:db00::1
-if [ $? -ne 0 ]; then
- echo "ping6 failed when it should succeed"
- cleanup
- exit 1
-fi
-
-ping -c1 -w1 172.16.1.100
-if [ $? -eq 0 ]; then
- echo "ping succeeded when it should not"
- cleanup
- exit 1
-fi
-
-cleanup
-echo
-echo "*** PASS ***"
diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh
deleted file mode 100755
index 0b119eeaf85c..000000000000
--- a/samples/bpf/test_cgrp2_tc.sh
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/bin/bash
-
-MY_DIR=$(dirname $0)
-# Details on the bpf prog
-BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin'
-BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o"
-BPF_SECTION='filter'
-
-[ -z "$TC" ] && TC='tc'
-[ -z "$IP" ] && IP='ip'
-
-# Names of the veth interface, net namespace...etc.
-HOST_IFC='ve'
-NS_IFC='vens'
-NS='ns'
-
-find_mnt() {
- cat /proc/mounts | \
- awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }'
-}
-
-# Init cgroup2 vars
-init_cgrp2_vars() {
- CGRP2_ROOT=$(find_mnt cgroup2)
- if [ -z "$CGRP2_ROOT" ]
- then
- CGRP2_ROOT='/mnt/cgroup2'
- MOUNT_CGRP2="yes"
- fi
- CGRP2_TC="$CGRP2_ROOT/tc"
- CGRP2_TC_LEAF="$CGRP2_TC/leaf"
-}
-
-# Init bpf fs vars
-init_bpf_fs_vars() {
- local bpf_fs_root=$(find_mnt bpf)
- [ -n "$bpf_fs_root" ] || return -1
- BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals"
-}
-
-setup_cgrp2() {
- case $1 in
- start)
- if [ "$MOUNT_CGRP2" == 'yes' ]
- then
- [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT
- mount -t cgroup2 none $CGRP2_ROOT || return $?
- fi
- mkdir -p $CGRP2_TC_LEAF
- ;;
- *)
- rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC
- [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT
- ;;
- esac
-}
-
-setup_bpf_cgrp2_array() {
- local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME"
- case $1 in
- start)
- $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC
- ;;
- *)
- [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array
- ;;
- esac
-}
-
-setup_net() {
- case $1 in
- start)
- $IP link add $HOST_IFC type veth peer name $NS_IFC || return $?
- $IP link set dev $HOST_IFC up || return $?
- sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0
-
- $IP netns add ns || return $?
- $IP link set dev $NS_IFC netns ns || return $?
- $IP -n $NS link set dev $NS_IFC up || return $?
- $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0
- $TC qdisc add dev $HOST_IFC clsact || return $?
- $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $?
- ;;
- *)
- $IP netns del $NS
- $IP link del $HOST_IFC
- ;;
- esac
-}
-
-run_in_cgrp() {
- # Fork another bash and move it under the specified cgroup.
- # It makes the cgroup cleanup easier at the end of the test.
- cmd='echo $$ > '
- cmd="$cmd $1/cgroup.procs; exec $2"
- bash -c "$cmd"
-}
-
-do_test() {
- run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null"
- local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \
- awk '/drop/{print substr($7, 0, index($7, ",")-1)}')
- if [[ $dropped -eq 0 ]]
- then
- echo "FAIL"
- return 1
- else
- echo "Successfully filtered $dropped packets"
- return 0
- fi
-}
-
-do_exit() {
- if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ]
- then
- echo "------ DEBUG ------"
- echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo
- echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo
- if [ -d "$BPF_FS_TC_SHARE" ]
- then
- echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo
- fi
- echo "Host net:"
- $IP netns
- $IP link show dev $HOST_IFC
- $IP -6 a show dev $HOST_IFC
- $TC -s qdisc show dev $HOST_IFC
- echo
- echo "$NS net:"
- $IP -n $NS link show dev $NS_IFC
- $IP -n $NS -6 link show dev $NS_IFC
- echo "------ DEBUG ------"
- echo
- fi
-
- if [ "$MODE" != 'nocleanup' ]
- then
- setup_net stop
- setup_bpf_cgrp2_array stop
- setup_cgrp2 stop
- fi
-}
-
-init_cgrp2_vars
-init_bpf_fs_vars
-
-while [[ $# -ge 1 ]]
-do
- a="$1"
- case $a in
- debug)
- DEBUG='yes'
- shift 1
- ;;
- cleanup-only)
- MODE='cleanuponly'
- shift 1
- ;;
- no-cleanup)
- MODE='nocleanup'
- shift 1
- ;;
- *)
- echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]"
- echo " debug: Print cgrp and network setup details at the end of the test"
- echo " cleanup-only: Try to cleanup things from last test. No test will be run"
- echo " no-cleanup: Run the test but don't do cleanup at the end"
- echo "[Note: If no arg is given, it will run the test and do cleanup at the end]"
- echo
- exit -1
- ;;
- esac
-done
-
-trap do_exit 0
-
-[ "$MODE" == 'cleanuponly' ] && exit
-
-setup_cgrp2 start || exit $?
-setup_net start || exit $?
-init_bpf_fs_vars || exit $?
-setup_bpf_cgrp2_array start || exit $?
-do_test
-echo
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
deleted file mode 100644
index 1547b36a7b7b..000000000000
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/in6.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/pkt_cls.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-/* copy of 'struct ethhdr' without __packed */
-struct eth_hdr {
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
- unsigned short h_proto;
-};
-
-#define PIN_GLOBAL_NS 2
-struct bpf_elf_map {
- __u32 type;
- __u32 size_key;
- __u32 size_value;
- __u32 max_elem;
- __u32 flags;
- __u32 id;
- __u32 pinning;
-};
-
-struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = {
- .type = BPF_MAP_TYPE_CGROUP_ARRAY,
- .size_key = sizeof(uint32_t),
- .size_value = sizeof(uint32_t),
- .pinning = PIN_GLOBAL_NS,
- .max_elem = 1,
-};
-
-SEC("filter")
-int handle_egress(struct __sk_buff *skb)
-{
- void *data = (void *)(long)skb->data;
- struct eth_hdr *eth = data;
- struct ipv6hdr *ip6h = data + sizeof(*eth);
- void *data_end = (void *)(long)skb->data_end;
- char dont_care_msg[] = "dont care %04x %d\n";
- char pass_msg[] = "pass\n";
- char reject_msg[] = "reject\n";
-
- /* single length check */
- if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
- return TC_ACT_OK;
-
- if (eth->h_proto != htons(ETH_P_IPV6) ||
- ip6h->nexthdr != IPPROTO_ICMPV6) {
- bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
- eth->h_proto, ip6h->nexthdr);
- return TC_ACT_OK;
- } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
- bpf_trace_printk(pass_msg, sizeof(pass_msg));
- return TC_ACT_OK;
- } else {
- bpf_trace_printk(reject_msg, sizeof(reject_msg));
- return TC_ACT_SHOT;
- }
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_cls_bpf.sh b/samples/bpf/test_cls_bpf.sh
index 0365d5ee512c..aaddd67b37ff 100755
--- a/samples/bpf/test_cls_bpf.sh
+++ b/samples/bpf/test_cls_bpf.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
function pktgen {
../pktgen/pktgen_bench_xmit_mode_netif_receive.sh -i $IFC -s 64 \
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
deleted file mode 100644
index 86b28d7d6c99..000000000000
--- a/samples/bpf/test_current_task_under_cgroup_kern.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-#include <linux/ptrace.h>
-#include <uapi/linux/bpf.h>
-#include <linux/version.h>
-#include "bpf_helpers.h"
-#include <uapi/linux/utsname.h>
-
-struct bpf_map_def SEC("maps") cgroup_map = {
- .type = BPF_MAP_TYPE_CGROUP_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") perf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 1,
-};
-
-/* Writes the last PID that called sync to a map at index 0 */
-SEC("kprobe/sys_sync")
-int bpf_prog1(struct pt_regs *ctx)
-{
- u64 pid = bpf_get_current_pid_tgid();
- int idx = 0;
-
- if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
- return 0;
-
- bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
deleted file mode 100644
index 65b5fb51c1db..000000000000
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <linux/bpf.h>
-#include <unistd.h>
-#include "libbpf.h"
-#include "bpf_load.h"
-#include <linux/bpf.h>
-#include "cgroup_helpers.h"
-
-#define CGROUP_PATH "/my-cgroup"
-
-int main(int argc, char **argv)
-{
- pid_t remote_pid, local_pid = getpid();
- int cg2, idx = 0, rc = 0;
- char filename[256];
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
-
- if (setup_cgroup_environment())
- goto err;
-
- cg2 = create_and_get_cgroup(CGROUP_PATH);
-
- if (!cg2)
- goto err;
-
- if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
- log_err("Adding target cgroup to map");
- goto err;
- }
-
- if (join_cgroup(CGROUP_PATH))
- goto err;
-
- /*
- * The installed helper program catched the sync call, and should
- * write it to the map.
- */
-
- sync();
- bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid);
-
- if (local_pid != remote_pid) {
- fprintf(stderr,
- "BPF Helper didn't write correct PID to map, but: %d\n",
- remote_pid);
- goto err;
- }
-
- /* Verify the negative scenario; leave the cgroup */
- if (join_cgroup("/"))
- goto err;
-
- remote_pid = 0;
- bpf_map_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
-
- sync();
- bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid);
-
- if (local_pid == remote_pid) {
- fprintf(stderr, "BPF cgroup negative test did not work\n");
- goto err;
- }
-
- goto out;
-err:
- rc = 1;
-
-out:
- close(cg2);
- cleanup_cgroup_environment();
- return rc;
-}
diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh
deleted file mode 100755
index 196925403ab4..000000000000
--- a/samples/bpf/test_ipip.sh
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/bin/bash
-
-function config_device {
- ip netns add at_ns0
- ip netns add at_ns1
- ip netns add at_ns2
- ip link add veth0 type veth peer name veth0b
- ip link add veth1 type veth peer name veth1b
- ip link add veth2 type veth peer name veth2b
- ip link set veth0b up
- ip link set veth1b up
- ip link set veth2b up
- ip link set dev veth0b mtu 1500
- ip link set dev veth1b mtu 1500
- ip link set dev veth2b mtu 1500
- ip link set veth0 netns at_ns0
- ip link set veth1 netns at_ns1
- ip link set veth2 netns at_ns2
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
- ip netns exec at_ns0 ip link set dev veth0 up
- ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1
- ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad
- ip netns exec at_ns1 ip link set dev veth1 up
- ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2
- ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad
- ip netns exec at_ns2 ip link set dev veth2 up
- ip link add br0 type bridge
- ip link set br0 up
- ip link set dev br0 mtu 1500
- ip link set veth0b master br0
- ip link set veth1b master br0
- ip link set veth2b master br0
-}
-
-function add_ipip_tunnel {
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns1 \
- ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200
- ip netns exec at_ns1 ip link set dev $DEV_NS up
- # same inner IP address in at_ns0 and at_ns1
- ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
-
- ip netns exec at_ns2 ip link add dev $DEV type ipip external
- ip netns exec at_ns2 ip link set dev $DEV up
- ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ipip6_tunnel {
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns1 \
- ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64
- ip netns exec at_ns1 ip link set dev $DEV_NS up
- # same inner IP address in at_ns0 and at_ns1
- ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
-
- ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external
- ip netns exec at_ns2 ip link set dev $DEV up
- ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ip6ip6_tunnel {
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64
- ip netns exec at_ns1 \
- ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64
- ip netns exec at_ns1 ip link set dev $DEV_NS up
- # same inner IP address in at_ns0 and at_ns1
- ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64
-
- ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external
- ip netns exec at_ns2 ip link set dev $DEV up
- ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64
-}
-
-function attach_bpf {
- DEV=$1
- SET_TUNNEL=$2
- GET_TUNNEL=$3
- ip netns exec at_ns2 tc qdisc add dev $DEV clsact
- ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
- ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
-}
-
-function test_ipip {
- DEV_NS=ipip_std
- DEV=ipip_bpf
- config_device
-# tcpdump -nei br0 &
- cat /sys/kernel/debug/tracing/trace_pipe &
-
- add_ipip_tunnel
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
-
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- ip netns exec at_ns2 ping -c 1 10.1.1.100
- ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
- ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
- sleep 0.2
- # tcp check _same_ IP over different tunnels
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
- cleanup
-}
-
-# IPv4 over IPv6 tunnel
-function test_ipip6 {
- DEV_NS=ipip_std
- DEV=ipip_bpf
- config_device
-# tcpdump -nei br0 &
- cat /sys/kernel/debug/tracing/trace_pipe &
-
- add_ipip6_tunnel
- attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
-
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- ip netns exec at_ns2 ping -c 1 10.1.1.100
- ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
- ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
- sleep 0.2
- # tcp check _same_ IP over different tunnels
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
- cleanup
-}
-
-# IPv6 over IPv6 tunnel
-function test_ip6ip6 {
- DEV_NS=ipip_std
- DEV=ipip_bpf
- config_device
-# tcpdump -nei br0 &
- cat /sys/kernel/debug/tracing/trace_pipe &
-
- add_ip6ip6_tunnel
- attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
-
- ip netns exec at_ns0 ping -6 -c 1 2601:646::2
- ip netns exec at_ns2 ping -6 -c 1 2601:646::1
- ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null
- ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null
- sleep 0.2
- # tcp check _same_ IP over different tunnels
- ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200
- ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201
- cleanup
-}
-
-function cleanup {
- set +ex
- pkill iperf
- ip netns delete at_ns0
- ip netns delete at_ns1
- ip netns delete at_ns2
- ip link del veth0
- ip link del veth1
- ip link del veth2
- ip link del br0
- pkill tcpdump
- pkill cat
- set -ex
-}
-
-cleanup
-echo "Testing IP tunnels..."
-test_ipip
-test_ipip6
-test_ip6ip6
-echo "*** PASS ***"
diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c
index 73c357142268..1c161276d57b 100644
--- a/samples/bpf/test_lru_dist.c
+++ b/samples/bpf/test_lru_dist.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#define _GNU_SOURCE
#include <linux/types.h>
@@ -16,12 +13,11 @@
#include <sched.h>
#include <sys/wait.h>
#include <sys/stat.h>
-#include <sys/resource.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>
-#include "libbpf.h"
+#include <bpf/bpf.h>
#include "bpf_util.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -46,11 +42,6 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
list->prev = list;
}
-static inline int list_empty(const struct list_head *head)
-{
- return head->next == head;
-}
-
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
@@ -108,10 +99,10 @@ struct pfect_lru {
static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
unsigned int nr_possible_elems)
{
- lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
sizeof(unsigned long long),
sizeof(struct pfect_lru_node *),
- nr_possible_elems, 0);
+ nr_possible_elems, NULL);
assert(lru->map_fd != -1);
lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
@@ -210,10 +201,13 @@ static unsigned int read_keys(const char *dist_file,
static int create_map(int map_type, int map_flags, unsigned int size)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = map_flags,
+ );
int map_fd;
- map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
- sizeof(unsigned long long), size, map_flags);
+ map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, &opts);
if (map_fd == -1)
perror("bpf_create_map");
@@ -492,7 +486,6 @@ static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
const char *dist_file;
int nr_tasks = 1;
@@ -511,8 +504,6 @@ int main(int argc, char **argv)
setbuf(stdout, NULL);
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
srand(time(NULL));
nr_cpus = bpf_num_possible_cpus();
@@ -527,7 +518,7 @@ int main(int argc, char **argv)
return -1;
}
- for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+ for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c
index bacc8013436b..9a13dbb81847 100644
--- a/samples/bpf/test_lwt_bpf.c
+++ b/samples/bpf/test_lwt_bpf.c
@@ -10,17 +10,9 @@
* General Public License for more details.
*/
-#include <stdint.h>
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmpv6.h>
-#include <linux/if_ether.h>
-#include "bpf_helpers.h"
+#include "vmlinux.h"
+#include "net_shared.h"
+#include <bpf/bpf_helpers.h>
#include <string.h>
# define printk(fmt, ...) \
@@ -44,9 +36,9 @@ SEC("test_ctx")
int do_test_ctx(struct __sk_buff *skb)
{
skb->cb[0] = CB_MAGIC;
- printk("len %d hash %d protocol %d\n", skb->len, skb->hash,
+ printk("len %d hash %d protocol %d", skb->len, skb->hash,
skb->protocol);
- printk("cb %d ingress_ifindex %d ifindex %d\n", skb->cb[0],
+ printk("cb %d ingress_ifindex %d ifindex %d", skb->cb[0],
skb->ingress_ifindex, skb->ifindex);
return BPF_OK;
@@ -56,9 +48,9 @@ int do_test_ctx(struct __sk_buff *skb)
SEC("test_cb")
int do_test_cb(struct __sk_buff *skb)
{
- printk("cb0: %x cb1: %x cb2: %x\n", skb->cb[0], skb->cb[1],
+ printk("cb0: %x cb1: %x cb2: %x", skb->cb[0], skb->cb[1],
skb->cb[2]);
- printk("cb3: %x cb4: %x\n", skb->cb[3], skb->cb[4]);
+ printk("cb3: %x cb4: %x", skb->cb[3], skb->cb[4]);
return BPF_OK;
}
@@ -72,11 +64,11 @@ int do_test_data(struct __sk_buff *skb)
struct iphdr *iph = data;
if (data + sizeof(*iph) > data_end) {
- printk("packet truncated\n");
+ printk("packet truncated");
return BPF_DROP;
}
- printk("src: %x dst: %x\n", iph->saddr, iph->daddr);
+ printk("src: %x dst: %x", iph->saddr, iph->daddr);
return BPF_OK;
}
@@ -97,7 +89,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
ret = bpf_skb_load_bytes(skb, IP_PROTO_OFF, &proto, 1);
if (ret < 0) {
- printk("bpf_l4_csum_replace failed: %d\n", ret);
+ printk("bpf_l4_csum_replace failed: %d", ret);
return BPF_DROP;
}
@@ -120,14 +112,14 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
ret = bpf_l4_csum_replace(skb, off, old_ip, new_ip,
flags | sizeof(new_ip));
if (ret < 0) {
- printk("bpf_l4_csum_replace failed: %d\n");
+ printk("bpf_l4_csum_replace failed: %d");
return BPF_DROP;
}
}
ret = bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
if (ret < 0) {
- printk("bpf_l3_csum_replace failed: %d\n", ret);
+ printk("bpf_l3_csum_replace failed: %d", ret);
return BPF_DROP;
}
@@ -137,7 +129,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
ret = bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
if (ret < 0) {
- printk("bpf_skb_store_bytes() failed: %d\n", ret);
+ printk("bpf_skb_store_bytes() failed: %d", ret);
return BPF_DROP;
}
@@ -153,12 +145,12 @@ int do_test_rewrite(struct __sk_buff *skb)
ret = bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4);
if (ret < 0) {
- printk("bpf_skb_load_bytes failed: %d\n", ret);
+ printk("bpf_skb_load_bytes failed: %d", ret);
return BPF_DROP;
}
if (old_ip == 0x2fea8c0) {
- printk("out: rewriting from %x to %x\n", old_ip, new_ip);
+ printk("out: rewriting from %x to %x", old_ip, new_ip);
return rewrite(skb, old_ip, new_ip, 1);
}
@@ -173,16 +165,16 @@ static inline int __do_push_ll_and_redirect(struct __sk_buff *skb)
ret = bpf_skb_change_head(skb, 14, 0);
if (ret < 0) {
- printk("skb_change_head() failed: %d\n", ret);
+ printk("skb_change_head() failed: %d", ret);
}
- ehdr.h_proto = __constant_htons(ETH_P_IP);
+ ehdr.h_proto = bpf_htons(ETH_P_IP);
memcpy(&ehdr.h_source, &smac, 6);
memcpy(&ehdr.h_dest, &dmac, 6);
ret = bpf_skb_store_bytes(skb, 0, &ehdr, sizeof(ehdr), 0);
if (ret < 0) {
- printk("skb_store_bytes() failed: %d\n", ret);
+ printk("skb_store_bytes() failed: %d", ret);
return BPF_DROP;
}
@@ -202,7 +194,7 @@ int do_push_ll_and_redirect(struct __sk_buff *skb)
ret = __do_push_ll_and_redirect(skb);
if (ret >= 0)
- printk("redirected to %d\n", ifindex);
+ printk("redirected to %d", ifindex);
return ret;
}
@@ -229,7 +221,7 @@ SEC("fill_garbage")
int do_fill_garbage(struct __sk_buff *skb)
{
__fill_garbage(skb);
- printk("Set initial 96 bytes of header to FF\n");
+ printk("Set initial 96 bytes of header to FF");
return BPF_OK;
}
@@ -238,7 +230,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb)
{
int ifindex = DST_IFINDEX;
__fill_garbage(skb);
- printk("redirected to %d\n", ifindex);
+ printk("redirected to %d", ifindex);
return bpf_redirect(ifindex, 0);
}
@@ -246,7 +238,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb)
SEC("drop_all")
int do_drop_all(struct __sk_buff *skb)
{
- printk("dropping with: %d\n", BPF_DROP);
+ printk("dropping with: %d", BPF_DROP);
return BPF_DROP;
}
diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh
index a695ae268c40..148e2df6cdce 100644..100755
--- a/samples/bpf/test_lwt_bpf.sh
+++ b/samples/bpf/test_lwt_bpf.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
# Uncomment to see generated bytecode
#VERBOSE=verbose
@@ -18,7 +19,10 @@ IPVETH3="192.168.111.2"
IP_LOCAL="192.168.99.1"
-TRACE_ROOT=/sys/kernel/debug/tracing
+PROG_SRC="test_lwt_bpf.c"
+BPF_PROG="test_lwt_bpf.o"
+TRACE_ROOT=/sys/kernel/tracing
+CONTEXT_INFO=$(cat ${TRACE_ROOT}/trace_options | grep context)
function lookup_mac()
{
@@ -35,7 +39,7 @@ function lookup_mac()
function cleanup {
set +ex
- rm test_lwt_bpf.o 2> /dev/null
+ rm $BPF_PROG 2> /dev/null
ip link del $VETH0 2> /dev/null
ip link del $VETH1 2> /dev/null
ip link del $VETH2 2> /dev/null
@@ -75,7 +79,7 @@ function install_test {
cleanup_routes
cp /dev/null ${TRACE_ROOT}/trace
- OPTS="encap bpf headroom 14 $1 obj test_lwt_bpf.o section $2 $VERBOSE"
+ OPTS="encap bpf headroom 14 $1 obj $BPF_PROG section $2 $VERBOSE"
if [ "$1" == "in" ]; then
ip route add table local local ${IP_LOCAL}/32 $OPTS dev lo
@@ -95,7 +99,7 @@ function remove_prog {
function filter_trace {
# Add newline to allow starting EXPECT= variables on newline
NL=$'\n'
- echo "${NL}$*" | sed -e 's/^.*: : //g'
+ echo "${NL}$*" | sed -e 's/bpf_trace_printk: //g'
}
function expect_fail {
@@ -159,11 +163,11 @@ function test_ctx_out {
failure "test_ctx out: packets are dropped"
}
match_trace "$(get_trace)" "
-len 84 hash 0 protocol 0
+len 84 hash 0 protocol 8
cb 1234 ingress_ifindex 0 ifindex 0
-len 84 hash 0 protocol 0
+len 84 hash 0 protocol 8
cb 1234 ingress_ifindex 0 ifindex 0
-len 84 hash 0 protocol 0
+len 84 hash 0 protocol 8
cb 1234 ingress_ifindex 0 ifindex 0" || exit 1
remove_prog out
}
@@ -366,14 +370,15 @@ setup_one_veth $NS1 $VETH0 $VETH1 $IPVETH0 $IPVETH1 $IPVETH1b
setup_one_veth $NS2 $VETH2 $VETH3 $IPVETH2 $IPVETH3
ip netns exec $NS1 netserver
echo 1 > ${TRACE_ROOT}/tracing_on
+echo nocontext-info > ${TRACE_ROOT}/trace_options
DST_MAC=$(lookup_mac $VETH1 $NS1)
SRC_MAC=$(lookup_mac $VETH0)
DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex)
-CLANG_OPTS="-O2 -target bpf -I ../include/"
+CLANG_OPTS="-O2 --target=bpf -I ../include/"
CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX"
-clang $CLANG_OPTS -c test_lwt_bpf.c -o test_lwt_bpf.o
+clang $CLANG_OPTS -c $PROG_SRC -o $BPF_PROG
test_ctx_xmit
test_ctx_out
@@ -396,4 +401,5 @@ test_netperf_redirect
cleanup
echo 0 > ${TRACE_ROOT}/tracing_on
+echo $CONTEXT_INFO > ${TRACE_ROOT}/trace_options
exit 0
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map.bpf.c
index 42c44d091dd1..9f030f9c4e1b 100644
--- a/samples/bpf/test_map_in_map_kern.c
+++ b/samples/bpf/test_map_in_map.bpf.c
@@ -6,69 +6,72 @@
* License as published by the Free Software Foundation.
*/
#define KBUILD_MODNAME "foo"
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/in6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
#define MAX_NR_PORTS 65536
+#define EINVAL 22
+#define ENOENT 2
+
/* map #0 */
-struct bpf_map_def SEC("maps") port_a = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = MAX_NR_PORTS,
-};
+struct inner_a {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, MAX_NR_PORTS);
+} port_a SEC(".maps");
/* map #1 */
-struct bpf_map_def SEC("maps") port_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct inner_h {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} port_h SEC(".maps");
/* map #2 */
-struct bpf_map_def SEC("maps") reg_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} reg_result_h SEC(".maps");
/* map #3 */
-struct bpf_map_def SEC("maps") inline_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} inline_result_h SEC(".maps");
/* map #4 */ /* Test case #0 */
-struct bpf_map_def SEC("maps") a_of_port_a = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = MAX_NR_PORTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_PORTS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} a_of_port_a SEC(".maps");
/* map #5 */ /* Test case #1 */
-struct bpf_map_def SEC("maps") h_of_port_a = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} h_of_port_a SEC(".maps");
/* map #6 */ /* Test case #2 */
-struct bpf_map_def SEC("maps") h_of_port_h = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 1, /* map_fd[1] is port_h */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_h); /* use inner_h as inner map */
+} h_of_port_h SEC(".maps");
static __always_inline int do_reg_lookup(void *inner_map, u32 port)
{
@@ -100,23 +103,19 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port)
return result ? *result : -ENOENT;
}
-SEC("kprobe/sys_connect")
-int trace_sys_connect(struct pt_regs *ctx)
+SEC("ksyscall/connect")
+int BPF_KSYSCALL(trace_sys_connect, unsigned int fd, struct sockaddr_in6 *in6, int addrlen)
{
- struct sockaddr_in6 *in6;
u16 test_case, port, dst6[8];
- int addrlen, ret, inline_ret, ret_key = 0;
+ int ret, inline_ret, ret_key = 0;
u32 port_key;
void *outer_map, *inner_map;
bool inline_hash = false;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
-
if (addrlen != sizeof(*in6))
return 0;
- ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+ ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
if (ret) {
inline_ret = ret;
goto done;
@@ -127,7 +126,7 @@ int trace_sys_connect(struct pt_regs *ctx)
test_case = dst6[7];
- ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+ ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
if (ret) {
inline_ret = ret;
goto done;
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index 1aca18539d8d..55dca43f3723 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
-#include <sys/resource.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdint.h>
@@ -13,8 +9,12 @@
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+
+static int map_fd[7];
#define PORT_A (map_fd[0])
#define PORT_H (map_fd[1])
@@ -30,7 +30,7 @@ static const char * const test_names[] = {
"Hash of Hash",
};
-#define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
+#define NR_TESTS ARRAY_SIZE(test_names)
static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
{
@@ -38,7 +38,7 @@ static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
uint32_t info_len = sizeof(info);
int ret, id;
- ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len);
+ ret = bpf_map_get_info_by_fd(inner_map_fd, &info, &info_len);
assert(!ret);
ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id);
@@ -115,19 +115,54 @@ static void test_map_in_map(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "trace_sys_connect");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "port_a");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "port_h");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "reg_result_h");
+ map_fd[3] = bpf_object__find_map_fd_by_name(obj, "inline_result_h");
+ map_fd[4] = bpf_object__find_map_fd_by_name(obj, "a_of_port_a");
+ map_fd[5] = bpf_object__find_map_fd_by_name(obj, "h_of_port_a");
+ map_fd[6] = bpf_object__find_map_fd_by_name(obj, "h_of_port_h");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0 ||
+ map_fd[3] < 0 || map_fd[4] < 0 || map_fd[5] < 0 || map_fd[6] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
test_map_in_map();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
deleted file mode 100644
index 468a66a92ef9..000000000000
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/version.h>
-#include <linux/ptrace.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
-
-SEC("kprobe/__set_task_comm")
-int prog(struct pt_regs *ctx)
-{
- struct signal_struct *signal;
- struct task_struct *tsk;
- char oldcomm[16] = {};
- char newcomm[16] = {};
- u16 oom_score_adj;
- u32 pid;
-
- tsk = (void *)PT_REGS_PARM1(ctx);
-
- pid = _(tsk->pid);
- bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx));
- signal = _(tsk->signal);
- oom_score_adj = _(signal->oom_score_adj);
- return 0;
-}
-
-SEC("kprobe/urandom_read")
-int prog2(struct pt_regs *ctx)
-{
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c
deleted file mode 100644
index 38f5c0b9da9f..000000000000
--- a/samples/bpf/test_overhead_tp_kern.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-/* from /sys/kernel/debug/tracing/events/task/task_rename/format */
-struct task_rename {
- __u64 pad;
- __u32 pid;
- char oldcomm[16];
- char newcomm[16];
- __u16 oom_score_adj;
-};
-SEC("tracepoint/task/task_rename")
-int prog(struct task_rename *ctx)
-{
- return 0;
-}
-
-/* from /sys/kernel/debug/tracing/events/random/urandom_read/format */
-struct urandom_read {
- __u64 pad;
- int got_bits;
- int pool_left;
- int input_left;
-};
-SEC("tracepoint/random/urandom_read")
-int prog2(struct urandom_read *ctx)
-{
- return 0;
-}
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
deleted file mode 100644
index d291167fd3c7..000000000000
--- a/samples/bpf/test_overhead_user.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define _GNU_SOURCE
-#include <sched.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <asm/unistd.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <assert.h>
-#include <sys/wait.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
-#include <time.h>
-#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
-
-#define MAX_CNT 1000000
-
-static __u64 time_get_ns(void)
-{
- struct timespec ts;
-
- clock_gettime(CLOCK_MONOTONIC, &ts);
- return ts.tv_sec * 1000000000ull + ts.tv_nsec;
-}
-
-static void test_task_rename(int cpu)
-{
- __u64 start_time;
- char buf[] = "test\n";
- int i, fd;
-
- fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
- if (fd < 0) {
- printf("couldn't open /proc\n");
- exit(1);
- }
- start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
- write(fd, buf, sizeof(buf));
- printf("task_rename:%d: %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
- close(fd);
-}
-
-static void test_urandom_read(int cpu)
-{
- __u64 start_time;
- char buf[4];
- int i, fd;
-
- fd = open("/dev/urandom", O_RDONLY);
- if (fd < 0) {
- printf("couldn't open /dev/urandom\n");
- exit(1);
- }
- start_time = time_get_ns();
- for (i = 0; i < MAX_CNT; i++)
- read(fd, buf, sizeof(buf));
- printf("urandom_read:%d: %lld events per sec\n",
- cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
- close(fd);
-}
-
-static void loop(int cpu, int flags)
-{
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
-
- if (flags & 1)
- test_task_rename(cpu);
- if (flags & 2)
- test_urandom_read(cpu);
-}
-
-static void run_perf_test(int tasks, int flags)
-{
- pid_t pid[tasks];
- int i;
-
- for (i = 0; i < tasks; i++) {
- pid[i] = fork();
- if (pid[i] == 0) {
- loop(i, flags);
- exit(0);
- } else if (pid[i] == -1) {
- printf("couldn't spawn #%d process\n", i);
- exit(1);
- }
- }
- for (i = 0; i < tasks; i++) {
- int status;
-
- assert(waitpid(pid[i], &status, 0) == pid[i]);
- assert(status == 0);
- }
-}
-
-static void unload_progs(void)
-{
- close(prog_fd[0]);
- close(prog_fd[1]);
- close(event_fd[0]);
- close(event_fd[1]);
-}
-
-int main(int argc, char **argv)
-{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- char filename[256];
- int num_cpu = 8;
- int test_flags = ~0;
-
- setrlimit(RLIMIT_MEMLOCK, &r);
-
- if (argc > 1)
- test_flags = atoi(argv[1]) ? : test_flags;
- if (argc > 2)
- num_cpu = atoi(argv[2]) ? : num_cpu;
-
- if (test_flags & 0x3) {
- printf("BASE\n");
- run_perf_test(num_cpu, test_flags);
- }
-
- if (test_flags & 0xC) {
- snprintf(filename, sizeof(filename),
- "%s_kprobe_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
- printf("w/KPROBE\n");
- run_perf_test(num_cpu, test_flags >> 2);
- unload_progs();
- }
-
- if (test_flags & 0x30) {
- snprintf(filename, sizeof(filename),
- "%s_tp_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
- printf("w/TRACEPOINT\n");
- run_perf_test(num_cpu, test_flags >> 4);
- unload_progs();
- }
-
- return 0;
-}
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
deleted file mode 100644
index 3a677c807044..000000000000
--- a/samples/bpf/test_probe_write_user_kern.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <uapi/linux/bpf.h>
-#include <linux/version.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") dnat_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct sockaddr_in),
- .value_size = sizeof(struct sockaddr_in),
- .max_entries = 256,
-};
-
-/* kprobe is NOT a stable ABI
- * kernel functions can be removed, renamed or completely change semantics.
- * Number of arguments and their positions can change, etc.
- * In such case this bpf+kprobe example will no longer be meaningful
- *
- * This example sits on a syscall, and the syscall ABI is relatively stable
- * of course, across platforms, and over time, the ABI may change.
- */
-SEC("kprobe/sys_connect")
-int bpf_prog1(struct pt_regs *ctx)
-{
- struct sockaddr_in new_addr, orig_addr = {};
- struct sockaddr_in *mapped_addr;
- void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx);
- int sockaddr_len = (int)PT_REGS_PARM3(ctx);
-
- if (sockaddr_len > sizeof(orig_addr))
- return 0;
-
- if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
- return 0;
-
- mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
- if (mapped_addr != NULL) {
- memcpy(&new_addr, mapped_addr, sizeof(new_addr));
- bpf_probe_write_user(sockaddr_arg, &new_addr,
- sizeof(new_addr));
- }
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
deleted file mode 100644
index b5bf178a6ecc..000000000000
--- a/samples/bpf/test_probe_write_user_user.c
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <stdio.h>
-#include <assert.h>
-#include <linux/bpf.h>
-#include <unistd.h>
-#include "libbpf.h"
-#include "bpf_load.h"
-#include <sys/socket.h>
-#include <string.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-
-int main(int ac, char **argv)
-{
- int serverfd, serverconnfd, clientfd;
- socklen_t sockaddr_len;
- struct sockaddr serv_addr, mapped_addr, tmp_addr;
- struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
- char filename[256];
- char *ip;
-
- serv_addr_in = (struct sockaddr_in *)&serv_addr;
- mapped_addr_in = (struct sockaddr_in *)&mapped_addr;
- tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
-
- assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
- assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
-
- /* Bind server to ephemeral port on lo */
- memset(&serv_addr, 0, sizeof(serv_addr));
- serv_addr_in->sin_family = AF_INET;
- serv_addr_in->sin_port = 0;
- serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-
- assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0);
-
- sockaddr_len = sizeof(serv_addr);
- assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0);
- ip = inet_ntoa(serv_addr_in->sin_addr);
- printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port));
-
- memset(&mapped_addr, 0, sizeof(mapped_addr));
- mapped_addr_in->sin_family = AF_INET;
- mapped_addr_in->sin_port = htons(5555);
- mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
-
- assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
-
- assert(listen(serverfd, 5) == 0);
-
- ip = inet_ntoa(mapped_addr_in->sin_addr);
- printf("Client connecting to: %s:%d\n",
- ip, ntohs(mapped_addr_in->sin_port));
- assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0);
-
- sockaddr_len = sizeof(tmp_addr);
- ip = inet_ntoa(tmp_addr_in->sin_addr);
- assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0);
- printf("Server received connection from: %s:%d\n",
- ip, ntohs(tmp_addr_in->sin_port));
-
- sockaddr_len = sizeof(tmp_addr);
- assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0);
- ip = inet_ntoa(tmp_addr_in->sin_addr);
- printf("Client's peer address: %s:%d\n",
- ip, ntohs(tmp_addr_in->sin_port));
-
- /* Is the server's getsockname = the socket getpeername */
- assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
-
- return 0;
-}
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
deleted file mode 100755
index a70d2ea90313..000000000000
--- a/samples/bpf/test_tunnel_bpf.sh
+++ /dev/null
@@ -1,168 +0,0 @@
-#!/bin/bash
-# In Namespace 0 (at_ns0) using native tunnel
-# Overlay IP: 10.1.1.100
-# local 192.16.1.100 remote 192.16.1.200
-# veth0 IP: 172.16.1.100, tunnel dev <type>00
-
-# Out of Namespace using BPF set/get on lwtunnel
-# Overlay IP: 10.1.1.200
-# local 172.16.1.200 remote 172.16.1.100
-# veth1 IP: 172.16.1.200, tunnel dev <type>11
-
-function config_device {
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth1
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link set dev veth1 up mtu 1500
- ip addr add dev veth1 172.16.1.200/24
-}
-
-function add_gre_tunnel {
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # out of namespace
- ip link add dev $DEV type $TYPE key 2 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_vxlan_tunnel {
- # Set static ARP entry here because iptables set-mark works
- # on L3 packet, as a result not applying to ARP packets,
- # causing errors at get_tunnel_{key/opt}.
-
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
- ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
-
- # out of namespace
- ip link add dev $DEV type $TYPE external gbp dstport 4789
- ip link set dev $DEV address 52:54:00:d9:02:00 up
- ip addr add dev $DEV 10.1.1.200/24
- arp -s 10.1.1.100 52:54:00:d9:01:00
-}
-
-function add_geneve_tunnel {
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # out of namespace
- ip link add dev $DEV type $TYPE dstport 6081 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ipip_tunnel {
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # out of namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-function attach_bpf {
- DEV=$1
- SET_TUNNEL=$2
- GET_TUNNEL=$3
- tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
- tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
-}
-
-function test_gre {
- TYPE=gretap
- DEV_NS=gretap00
- DEV=gretap11
- config_device
- add_gre_tunnel
- attach_bpf $DEV gre_set_tunnel gre_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_vxlan {
- TYPE=vxlan
- DEV_NS=vxlan00
- DEV=vxlan11
- config_device
- add_vxlan_tunnel
- attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_geneve {
- TYPE=geneve
- DEV_NS=geneve00
- DEV=geneve11
- config_device
- add_geneve_tunnel
- attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_ipip {
- TYPE=ipip
- DEV_NS=ipip00
- DEV=ipip11
- config_device
- tcpdump -nei veth1 &
- cat /sys/kernel/debug/tracing/trace_pipe &
- add_ipip_tunnel
- ethtool -K veth1 gso off gro off rx off tx off
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
- sleep 0.2
- iperf -c 10.1.1.100 -n 5k -p 5200
- cleanup
-}
-
-function cleanup {
- set +ex
- pkill iperf
- ip netns delete at_ns0
- ip link del veth1
- ip link del ipip11
- ip link del gretap11
- ip link del vxlan11
- ip link del geneve11
- pkill tcpdump
- pkill cat
- set -ex
-}
-
-cleanup
-echo "Testing GRE tunnel..."
-test_gre
-echo "Testing VXLAN tunnel..."
-test_vxlan
-echo "Testing GENEVE tunnel..."
-test_geneve
-echo "Testing IPIP tunnel..."
-test_ipip
-echo "*** PASS ***"
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index 41b6115a32eb..0bba5fcd7d24 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -5,11 +5,11 @@
* License as published by the Free Software Foundation.
*/
#include <linux/ptrace.h>
-#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/perf_event.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct key_t {
char comm[TASK_COMM_LEN];
@@ -17,19 +17,19 @@ struct key_t {
u32 userstack;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
@@ -37,10 +37,15 @@ struct bpf_map_def SEC("maps") stackmap = {
SEC("perf_event")
int bpf_prog1(struct bpf_perf_event_data *ctx)
{
+ char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
+ char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+ char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
+ struct bpf_perf_event_value value_buf;
struct key_t key;
u64 *val, one = 1;
+ int ret;
if (ctx->sample_period < 10000)
/* ignore warmup */
@@ -54,6 +59,15 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
return 0;
}
+ ret = bpf_perf_prog_read_value(ctx, (void *)&value_buf, sizeof(struct bpf_perf_event_value));
+ if (!ret)
+ bpf_trace_printk(time_fmt1, sizeof(time_fmt1), value_buf.enabled, value_buf.running);
+ else
+ bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+
+ if (ctx->addr != 0)
+ bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 7bd827b84a67..9664749bf618 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -1,29 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
-#include <poll.h>
-#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <signal.h>
-#include <assert.h>
#include <errno.h>
#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
+#include "trace_helpers.h"
#define SAMPLE_FREQ 50
+static int pid;
+/* counts, stackmap */
+static int map_fd[2];
+struct bpf_program *prog;
static bool sys_read_seen, sys_write_seen;
static void print_ksym(__u64 addr)
@@ -33,10 +31,15 @@ static void print_ksym(__u64 addr)
if (!addr)
return;
sym = ksym_search(addr);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ return;
+ }
+
printf("%s;", sym->name);
- if (!strcmp(sym->name, "sys_read"))
+ if (!strstr(sym->name, "sys_read"))
sys_read_seen = true;
- else if (!strcmp(sym->name, "sys_write"))
+ else if (!strstr(sym->name, "sys_write"))
sys_write_seen = true;
}
@@ -88,10 +91,10 @@ static void print_stack(struct key_t *key, __u64 count)
}
}
-static void int_exit(int sig)
+static void err_exit(int err)
{
- kill(0, SIGKILL);
- exit(0);
+ kill(pid, SIGKILL);
+ exit(err);
}
static void print_stacks(void)
@@ -99,7 +102,7 @@ static void print_stacks(void)
struct key_t key = {}, next_key;
__u64 value;
__u32 stackid = 0, next_id;
- int fd = map_fd[0], stack_map = map_fd[1];
+ int error = 1, fd = map_fd[0], stack_map = map_fd[1];
sys_read_seen = sys_write_seen = false;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
@@ -111,7 +114,7 @@ static void print_stacks(void)
printf("\n");
if (!sys_read_seen || !sys_write_seen) {
printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
- int_exit(0);
+ err_exit(error);
}
/* clear stack map */
@@ -121,51 +124,93 @@ static void print_stacks(void)
}
}
+static inline int generate_load(void)
+{
+ if (system("dd if=/dev/zero of=/dev/null count=5000k status=none") < 0) {
+ printf("failed to generate some load with dd: %s\n", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
static void test_perf_event_all_cpu(struct perf_event_attr *attr)
{
- int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- int *pmu_fd = malloc(nr_cpus * sizeof(int));
- int i, error = 0;
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link **links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ int i, pmu_fd, error = 1;
+
+ if (!links) {
+ printf("malloc of links failed\n");
+ goto err;
+ }
+
+ /* system wide perf event, no need to inherit */
+ attr->inherit = 0;
/* open perf_event on all cpus */
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
- if (pmu_fd[i] < 0) {
+ pmu_fd = sys_perf_event_open(attr, -1, i, -1, 0);
+ if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- error = 1;
goto all_cpu_err;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ printf("bpf_program__attach_perf_event failed\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ goto all_cpu_err;
+ }
}
- system("dd if=/dev/zero of=/dev/null count=5000k status=none");
+
+ if (generate_load() < 0)
+ goto all_cpu_err;
+
print_stacks();
+ error = 0;
all_cpu_err:
- for (i--; i >= 0; i--) {
- ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
- close(pmu_fd[i]);
- }
- free(pmu_fd);
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+err:
+ free(links);
if (error)
- int_exit(0);
+ err_exit(error);
}
static void test_perf_event_task(struct perf_event_attr *attr)
{
- int pmu_fd;
+ struct bpf_link *link = NULL;
+ int pmu_fd, error = 1;
+
+ /* per task perf event, enable inherit so the "dd ..." command can be traced properly.
+ * Enabling inherit will cause bpf_perf_prog_read_time helper failure.
+ */
+ attr->inherit = 1;
/* open task bound event */
pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- int_exit(0);
+ goto err;
}
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
- system("dd if=/dev/zero of=/dev/null count=5000k status=none");
+ link = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(link)) {
+ printf("bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(pmu_fd);
+ goto err;
+ }
+
+ if (generate_load() < 0)
+ goto err;
+
print_stacks();
- ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
- close(pmu_fd);
+ error = 0;
+err:
+ bpf_link__destroy(link);
+ if (error)
+ err_exit(error);
}
static void test_bpf_perf_event(void)
@@ -175,14 +220,12 @@ static void test_bpf_perf_event(void)
.freq = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
- .inherit = 1,
};
struct perf_event_attr attr_type_sw = {
.sample_freq = SAMPLE_FREQ,
.freq = 1,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
- .inherit = 1,
};
struct perf_event_attr attr_hw_cache_l1d = {
.sample_freq = SAMPLE_FREQ,
@@ -192,7 +235,6 @@ static void test_bpf_perf_event(void)
PERF_COUNT_HW_CACHE_L1D |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
- .inherit = 1,
};
struct perf_event_attr attr_hw_cache_branch_miss = {
.sample_freq = SAMPLE_FREQ,
@@ -202,7 +244,6 @@ static void test_bpf_perf_event(void)
PERF_COUNT_HW_CACHE_BPU |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
- .inherit = 1,
};
struct perf_event_attr attr_type_raw = {
.sample_freq = SAMPLE_FREQ,
@@ -210,7 +251,17 @@ static void test_bpf_perf_event(void)
.type = PERF_TYPE_RAW,
/* Intel Instruction Retired */
.config = 0xc0,
- .inherit = 1,
+ };
+ struct perf_event_attr attr_type_raw_lock_load = {
+ .sample_freq = SAMPLE_FREQ,
+ .freq = 1,
+ .type = PERF_TYPE_RAW,
+ /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+ .config = 0x21d0,
+ /* Request to record lock address from PEBS */
+ .sample_type = PERF_SAMPLE_ADDR,
+ /* Record address value requires precise event */
+ .precise_ip = 2,
};
printf("Test HW_CPU_CYCLES\n");
@@ -233,36 +284,69 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
+ printf("Test Lock Load\n");
+ test_perf_event_all_cpu(&attr_type_raw_lock_load);
+ test_perf_event_task(&attr_type_raw_lock_load);
+
printf("*** PASS ***\n");
}
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
char filename[256];
+ int error = 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ signal(SIGINT, err_exit);
+ signal(SIGTERM, err_exit);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
- return 1;
+ goto cleanup;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 2;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
}
- if (fork() == 0) {
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ printf("finding a counts/stackmap map in obj file failed\n");
+ goto cleanup;
+ }
+
+ pid = fork();
+ if (pid == 0) {
read_trace_pipe();
return 0;
+ } else if (pid == -1) {
+ printf("couldn't spawn process\n");
+ goto cleanup;
}
+
test_bpf_perf_event();
- int_exit(0);
- return 0;
+ error = 0;
+
+cleanup:
+ bpf_object__close(obj);
+ err_exit(error);
}
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output.bpf.c
index 9b96f4fb8cea..565a73b51b04 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output.bpf.c
@@ -1,16 +1,15 @@
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 2);
+} my_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("ksyscall/write")
int bpf_prog1(struct pt_regs *ctx)
{
struct S {
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index ccca1e348017..d316fd2c8e24 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -1,119 +1,10 @@
-/* This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0-only
#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
#include <fcntl.h>
#include <poll.h>
-#include <linux/perf_event.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <time.h>
#include <signal.h>
-#include "libbpf.h"
-#include "bpf_load.h"
-#include "perf-sys.h"
-
-static int pmu_fd;
-
-int page_size;
-int page_cnt = 8;
-volatile struct perf_event_mmap_page *header;
-
-typedef void (*print_fn)(void *data, int size);
-
-static int perf_event_mmap(int fd)
-{
- void *base;
- int mmap_size;
-
- page_size = getpagesize();
- mmap_size = page_size * (page_cnt + 1);
-
- base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (base == MAP_FAILED) {
- printf("mmap err\n");
- return -1;
- }
-
- header = base;
- return 0;
-}
-
-static int perf_event_poll(int fd)
-{
- struct pollfd pfd = { .fd = fd, .events = POLLIN };
-
- return poll(&pfd, 1, 1000);
-}
-
-struct perf_event_sample {
- struct perf_event_header header;
- __u32 size;
- char data[];
-};
-
-static void perf_event_read(print_fn fn)
-{
- __u64 data_tail = header->data_tail;
- __u64 data_head = header->data_head;
- __u64 buffer_size = page_cnt * page_size;
- void *base, *begin, *end;
- char buf[256];
-
- asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
- if (data_head == data_tail)
- return;
-
- base = ((char *)header) + page_size;
-
- begin = base + data_tail % buffer_size;
- end = base + data_head % buffer_size;
-
- while (begin != end) {
- struct perf_event_sample *e;
-
- e = begin;
- if (begin + e->header.size > base + buffer_size) {
- long len = base + buffer_size - begin;
-
- assert(len < e->header.size);
- memcpy(buf, begin, len);
- memcpy(buf + len, base, e->header.size - len);
- e = (void *) buf;
- begin = base + e->header.size - len;
- } else if (begin + e->header.size == base + buffer_size) {
- begin = base;
- } else {
- begin += e->header.size;
- }
-
- if (e->header.type == PERF_RECORD_SAMPLE) {
- fn(e->data, e->size);
- } else if (e->header.type == PERF_RECORD_LOST) {
- struct {
- struct perf_event_header header;
- __u64 id;
- __u64 lost;
- } *lost = (void *) e;
- printf("lost %lld events\n", lost->lost);
- } else {
- printf("unknown event type=%d size=%d\n",
- e->header.type, e->header.size);
- }
- }
-
- __sync_synchronize(); /* smp_mb() */
- header->data_tail = data_head;
-}
+#include <bpf/libbpf.h>
static __u64 time_get_ns(void)
{
@@ -124,12 +15,12 @@ static __u64 time_get_ns(void)
}
static __u64 start_time;
+static __u64 cnt;
#define MAX_CNT 100000ll
-static void print_bpf_output(void *data, int size)
+static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
{
- static __u64 cnt;
struct {
__u64 pid;
__u64 cookie;
@@ -138,7 +29,7 @@ static void print_bpf_output(void *data, int size)
if (e->cookie != 0x12345678) {
printf("BUG pid %llx cookie %llx sized %d\n",
e->pid, e->cookie, size);
- kill(0, SIGINT);
+ return;
}
cnt++;
@@ -146,51 +37,69 @@ static void print_bpf_output(void *data, int size)
if (cnt == MAX_CNT) {
printf("recv %lld events per sec\n",
MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
- kill(0, SIGINT);
+ return;
}
}
-static void test_bpf_perf_event(void)
-{
- struct perf_event_attr attr = {
- .sample_type = PERF_SAMPLE_RAW,
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_BPF_OUTPUT,
- };
- int key = 0;
-
- pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
-
- assert(pmu_fd >= 0);
- assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
- ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-}
-
int main(int argc, char **argv)
{
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct perf_buffer *pb;
+ struct bpf_object *obj;
+ int map_fd, ret = 0;
char filename[256];
FILE *f;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
}
- test_bpf_perf_event();
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (libbpf_get_error(prog)) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
+ }
- if (perf_event_mmap(pmu_fd) < 0)
+ pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL);
+ ret = libbpf_get_error(pb);
+ if (ret) {
+ printf("failed to setup perf_buffer: %d\n", ret);
return 1;
+ }
f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
(void) f;
start_time = time_get_ns();
- for (;;) {
- perf_event_poll(pmu_fd);
- perf_event_read(print_bpf_output);
+ while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
}
+ kill(0, SIGINT);
- return 0;
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+ return ret;
}
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1.bpf.c
index 107da148820f..ceedf0b1d479 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1.bpf.c
@@ -4,36 +4,35 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <uapi/linux/bpf.h>
+#include "vmlinux.h"
+#include "net_shared.h"
#include <linux/version.h>
-#include "bpf_helpers.h"
-
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
* Number of arguments and their positions can change, etc.
* In such case this bpf+kprobe example will no longer be meaningful
*/
-SEC("kprobe/__netif_receive_skb_core")
+SEC("kprobe.multi/__netif_receive_skb_core*")
int bpf_prog1(struct pt_regs *ctx)
{
- /* attaches to kprobe netif_receive_skb,
- * looks for packets on loobpack device and prints them
+ /* attaches to kprobe __netif_receive_skb_core,
+ * looks for packets on loopback device and prints them
+ * (wildcard is used for avoiding symbol mismatch due to optimization)
*/
char devname[IFNAMSIZ];
struct net_device *dev;
struct sk_buff *skb;
int len;
- /* non-portable! works for the given kernel only */
- skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
- dev = _(skb->dev);
- len = _(skb->len);
+ bpf_core_read(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
+ dev = BPF_CORE_READ(skb, dev);
+ len = BPF_CORE_READ(skb, len);
- bpf_probe_read(devname, sizeof(devname), dev->name);
+ BPF_CORE_READ_STR_INTO(&devname, dev, name);
if (devname[0] == 'l' && devname[1] == 'o') {
char fmt[] = "skb %p len %d\n";
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
index 31a48183beea..8c3d9043a2b6 100644
--- a/samples/bpf/tracex1_user.c
+++ b/samples/bpf/tracex1_user.c
@@ -1,19 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include "trace_helpers.h"
int main(int ac, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ FILE *f;
+
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
f = popen("taskset 1 ping -c5 localhost", "r");
@@ -21,5 +43,8 @@ int main(int ac, char **argv)
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
deleted file mode 100644
index 5e11c20ce5ec..000000000000
--- a/samples/bpf/tracex2_kern.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-
-/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
- * example will no longer be meaningful
- */
-SEC("kprobe/kfree_skb")
-int bpf_prog2(struct pt_regs *ctx)
-{
- long loc = 0;
- long init_val = 1;
- long *value;
-
- /* read ip of kfree_skb caller.
- * non-portable version of __builtin_return_address(0)
- */
- BPF_KPROBE_READ_RET_IP(loc, ctx);
-
- value = bpf_map_lookup_elem(&my_map, &loc);
- if (value)
- *value += 1;
- else
- bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY);
- return 0;
-}
-
-static unsigned int log2(unsigned int v)
-{
- unsigned int r;
- unsigned int shift;
-
- r = (v > 0xFFFF) << 4; v >>= r;
- shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
- shift = (v > 0xF) << 2; v >>= shift; r |= shift;
- shift = (v > 0x3) << 1; v >>= shift; r |= shift;
- r |= (v >> 1);
- return r;
-}
-
-static unsigned int log2l(unsigned long v)
-{
- unsigned int hi = v >> 32;
- if (hi)
- return log2(hi) + 32;
- else
- return log2(v);
-}
-
-struct hist_key {
- char comm[16];
- u64 pid_tgid;
- u64 uid_gid;
- u64 index;
-};
-
-struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(struct hist_key),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-
-SEC("kprobe/sys_write")
-int bpf_prog3(struct pt_regs *ctx)
-{
- long write_size = PT_REGS_PARM3(ctx);
- long init_val = 1;
- long *value;
- struct hist_key key;
-
- key.index = log2l(write_size);
- key.pid_tgid = bpf_get_current_pid_tgid();
- key.uid_gid = bpf_get_current_uid_gid();
- bpf_get_current_comm(&key.comm, sizeof(key.comm));
-
- value = bpf_map_lookup_elem(&my_hist_map, &key);
- if (value)
- __sync_fetch_and_add(value, 1);
- else
- bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
- return 0;
-}
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
deleted file mode 100644
index 7321a3f253c9..000000000000
--- a/samples/bpf/tracex2_user.c
+++ /dev/null
@@ -1,159 +0,0 @@
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
-#include <sys/resource.h>
-
-#include "libbpf.h"
-#include "bpf_load.h"
-#include "bpf_util.h"
-
-#define MAX_INDEX 64
-#define MAX_STARS 38
-
-static void stars(char *str, long val, long max, int width)
-{
- int i;
-
- for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
- str[i] = '*';
- if (val > max)
- str[i - 1] = '+';
- str[i] = '\0';
-}
-
-struct task {
- char comm[16];
- __u64 pid_tgid;
- __u64 uid_gid;
-};
-
-struct hist_key {
- struct task t;
- __u32 index;
-};
-
-#define SIZE sizeof(struct task)
-
-static void print_hist_for_pid(int fd, void *task)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct hist_key key = {}, next_key;
- long values[nr_cpus];
- char starstr[MAX_STARS];
- long value;
- long data[MAX_INDEX] = {};
- int max_ind = -1;
- long max_value = 0;
- int i, ind;
-
- while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
- if (memcmp(&next_key, task, SIZE)) {
- key = next_key;
- continue;
- }
- bpf_map_lookup_elem(fd, &next_key, values);
- value = 0;
- for (i = 0; i < nr_cpus; i++)
- value += values[i];
- ind = next_key.index;
- data[ind] = value;
- if (value && ind > max_ind)
- max_ind = ind;
- if (value > max_value)
- max_value = value;
- key = next_key;
- }
-
- printf(" syscall write() stats\n");
- printf(" byte_size : count distribution\n");
- for (i = 1; i <= max_ind + 1; i++) {
- stars(starstr, data[i - 1], max_value, MAX_STARS);
- printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
- (1l << i) >> 1, (1l << i) - 1, data[i - 1],
- MAX_STARS, starstr);
- }
-}
-
-static void print_hist(int fd)
-{
- struct hist_key key = {}, next_key;
- static struct task tasks[1024];
- int task_cnt = 0;
- int i;
-
- while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
- int found = 0;
-
- for (i = 0; i < task_cnt; i++)
- if (memcmp(&tasks[i], &next_key, SIZE) == 0)
- found = 1;
- if (!found)
- memcpy(&tasks[task_cnt++], &next_key, SIZE);
- key = next_key;
- }
-
- for (i = 0; i < task_cnt; i++) {
- printf("\npid %d cmd %s uid %d\n",
- (__u32) tasks[i].pid_tgid,
- tasks[i].comm,
- (__u32) tasks[i].uid_gid);
- print_hist_for_pid(fd, &tasks[i]);
- }
-
-}
-
-static void int_exit(int sig)
-{
- print_hist(map_fd[1]);
- exit(0);
-}
-
-int main(int ac, char **argv)
-{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
- char filename[256];
- long key, next_key, value;
- FILE *f;
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- /* start 'ping' in the background to have some kfree_skb events */
- f = popen("ping -c5 localhost", "r");
- (void) f;
-
- /* start 'dd' in the background to have plenty of 'write' syscalls */
- f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
- (void) f;
-
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
-
- for (i = 0; i < 5; i++) {
- key = 0;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
- printf("location 0x%lx count %ld\n", next_key, value);
- key = next_key;
- }
- if (key)
- printf("\n");
- sleep(1);
- }
- print_hist(map_fd[1]);
-
- return 0;
-}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3.bpf.c
index 9974c3d7c18b..41f37966f5f5 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3.bpf.c
@@ -4,29 +4,35 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(u64),
- .max_entries = 4096,
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct start_key {
+ dev_t dev;
+ u32 _pad;
+ sector_t sector;
};
-/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
- * example will no longer be meaningful
- */
-SEC("kprobe/blk_start_request")
-int bpf_prog1(struct pt_regs *ctx)
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, u64);
+ __uint(max_entries, 4096);
+} my_map SEC(".maps");
+
+/* from /sys/kernel/tracing/events/block/block_io_start/format */
+SEC("tracepoint/block/block_io_start")
+int bpf_prog1(struct trace_event_raw_block_rq *ctx)
{
- long rq = PT_REGS_PARM1(ctx);
u64 val = bpf_ktime_get_ns();
+ struct start_key key = {
+ .dev = ctx->dev,
+ .sector = ctx->sector
+ };
- bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
+ bpf_map_update_elem(&my_map, &key, &val, BPF_ANY);
return 0;
}
@@ -41,28 +47,33 @@ static unsigned int log2l(unsigned long long n)
#define SLOTS 100
-struct bpf_map_def SEC("maps") lat_map = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = SLOTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+ __uint(max_entries, SLOTS);
+} lat_map SEC(".maps");
-SEC("kprobe/blk_account_io_completion")
-int bpf_prog2(struct pt_regs *ctx)
+/* from /sys/kernel/tracing/events/block/block_io_done/format */
+SEC("tracepoint/block/block_io_done")
+int bpf_prog2(struct trace_event_raw_block_rq *ctx)
{
- long rq = PT_REGS_PARM1(ctx);
+ struct start_key key = {
+ .dev = ctx->dev,
+ .sector = ctx->sector
+ };
+
u64 *value, l, base;
u32 index;
- value = bpf_map_lookup_elem(&my_map, &rq);
+ value = bpf_map_lookup_elem(&my_map, &key);
if (!value)
return 0;
u64 cur_time = bpf_ktime_get_ns();
u64 delta = cur_time - *value;
- bpf_map_delete_elem(&my_map, &rq);
+ bpf_map_delete_elem(&my_map, &key);
/* the lines below are computing index = log10(delta)*10
* using integer arithmetic
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index fe372239d505..1002eb0323b4 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <stdlib.h>
@@ -10,15 +7,11 @@
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
-#include <linux/bpf.h>
-#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "bpf_util.h"
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-
#define SLOTS 100
static void clear_stats(int fd)
@@ -113,21 +106,11 @@ static void print_hist(int fd)
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
+ int map_fd, i, j = 0;
for (i = 1; i < ac; i++) {
if (strcmp(argv[i], "-a") == 0) {
@@ -142,6 +125,35 @@ int main(int ac, char **argv)
}
}
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "lat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+
printf(" heatmap of IO latency\n");
if (text_only)
printf(" %s", sym[num_colors - 1]);
@@ -158,9 +170,14 @@ int main(int ac, char **argv)
for (i = 0; ; i++) {
if (i % 20 == 0)
print_banner();
- print_hist(map_fd[1]);
+ print_hist(map_fd);
sleep(2);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4.bpf.c
index 6dd8e384de96..d786492fd926 100644
--- a/samples/bpf/tracex4_kern.c
+++ b/samples/bpf/tracex4.bpf.c
@@ -4,22 +4,22 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct pair {
u64 val;
u64 ip;
};
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(struct pair),
- .max_entries = 1000000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, struct pair);
+ __uint(max_entries, 1000000);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
@@ -33,13 +33,13 @@ int bpf_prog1(struct pt_regs *ctx)
return 0;
}
-SEC("kretprobe/kmem_cache_alloc_node")
+SEC("kretprobe/kmem_cache_alloc_node_noprof")
int bpf_prog2(struct pt_regs *ctx)
{
long ptr = PT_REGS_RC(ctx);
long ip = 0;
- /* get ip address of kmem_cache_alloc_node() caller */
+ /* get ip address of kmem_cache_alloc_node_noprof() caller */
BPF_KRETPROBE_READ_RET_IP(ip, ctx);
struct pair v = {
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index 22c644f1f4c3..a5145ad72cbf 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <stdlib.h>
@@ -11,11 +8,9 @@
#include <stdbool.h>
#include <string.h>
#include <time.h>
-#include <linux/bpf.h>
-#include <sys/resource.h>
-#include "libbpf.h"
-#include "bpf_load.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
struct pair {
long long val;
@@ -36,11 +31,11 @@ static void print_old_objects(int fd)
__u64 key, next_key;
struct pair v;
- key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+ key = write(1, "\e[1;1H\e[2J", 11); /* clear screen */
key = -1;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &v);
+ while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(fd, &next_key, &v);
key = next_key;
if (val - v.val < 1000000000ll)
/* object was allocated more then 1 sec ago */
@@ -52,26 +47,50 @@ static void print_old_objects(int fd)
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
+ int map_fd, j = 0;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
- return 1;
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
- for (i = 0; ; i++) {
- print_old_objects(map_fd[1]);
+ while (1) {
+ print_old_objects(map_fd);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5.bpf.c
index f57f4e1ea1ec..4d3d6c9b25fa 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5.bpf.c
@@ -4,26 +4,27 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/ptrace.h>
+#include "vmlinux.h"
+#include "syscall_nrs.h"
#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/seccomp.h>
#include <uapi/linux/unistd.h>
-#include "syscall_nrs.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#define __stringify(x) #x
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
-struct bpf_map_def SEC("maps") progs = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
#ifdef __mips__
- .max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+ __uint(max_entries, 6000); /* MIPS n64 syscalls start at 5000 */
#else
- .max_entries = 1024,
+ __uint(max_entries, 1024);
#endif
-};
+} progs SEC(".maps");
SEC("kprobe/__seccomp_filter")
int bpf_prog1(struct pt_regs *ctx)
@@ -46,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -59,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -68,12 +69,25 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
return 0;
}
+#ifdef __NR_mmap2
+PROG(SYS__NR_mmap2)(struct pt_regs *ctx)
+{
+ char fmt[] = "mmap2\n";
+
+ bpf_trace_printk(fmt, sizeof(fmt));
+ return 0;
+}
+#endif
+
+#ifdef __NR_mmap
PROG(SYS__NR_mmap)(struct pt_regs *ctx)
{
char fmt[] = "mmap\n";
+
bpf_trace_printk(fmt, sizeof(fmt));
return 0;
}
+#endif
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index 36b5925bb137..7e2d8397fb98 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -1,12 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
+#include <stdlib.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
-#include "libbpf.h"
-#include "bpf_load.h"
-#include <sys/resource.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "trace_helpers.h"
+#include "bpf_util.h"
+
+#ifdef __mips__
+#define MAX_ENTRIES 6000 /* MIPS n64 syscalls start at 5000 */
+#else
+#define MAX_ENTRIES 1024
+#endif
/* install fake seccomp program to enable seccomp code path inside the kernel,
* so that our kprobe attached to seccomp_phase1() can be triggered
@@ -17,7 +25,7 @@ static void install_accept_all_seccomp(void)
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
- .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .len = (unsigned short)ARRAY_SIZE(filter),
.filter = filter,
};
if (prctl(PR_SET_SECCOMP, 2, &prog))
@@ -26,16 +34,54 @@ static void install_accept_all_seccomp(void)
int main(int ac, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int key, fd, progs_fd;
+ const char *section;
char filename[256];
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ FILE *f;
+
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
+ }
+
+ progs_fd = bpf_object__find_map_fd_by_name(obj, "progs");
+ if (progs_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ bpf_object__for_each_program(prog, obj) {
+ section = bpf_program__section_name(prog);
+ /* register only syscalls to PROG_ARRAY */
+ if (sscanf(section, "kprobe/%d", &key) != 1)
+ continue;
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ fd = bpf_program__fd(prog);
+ bpf_map_update_elem(progs_fd, &key, &fd, BPF_ANY);
}
install_accept_all_seccomp();
@@ -45,5 +91,8 @@ int main(int ac, char **argv)
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex6.bpf.c b/samples/bpf/tracex6.bpf.c
new file mode 100644
index 000000000000..9b23b4737cfb
--- /dev/null
+++ b/samples/bpf/tracex6.bpf.c
@@ -0,0 +1,81 @@
+#include "vmlinux.h"
+#include <linux/version.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 64);
+} counters SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, 64);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct bpf_perf_event_value);
+ __uint(max_entries, 64);
+} values2 SEC(".maps");
+
+SEC("kprobe/htab_map_get_next_key")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_smp_processor_id();
+ u64 count, *val;
+ s64 error;
+
+ count = bpf_perf_event_read(&counters, key);
+ error = (s64)count;
+ if (error <= -2 && error >= -22)
+ return 0;
+
+ val = bpf_map_lookup_elem(&values, &key);
+ if (val)
+ *val = count;
+ else
+ bpf_map_update_elem(&values, &key, &count, BPF_NOEXIST);
+
+ return 0;
+}
+
+/*
+ * Since *_map_lookup_elem can't be expected to trigger bpf programs
+ * due to potential deadlocks (bpf_disable_instrumentation), this bpf
+ * program will be attached to bpf_map_copy_value (which is called
+ * from map_lookup_elem) and will only filter the hashtable type.
+ */
+SEC("kprobe/bpf_map_copy_value")
+int BPF_KPROBE(bpf_prog2, struct bpf_map *map)
+{
+ u32 key = bpf_get_smp_processor_id();
+ struct bpf_perf_event_value *val, buf;
+ enum bpf_map_type type;
+ int error;
+
+ type = BPF_CORE_READ(map, map_type);
+ if (type != BPF_MAP_TYPE_HASH)
+ return 0;
+
+ error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf));
+ if (error)
+ return 0;
+
+ val = bpf_map_lookup_elem(&values2, &key);
+ if (val)
+ *val = buf;
+ else
+ bpf_map_update_elem(&values2, &key, &buf, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
deleted file mode 100644
index e7d180305974..000000000000
--- a/samples/bpf/tracex6_kern.c
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <linux/ptrace.h>
-#include <linux/version.h>
-#include <uapi/linux/bpf.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") counters = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = 64,
-};
-
-SEC("kprobe/htab_map_get_next_key")
-int bpf_prog1(struct pt_regs *ctx)
-{
- u32 key = bpf_get_smp_processor_id();
- u64 count, *val;
- s64 error;
-
- count = bpf_perf_event_read(&counters, key);
- error = (s64)count;
- if (error <= -2 && error >= -22)
- return 0;
-
- val = bpf_map_lookup_elem(&values, &key);
- if (val)
- *val = count;
- else
- bpf_map_update_elem(&values, &key, &count, BPF_NOEXIST);
-
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index a05a99a0752f..ae811ac83bc2 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -1,27 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
#include <linux/perf_event.h>
-#include <linux/bpf.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
-#include <sys/resource.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
-#include "bpf_load.h"
-#include "libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#define SAMPLE_PERIOD 0x7fffffffffffffffULL
+/* counters, values, values2 */
+static int map_fd[3];
+
static void check_on_cpu(int cpu, struct perf_event_attr *attr)
{
+ struct bpf_perf_event_value value2;
int pmu_fd, error = 0;
cpu_set_t set;
__u64 value;
@@ -46,8 +49,18 @@ static void check_on_cpu(int cpu, struct perf_event_attr *attr)
fprintf(stderr, "Value missing for CPU %d\n", cpu);
error = 1;
goto on_exit;
+ } else {
+ fprintf(stderr, "CPU %d: %llu\n", cpu, value);
+ }
+ /* The above bpf_map_lookup_elem should trigger the second kprobe */
+ if (bpf_map_lookup_elem(map_fd[2], &cpu, &value2)) {
+ fprintf(stderr, "Value2 missing for CPU %d\n", cpu);
+ error = 1;
+ goto on_exit;
+ } else {
+ fprintf(stderr, "CPU %d: counter: %llu, enabled: %llu, running: %llu\n", cpu,
+ value2.counter, value2.enabled, value2.running);
}
- fprintf(stderr, "CPU %d: %llu\n", cpu, value);
on_exit:
assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error);
@@ -161,17 +174,49 @@ static void test_bpf_perf_event(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ int i = 0;
+
+ snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counters");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "values");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "values2");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
- setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
test_bpf_perf_event();
+
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
deleted file mode 100644
index 219742106bfd..000000000000
--- a/samples/bpf/xdp1_kern.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/* Copyright (c) 2016 PLUMgrid
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") rxcnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 256,
-};
-
-static int parse_ipv4(void *data, u64 nh_off, void *data_end)
-{
- struct iphdr *iph = data + nh_off;
-
- if (iph + 1 > data_end)
- return 0;
- return iph->protocol;
-}
-
-static int parse_ipv6(void *data, u64 nh_off, void *data_end)
-{
- struct ipv6hdr *ip6h = data + nh_off;
-
- if (ip6h + 1 > data_end)
- return 0;
- return ip6h->nexthdr;
-}
-
-SEC("xdp1")
-int xdp_prog1(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- long *value;
- u16 h_proto;
- u64 nh_off;
- u32 ipproto;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- h_proto = eth->h_proto;
-
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
-
- if (h_proto == htons(ETH_P_IP))
- ipproto = parse_ipv4(data, nh_off, data_end);
- else if (h_proto == htons(ETH_P_IPV6))
- ipproto = parse_ipv6(data, nh_off, data_end);
- else
- ipproto = 0;
-
- value = bpf_map_lookup_elem(&rxcnt, &ipproto);
- if (value)
- *value += 1;
-
- return rc;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
deleted file mode 100644
index 2431c0321b71..000000000000
--- a/samples/bpf/xdp1_user.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/* Copyright (c) 2016 PLUMgrid
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <libgen.h>
-
-#include "bpf_load.h"
-#include "bpf_util.h"
-#include "libbpf.h"
-
-static int ifindex;
-static __u32 xdp_flags;
-
-static void int_exit(int sig)
-{
- set_link_xdp_fd(ifindex, -1, xdp_flags);
- exit(0);
-}
-
-/* simple per-protocol drop counter
- */
-static void poll_stats(int interval)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- const unsigned int nr_keys = 256;
- __u64 values[nr_cpus], prev[nr_keys][nr_cpus];
- __u32 key;
- int i;
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- sleep(interval);
-
- for (key = 0; key < nr_keys; key++) {
- __u64 sum = 0;
-
- assert(bpf_map_lookup_elem(map_fd[0], &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[key][i]);
- if (sum)
- printf("proto %u: %10llu pkt/s\n",
- key, sum / interval);
- memcpy(prev[key], values, sizeof(values));
- }
- }
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] IFINDEX\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n",
- prog);
-}
-
-int main(int argc, char **argv)
-{
- const char *optstr = "SN";
- char filename[256];
- int opt;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
- switch (opt) {
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- break;
- default:
- usage(basename(argv[0]));
- return 1;
- }
- }
-
- if (optind == argc) {
- usage(basename(argv[0]));
- return 1;
- }
- ifindex = strtoul(argv[optind], NULL, 0);
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
-
- if (!prog_fd[0]) {
- printf("load_bpf_file: %s\n", strerror(errno));
- return 1;
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
- printf("link set xdp fd failed\n");
- return 1;
- }
-
- poll_stats(2);
-
- return 0;
-}
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
deleted file mode 100644
index e01288867d15..000000000000
--- a/samples/bpf/xdp2_kern.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/* Copyright (c) 2016 PLUMgrid
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include "bpf_helpers.h"
-
-struct bpf_map_def SEC("maps") rxcnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 256,
-};
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
-
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
-
-static int parse_ipv4(void *data, u64 nh_off, void *data_end)
-{
- struct iphdr *iph = data + nh_off;
-
- if (iph + 1 > data_end)
- return 0;
- return iph->protocol;
-}
-
-static int parse_ipv6(void *data, u64 nh_off, void *data_end)
-{
- struct ipv6hdr *ip6h = data + nh_off;
-
- if (ip6h + 1 > data_end)
- return 0;
- return ip6h->nexthdr;
-}
-
-SEC("xdp1")
-int xdp_prog1(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- long *value;
- u16 h_proto;
- u64 nh_off;
- u32 ipproto;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- h_proto = eth->h_proto;
-
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
-
- if (h_proto == htons(ETH_P_IP))
- ipproto = parse_ipv4(data, nh_off, data_end);
- else if (h_proto == htons(ETH_P_IPV6))
- ipproto = parse_ipv6(data, nh_off, data_end);
- else
- ipproto = 0;
-
- value = bpf_map_lookup_elem(&rxcnt, &ipproto);
- if (value)
- *value += 1;
-
- if (ipproto == IPPROTO_UDP) {
- swap_src_dst_mac(data);
- rc = XDP_TX;
- }
-
- return rc;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh
new file mode 100755
index 000000000000..4bde9d066c46
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+#
+# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load
+# eBPF programs, both for XDP and clsbpf. Shell script function
+# wrappers and even long options parsing is illustrated, for ease of
+# use.
+#
+# Related to sample/bpf/xdp2skb_meta_kern.c, which contains BPF-progs
+# that need to collaborate between XDP and TC hooks. Thus, it is
+# convenient that the same tool load both programs that need to work
+# together.
+#
+BPF_FILE=xdp2skb_meta_kern.o
+DIR=$(dirname $0)
+
+[ -z "$TC" ] && TC=tc
+[ -z "$IP" ] && IP=ip
+
+function usage() {
+ echo ""
+ echo "Usage: $0 [-vfh] --dev ethX"
+ echo " -d | --dev : Network device (required)"
+ echo " --flush : Cleanup flush TC and XDP progs"
+ echo " --list : (\$LIST) List TC and XDP progs"
+ echo " -v | --verbose : (\$VERBOSE) Verbose"
+ echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)"
+ echo ""
+}
+
+## -- General shell logging cmds --
+function err() {
+ local exitcode=$1
+ shift
+ echo "ERROR: $@" >&2
+ exit $exitcode
+}
+
+function info() {
+ if [[ -n "$VERBOSE" ]]; then
+ echo "# $@"
+ fi
+}
+
+## -- Helper function calls --
+
+# Wrapper call for TC and IP
+# - Will display the offending command on failure
+function _call_cmd() {
+ local cmd="$1"
+ local allow_fail="$2"
+ shift 2
+ if [[ -n "$VERBOSE" ]]; then
+ echo "$cmd $@"
+ fi
+ if [[ -n "$DRYRUN" ]]; then
+ return
+ fi
+ $cmd "$@"
+ local status=$?
+ if (( $status != 0 )); then
+ if [[ "$allow_fail" == "" ]]; then
+ err 2 "Exec error($status) occurred cmd: \"$cmd $@\""
+ fi
+ fi
+}
+function call_tc() {
+ _call_cmd "$TC" "" "$@"
+}
+function call_tc_allow_fail() {
+ _call_cmd "$TC" "allow_fail" "$@"
+}
+function call_ip() {
+ _call_cmd "$IP" "" "$@"
+}
+
+## --- Parse command line arguments / parameters ---
+# Using external program "getopt" to get --long-options
+OPTIONS=$(getopt -o vfhd: \
+ --long verbose,flush,help,list,dev:,dry-run -- "$@")
+if (( $? != 0 )); then
+ err 4 "Error calling getopt"
+fi
+eval set -- "$OPTIONS"
+
+unset DEV
+unset FLUSH
+while true; do
+ case "$1" in
+ -d | --dev ) # device
+ DEV=$2
+ info "Device set to: DEV=$DEV" >&2
+ shift 2
+ ;;
+ -v | --verbose)
+ VERBOSE=yes
+ # info "Verbose mode: VERBOSE=$VERBOSE" >&2
+ shift
+ ;;
+ --dry-run )
+ DRYRUN=yes
+ VERBOSE=yes
+ info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2
+ shift
+ ;;
+ -f | --flush )
+ FLUSH=yes
+ shift
+ ;;
+ --list )
+ LIST=yes
+ shift
+ ;;
+ -- )
+ shift
+ break
+ ;;
+ -h | --help )
+ usage;
+ exit 0
+ ;;
+ * )
+ shift
+ break
+ ;;
+ esac
+done
+
+FILE="$DIR/$BPF_FILE"
+if [[ ! -e $FILE ]]; then
+ err 3 "Missing BPF object file ($FILE)"
+fi
+
+if [[ -z $DEV ]]; then
+ usage
+ err 2 "Please specify network device -- required option --dev"
+fi
+
+## -- Function calls --
+
+function list_tc()
+{
+ local device="$1"
+ shift
+ info "Listing current TC ingress rules"
+ call_tc filter show dev $device ingress
+}
+
+function list_xdp()
+{
+ local device="$1"
+ shift
+ info "Listing current XDP device($device) setting"
+ call_ip link show dev $device | grep --color=auto xdp
+}
+
+function flush_tc()
+{
+ local device="$1"
+ shift
+ info "Flush TC on device: $device"
+ call_tc_allow_fail filter del dev $device ingress
+ call_tc_allow_fail qdisc del dev $device clsact
+}
+
+function flush_xdp()
+{
+ local device="$1"
+ shift
+ info "Flush XDP on device: $device"
+ call_ip link set dev $device xdp off
+}
+
+function attach_tc_mark()
+{
+ local device="$1"
+ local file="$2"
+ local prog="tc_mark"
+ shift 2
+
+ # Re-attach clsact to clear/flush existing role
+ call_tc_allow_fail qdisc del dev $device clsact 2> /dev/null
+ call_tc qdisc add dev $device clsact
+
+ # Attach BPF prog
+ call_tc filter add dev $device ingress \
+ prio 1 handle 1 bpf da obj $file sec $prog
+}
+
+function attach_xdp_mark()
+{
+ local device="$1"
+ local file="$2"
+ local prog="xdp_mark"
+ shift 2
+
+ # Remove XDP prog in-case it's already loaded
+ # TODO: Need ip-link option to override/replace existing XDP prog
+ flush_xdp $device
+
+ # Attach XDP/BPF prog
+ call_ip link set dev $device xdp obj $file sec $prog
+}
+
+if [[ -n $FLUSH ]]; then
+ flush_tc $DEV
+ flush_xdp $DEV
+ exit 0
+fi
+
+if [[ -n $LIST ]]; then
+ list_tc $DEV
+ list_xdp $DEV
+ exit 0
+fi
+
+attach_tc_mark $DEV $FILE
+attach_xdp_mark $DEV $FILE
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
new file mode 100644
index 000000000000..3c36c25d9902
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * Example howto transfer info from XDP to SKB, e.g. skb->mark
+ * -----------------------------------------------------------
+ * This uses the XDP data_meta infrastructure, and is a cooperation
+ * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
+ *
+ * Notice: This example does not use the BPF C-loader,
+ * but instead rely on the iproute2 TC tool for loading BPF-objects.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+
+/*
+ * This struct is stored in the XDP 'data_meta' area, which is located
+ * just in-front-of the raw packet payload data. The meaning is
+ * specific to these two BPF programs that use it as a communication
+ * channel. XDP adjust/increase the area via a bpf-helper, and TC use
+ * boundary checks to see if data have been provided.
+ *
+ * The struct must be 4 byte aligned, which here is enforced by the
+ * struct __attribute__((aligned(4))).
+ */
+struct meta_info {
+ __u32 mark;
+} __attribute__((aligned(4)));
+
+SEC("xdp_mark")
+int _xdp_mark(struct xdp_md *ctx)
+{
+ struct meta_info *meta;
+ void *data;
+ int ret;
+
+ /* Reserve space in-front of data pointer for our meta info.
+ * (Notice drivers not supporting data_meta will fail here!)
+ */
+ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
+ if (ret < 0)
+ return XDP_ABORTED;
+
+ /* Notice: Kernel-side verifier requires that loading of
+ * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(),
+ * as pkt-data pointers are invalidated. Helpers that require
+ * this are determined/marked by bpf_helper_changes_pkt_data()
+ */
+ data = (void *)(unsigned long)ctx->data;
+
+ /* Check data_meta have room for meta_info struct */
+ meta = (void *)(unsigned long)ctx->data_meta;
+ if (meta + 1 > data)
+ return XDP_ABORTED;
+
+ meta->mark = 42;
+
+ return XDP_PASS;
+}
+
+SEC("tc_mark")
+int _tc_mark(struct __sk_buff *ctx)
+{
+ void *data = (void *)(unsigned long)ctx->data;
+ void *data_meta = (void *)(unsigned long)ctx->data_meta;
+ struct meta_info *meta = data_meta;
+
+ /* Check XDP gave us some data_meta */
+ if (meta + 1 > data) {
+ ctx->mark = 41;
+ /* Skip "accept" if no data_meta is avail */
+ return TC_ACT_OK;
+ }
+
+ /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */
+ ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */
+
+ return TC_ACT_OK;
+}
+
+/* Manually attaching these programs:
+export DEV=ixgbe2
+export FILE=xdp2skb_meta_kern.o
+
+# via TC command
+tc qdisc del dev $DEV clsact 2> /dev/null
+tc qdisc add dev $DEV clsact
+tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark
+tc filter show dev $DEV ingress
+
+# XDP via IP command:
+ip link set dev $DEV xdp off
+ip link set dev $DEV xdp obj $FILE sec xdp_mark
+
+# Use iptable to "see" if SKBs are marked
+iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29
+iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a
+
+# Hint: catch XDP_ABORTED errors via
+perf record -e xdp:*
+perf script
+
+*/
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c
new file mode 100644
index 000000000000..da67bcad1c63
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_kern.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program shows how to use bpf_xdp_adjust_tail() by
+ * generating ICMPv4 "packet to big" (unreachable/ df bit set frag needed
+ * to be more preice in case of v4)" where receiving packets bigger then
+ * 600 bytes.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <bpf/bpf_helpers.h>
+
+#define DEFAULT_TTL 64
+#define MAX_PCKT_SIZE 600
+#define ICMP_TOOBIG_SIZE 98
+#define ICMP_TOOBIG_PAYLOAD_SIZE 92
+
+/* volatile to prevent compiler optimizations */
+static volatile __u32 max_pcktsz = MAX_PCKT_SIZE;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 1);
+} icmpcnt SEC(".maps");
+
+static __always_inline void count_icmp(void)
+{
+ u64 key = 0;
+ u64 *icmp_count;
+
+ icmp_count = bpf_map_lookup_elem(&icmpcnt, &key);
+ if (icmp_count)
+ *icmp_count += 1;
+}
+
+static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
+{
+ struct ethhdr *eth;
+
+ eth = data;
+ memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN);
+ memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN);
+ eth->h_proto = orig_eth->h_proto;
+}
+
+static __always_inline __u16 csum_fold_helper(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ return ~((csum & 0xffff) + (csum >> 16));
+}
+
+static __always_inline void ipv4_csum(void *data_start, int data_size,
+ __u32 *csum)
+{
+ *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum);
+ *csum = csum_fold_helper(*csum);
+}
+
+static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
+{
+ int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr);
+
+ if (bpf_xdp_adjust_head(xdp, 0 - headroom))
+ return XDP_DROP;
+ void *data = (void *)(long)xdp->data;
+ void *data_end = (void *)(long)xdp->data_end;
+
+ if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end)
+ return XDP_DROP;
+
+ struct iphdr *iph, *orig_iph;
+ struct icmphdr *icmp_hdr;
+ struct ethhdr *orig_eth;
+ __u32 csum = 0;
+ __u64 off = 0;
+
+ orig_eth = data + headroom;
+ swap_mac(data, orig_eth);
+ off += sizeof(struct ethhdr);
+ iph = data + off;
+ off += sizeof(struct iphdr);
+ icmp_hdr = data + off;
+ off += sizeof(struct icmphdr);
+ orig_iph = data + off;
+ icmp_hdr->type = ICMP_DEST_UNREACH;
+ icmp_hdr->code = ICMP_FRAG_NEEDED;
+ icmp_hdr->un.frag.mtu = htons(max_pcktsz - sizeof(struct ethhdr));
+ icmp_hdr->checksum = 0;
+ ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
+ icmp_hdr->checksum = csum;
+ iph->ttl = DEFAULT_TTL;
+ iph->daddr = orig_iph->saddr;
+ iph->saddr = orig_iph->daddr;
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->protocol = IPPROTO_ICMP;
+ iph->tos = 0;
+ iph->tot_len = htons(
+ ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr));
+ iph->check = 0;
+ csum = 0;
+ ipv4_csum(iph, sizeof(struct iphdr), &csum);
+ iph->check = csum;
+ count_icmp();
+ return XDP_TX;
+}
+
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ int pckt_size = data_end - data;
+ int offset;
+
+ if (pckt_size > max(max_pcktsz, ICMP_TOOBIG_SIZE)) {
+ offset = pckt_size - ICMP_TOOBIG_SIZE;
+ if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+ return XDP_PASS;
+ return send_icmp4_too_big(xdp);
+ }
+ return XDP_PASS;
+}
+
+SEC("xdp_icmp")
+int _xdp_icmp(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == htons(ETH_P_IP))
+ return handle_ipv4(xdp);
+ else
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
new file mode 100644
index 000000000000..e9426bd65420
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <arpa/inet.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+#include <time.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define STATS_INTERVAL_S 2U
+#define MAX_PCKT_SIZE 600
+
+static int ifindex = -1;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static __u32 prog_id;
+
+static void int_exit(int sig)
+{
+ __u32 curr_prog_id = 0;
+
+ if (ifindex > -1) {
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
+ exit(1);
+ }
+ if (prog_id == curr_prog_id)
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
+ else if (!curr_prog_id)
+ printf("couldn't find a prog id on a given iface\n");
+ else
+ printf("program on interface changed, not removing\n");
+ }
+ exit(0);
+}
+
+/* simple "icmp packet too big sent" counter
+ */
+static void poll_stats(unsigned int map_fd, unsigned int kill_after_s)
+{
+ time_t started_at = time(NULL);
+ __u64 value = 0;
+ int key = 0;
+
+
+ while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
+ sleep(STATS_INTERVAL_S);
+
+ assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+
+ printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
+ }
+}
+
+static void usage(const char *cmd)
+{
+ printf("Start a XDP prog which send ICMP \"packet too big\" \n"
+ "messages if ingress packet is bigger then MAX_SIZE bytes\n");
+ printf("Usage: %s [...]\n", cmd);
+ printf(" -i <ifname|ifindex> Interface\n");
+ printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
+ printf(" -P <MAX_PCKT_SIZE> Default: %u\n", MAX_PCKT_SIZE);
+ printf(" -S use skb-mode\n");
+ printf(" -N enforce native mode\n");
+ printf(" -F force loading prog\n");
+ printf(" -h Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned char opt_flags[256] = {};
+ const char *optstr = "i:T:P:SNFh";
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ unsigned int kill_after_s = 0;
+ int i, prog_fd, map_fd, opt;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 max_pckt_size = 0;
+ __u32 key = 0;
+ char filename[256];
+ int err;
+
+ for (i = 0; i < strlen(optstr); i++)
+ if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
+ opt_flags[(unsigned char)optstr[i]] = 1;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+
+ switch (opt) {
+ case 'i':
+ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = atoi(optarg);
+ break;
+ case 'T':
+ kill_after_s = atoi(optarg);
+ break;
+ case 'P':
+ max_pckt_size = atoi(optarg);
+ break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'N':
+ /* default, set below */
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
+ default:
+ usage(argv[0]);
+ return 1;
+ }
+ opt_flags[opt] = 0;
+ }
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+ for (i = 0; i < strlen(optstr); i++) {
+ if (opt_flags[(unsigned int)optstr[i]]) {
+ fprintf(stderr, "Missing argument -%c\n", optstr[i]);
+ usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (!ifindex) {
+ fprintf(stderr, "Invalid ifname\n");
+ return 1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err)
+ return 1;
+
+ prog_fd = bpf_program__fd(prog);
+
+ /* static global var 'max_pcktsz' is accessible from .data section */
+ if (max_pckt_size) {
+ map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data");
+ if (map_fd < 0) {
+ printf("finding a max_pcktsz map in obj file failed\n");
+ return 1;
+ }
+ bpf_map_update_elem(map_fd, &key, &max_pckt_size, BPF_ANY);
+ }
+
+ /* fetch icmpcnt map */
+ map_fd = bpf_object__find_map_fd_by_name(obj, "icmpcnt");
+ if (map_fd < 0) {
+ printf("finding a icmpcnt map in obj file failed\n");
+ return 1;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
+ printf("link set xdp fd failed\n");
+ return 1;
+ }
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err) {
+ printf("can't get prog info - %s\n", strerror(errno));
+ return 1;
+ }
+ prog_id = info.id;
+
+ poll_stats(map_fd, kill_after_s);
+ int_exit(0);
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
new file mode 100644
index 000000000000..54c099cbd639
--- /dev/null
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+
+#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 64);
+} xdp_tx_ports SEC(".maps");
+
+/* from include/net/ip.h */
+static __always_inline int ip_decrease_ttl(struct iphdr *iph)
+{
+ u32 check = (__force u32)iph->check;
+
+ check += (__force u32)htons(0x0100);
+ iph->check = (__force __sum16)(check + (check >= 0xFFFF));
+ return --iph->ttl;
+}
+
+static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct bpf_fib_lookup fib_params;
+ struct ethhdr *eth = data;
+ struct ipv6hdr *ip6h;
+ struct iphdr *iph;
+ u16 h_proto;
+ u64 nh_off;
+ int rc;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ __builtin_memset(&fib_params, 0, sizeof(fib_params));
+
+ h_proto = eth->h_proto;
+ if (h_proto == htons(ETH_P_IP)) {
+ iph = data + nh_off;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ if (iph->ttl <= 1)
+ return XDP_PASS;
+
+ fib_params.family = AF_INET;
+ fib_params.tos = iph->tos;
+ fib_params.l4_protocol = iph->protocol;
+ fib_params.sport = 0;
+ fib_params.dport = 0;
+ fib_params.tot_len = ntohs(iph->tot_len);
+ fib_params.ipv4_src = iph->saddr;
+ fib_params.ipv4_dst = iph->daddr;
+ } else if (h_proto == htons(ETH_P_IPV6)) {
+ struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
+
+ ip6h = data + nh_off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ if (ip6h->hop_limit <= 1)
+ return XDP_PASS;
+
+ fib_params.family = AF_INET6;
+ fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
+ fib_params.l4_protocol = ip6h->nexthdr;
+ fib_params.sport = 0;
+ fib_params.dport = 0;
+ fib_params.tot_len = ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+ } else {
+ return XDP_PASS;
+ }
+
+ fib_params.ifindex = ctx->ingress_ifindex;
+
+ rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+ /*
+ * Some rc (return codes) from bpf_fib_lookup() are important,
+ * to understand how this XDP-prog interacts with network stack.
+ *
+ * BPF_FIB_LKUP_RET_NO_NEIGH:
+ * Even if route lookup was a success, then the MAC-addresses are also
+ * needed. This is obtained from arp/neighbour table, but if table is
+ * (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned. To avoid
+ * doing ARP lookup directly from XDP, then send packet to normal
+ * network stack via XDP_PASS and expect it will do ARP resolution.
+ *
+ * BPF_FIB_LKUP_RET_FWD_DISABLED:
+ * The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
+ * setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
+ * enabled this on ingress device.
+ */
+ if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
+ /* Verify egress index has been configured as TX-port.
+ * (Note: User can still have inserted an egress ifindex that
+ * doesn't support XDP xmit, which will result in packet drops).
+ *
+ * Note: lookup in devmap supported since 0cdbb4b09a0.
+ * If not supported will fail with:
+ * cannot pass map_type 14 into func bpf_map_lookup_elem#1:
+ */
+ if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
+ return XDP_PASS;
+
+ if (h_proto == htons(ETH_P_IP))
+ ip_decrease_ttl(iph);
+ else if (h_proto == htons(ETH_P_IPV6))
+ ip6h->hop_limit--;
+
+ memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+ memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+ return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp_fwd")
+int xdp_fwd_prog(struct xdp_md *ctx)
+{
+ return xdp_fwd_flags(ctx, 0);
+}
+
+SEC("xdp_fwd_direct")
+int xdp_fwd_direct_prog(struct xdp_md *ctx)
+{
+ return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
new file mode 100644
index 000000000000..193b3b79b31f
--- /dev/null
+++ b/samples/bpf/xdp_fwd_user.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+#include <net/if.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libgen.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+
+static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
+{
+ int err;
+
+ err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
+ if (err < 0) {
+ printf("ERROR: failed to attach program to %s\n", name);
+ return err;
+ }
+
+ /* Adding ifindex as a possible egress TX port */
+ err = bpf_map_update_elem(map_fd, &idx, &idx, 0);
+ if (err)
+ printf("ERROR: failed using device %s as TX-port\n", name);
+
+ return err;
+}
+
+static int do_detach(int ifindex, const char *ifname, const char *app_name)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ struct bpf_prog_info prog_info = {};
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 info_len, curr_prog_id;
+ int prog_fd;
+ int err = 1;
+
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("ERROR: bpf_xdp_query_id failed (%s)\n",
+ strerror(errno));
+ return err;
+ }
+
+ if (!curr_prog_id) {
+ printf("ERROR: flags(0x%x) xdp prog is not attached to %s\n",
+ xdp_flags, ifname);
+ return err;
+ }
+
+ info_len = sizeof(prog_info);
+ prog_fd = bpf_prog_get_fd_by_id(curr_prog_id);
+ if (prog_fd < 0) {
+ printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n",
+ strerror(errno));
+ return prog_fd;
+ }
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (err) {
+ printf("ERROR: bpf_prog_get_info_by_fd failed (%s)\n",
+ strerror(errno));
+ goto close_out;
+ }
+ snprintf(prog_name, sizeof(prog_name), "%s_prog", app_name);
+ prog_name[BPF_OBJ_NAME_LEN - 1] = '\0';
+
+ if (strcmp(prog_info.name, prog_name)) {
+ printf("ERROR: %s isn't attached to %s\n", app_name, ifname);
+ err = 1;
+ goto close_out;
+ }
+
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_detach(ifindex, xdp_flags, &opts);
+ if (err < 0)
+ printf("ERROR: failed to detach program from %s (%s)\n",
+ ifname, strerror(errno));
+ /* TODO: Remember to cleanup map, when adding use of shared map
+ * bpf_map_delete_elem((map_fd, &idx);
+ */
+close_out:
+ close(prog_fd);
+ return err;
+}
+
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s [OPTS] interface-list\n"
+ "\nOPTS:\n"
+ " -d detach program\n"
+ " -S use skb-mode\n"
+ " -F force loading prog\n"
+ " -D direct table lookups (skip fib rules)\n",
+ prog);
+}
+
+int main(int argc, char **argv)
+{
+ const char *prog_name = "xdp_fwd";
+ struct bpf_program *prog = NULL;
+ struct bpf_program *pos;
+ const char *sec_name;
+ int prog_fd = -1, map_fd = -1;
+ char filename[PATH_MAX];
+ struct bpf_object *obj;
+ int opt, i, idx, err;
+ int attach = 1;
+ int ret = 0;
+
+ while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
+ switch (opt) {
+ case 'd':
+ attach = 0;
+ break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
+ case 'D':
+ prog_name = "xdp_fwd_direct";
+ break;
+ default:
+ usage(basename(argv[0]));
+ return 1;
+ }
+ }
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+ if (optind == argc) {
+ usage(basename(argv[0]));
+ return 1;
+ }
+
+ if (attach) {
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (access(filename, O_RDONLY) < 0) {
+ printf("error accessing file %s: %s\n",
+ filename, strerror(errno));
+ return 1;
+ }
+
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("Does kernel support devmap lookup?\n");
+ /* If not, the error message will be:
+ * "cannot pass map_type 14 into func bpf_map_lookup_elem#1"
+ */
+ return 1;
+ }
+
+ bpf_object__for_each_program(pos, obj) {
+ sec_name = bpf_program__section_name(pos);
+ if (sec_name && !strcmp(sec_name, prog_name)) {
+ prog = pos;
+ break;
+ }
+ }
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ printf("program not found: %s\n", strerror(prog_fd));
+ return 1;
+ }
+ map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
+ "xdp_tx_ports"));
+ if (map_fd < 0) {
+ printf("map not found: %s\n", strerror(map_fd));
+ return 1;
+ }
+ }
+
+ for (i = optind; i < argc; ++i) {
+ idx = if_nametoindex(argv[i]);
+ if (!idx)
+ idx = strtoul(argv[i], NULL, 0);
+
+ if (!idx) {
+ fprintf(stderr, "Invalid arg\n");
+ return 1;
+ }
+ if (!attach) {
+ err = do_detach(idx, argv[i], prog_name);
+ if (err)
+ ret = err;
+ } else {
+ err = do_attach(idx, prog_fd, map_fd, argv[i]);
+ if (err)
+ ret = err;
+ }
+ }
+
+ return ret;
+}
diff --git a/samples/bpf/xdp_router_ipv4.bpf.c b/samples/bpf/xdp_router_ipv4.bpf.c
new file mode 100644
index 000000000000..0643330d1d2e
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4.bpf.c
@@ -0,0 +1,189 @@
+/* Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+#define ETH_ALEN 6
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+
+struct trie_value {
+ __u8 prefix[4];
+ __be64 value;
+ int ifindex;
+ int metric;
+ __be32 gw;
+};
+
+/* Key for lpm_trie */
+union key_4 {
+ u32 b32[2];
+ u8 b8[8];
+};
+
+struct arp_entry {
+ __be64 mac;
+ __be32 dst;
+};
+
+struct direct_map {
+ struct arp_entry arp;
+ int ifindex;
+ __be64 mac;
+};
+
+/* Map for trie implementation */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(key_size, 8);
+ __uint(value_size, sizeof(struct trie_value));
+ __uint(max_entries, 50);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} lpm_map SEC(".maps");
+
+/* Map for ARP table */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __be32);
+ __type(value, __be64);
+ __uint(max_entries, 50);
+} arp_table SEC(".maps");
+
+/* Map to keep the exact match entries in the route table */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __be32);
+ __type(value, struct direct_map);
+ __uint(max_entries, 50);
+} exact_match SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 100);
+} tx_port SEC(".maps");
+
+SEC("xdp")
+int xdp_router_ipv4_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off = sizeof(*eth);
+ struct datarec *rec;
+ __be16 h_proto;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (rec)
+ NO_TEAR_INC(rec->processed);
+
+ if (data + nh_off > data_end)
+ goto drop;
+
+ h_proto = eth->h_proto;
+ if (h_proto == bpf_htons(ETH_P_8021Q) ||
+ h_proto == bpf_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ goto drop;
+
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ switch (bpf_ntohs(h_proto)) {
+ case ETH_P_ARP:
+ if (rec)
+ NO_TEAR_INC(rec->xdp_pass);
+ return XDP_PASS;
+ case ETH_P_IP: {
+ struct iphdr *iph = data + nh_off;
+ struct direct_map *direct_entry;
+ __be64 *dest_mac, *src_mac;
+ int forward_to;
+
+ if (iph + 1 > data_end)
+ goto drop;
+
+ direct_entry = bpf_map_lookup_elem(&exact_match, &iph->daddr);
+
+ /* Check for exact match, this would give a faster lookup */
+ if (direct_entry && direct_entry->mac &&
+ direct_entry->arp.mac) {
+ src_mac = &direct_entry->mac;
+ dest_mac = &direct_entry->arp.mac;
+ forward_to = direct_entry->ifindex;
+ } else {
+ struct trie_value *prefix_value;
+ union key_4 key4;
+
+ /* Look up in the trie for lpm */
+ key4.b32[0] = 32;
+ key4.b8[4] = iph->daddr & 0xff;
+ key4.b8[5] = (iph->daddr >> 8) & 0xff;
+ key4.b8[6] = (iph->daddr >> 16) & 0xff;
+ key4.b8[7] = (iph->daddr >> 24) & 0xff;
+
+ prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
+ if (!prefix_value)
+ goto drop;
+
+ forward_to = prefix_value->ifindex;
+ src_mac = &prefix_value->value;
+ if (!src_mac)
+ goto drop;
+
+ dest_mac = bpf_map_lookup_elem(&arp_table, &iph->daddr);
+ if (!dest_mac) {
+ if (!prefix_value->gw)
+ goto drop;
+
+ dest_mac = bpf_map_lookup_elem(&arp_table,
+ &prefix_value->gw);
+ if (!dest_mac) {
+ /* Forward the packet to the kernel in
+ * order to trigger ARP discovery for
+ * the default gw.
+ */
+ if (rec)
+ NO_TEAR_INC(rec->xdp_pass);
+ return XDP_PASS;
+ }
+ }
+ }
+
+ if (src_mac && dest_mac) {
+ int ret;
+
+ __builtin_memcpy(eth->h_dest, dest_mac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, src_mac, ETH_ALEN);
+
+ ret = bpf_redirect_map(&tx_port, forward_to, 0);
+ if (ret == XDP_REDIRECT) {
+ if (rec)
+ NO_TEAR_INC(rec->xdp_redirect);
+ return ret;
+ }
+ }
+ }
+ default:
+ break;
+ }
+drop:
+ if (rec)
+ NO_TEAR_INC(rec->xdp_drop);
+
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
new file mode 100644
index 000000000000..266fdd0b025d
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2017 Cavium, Inc.
+ */
+#include <linux/bpf.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include "bpf_util.h"
+#include <bpf/libbpf.h>
+#include <libgen.h>
+#include <getopt.h>
+#include <pthread.h>
+#include "xdp_sample_user.h"
+#include "xdp_router_ipv4.skel.h"
+
+static const char *__doc__ =
+"XDP IPv4 router implementation\n"
+"Usage: xdp_router_ipv4 <IFNAME-0> ... <IFNAME-N>\n";
+
+static char buf[8192];
+static int lpm_map_fd;
+static int arp_table_map_fd;
+static int exact_match_map_fd;
+static int tx_port_map_fd;
+
+static bool routes_thread_exit;
+static int interval = 5;
+
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_EXCEPTION_CNT;
+
+DEFINE_SAMPLE_INIT(xdp_router_ipv4);
+
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "stats", no_argument, NULL, 's' },
+ {}
+};
+
+static int get_route_table(int rtm_family);
+
+static int recv_msg(struct sockaddr_nl sock_addr, int sock)
+{
+ struct nlmsghdr *nh;
+ int len, nll = 0;
+ char *buf_ptr;
+
+ buf_ptr = buf;
+ while (1) {
+ len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
+ if (len < 0)
+ return len;
+
+ nh = (struct nlmsghdr *)buf_ptr;
+
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ buf_ptr += len;
+ nll += len;
+ if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
+ break;
+
+ if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
+ break;
+ }
+ return nll;
+}
+
+/* Function to parse the route entry returned by netlink
+ * Updates the route entry related map entries
+ */
+static void read_route(struct nlmsghdr *nh, int nll)
+{
+ char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
+ struct bpf_lpm_trie_key_u8 *prefix_key;
+ struct rtattr *rt_attr;
+ struct rtmsg *rt_msg;
+ int rtm_family;
+ int rtl;
+ int i;
+ struct route_table {
+ int dst_len, iface, metric;
+ __be32 dst, gw;
+ __be64 mac;
+ } route;
+ struct arp_table {
+ __be64 mac;
+ __be32 dst;
+ };
+
+ struct direct_map {
+ struct arp_table arp;
+ int ifindex;
+ __be64 mac;
+ } direct_entry;
+
+ memset(&route, 0, sizeof(route));
+ for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
+ rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
+ rtm_family = rt_msg->rtm_family;
+ if (rtm_family == AF_INET)
+ if (rt_msg->rtm_table != RT_TABLE_MAIN)
+ continue;
+ rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
+ rtl = RTM_PAYLOAD(nh);
+
+ for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
+ switch (rt_attr->rta_type) {
+ case NDA_DST:
+ sprintf(dsts, "%u",
+ (*((__be32 *)RTA_DATA(rt_attr))));
+ break;
+ case RTA_GATEWAY:
+ sprintf(gws, "%u",
+ *((__be32 *)RTA_DATA(rt_attr)));
+ break;
+ case RTA_OIF:
+ sprintf(ifs, "%u",
+ *((int *)RTA_DATA(rt_attr)));
+ break;
+ case RTA_METRICS:
+ sprintf(metrics, "%u",
+ *((int *)RTA_DATA(rt_attr)));
+ default:
+ break;
+ }
+ }
+ sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
+ route.dst = atoi(dsts);
+ route.dst_len = atoi(dsts_len);
+ route.gw = atoi(gws);
+ route.iface = atoi(ifs);
+ route.metric = atoi(metrics);
+ assert(get_mac_addr(route.iface, &route.mac) == 0);
+ assert(bpf_map_update_elem(tx_port_map_fd,
+ &route.iface, &route.iface, 0) == 0);
+ if (rtm_family == AF_INET) {
+ struct trie_value {
+ __u8 prefix[4];
+ __be64 value;
+ int ifindex;
+ int metric;
+ __be32 gw;
+ } *prefix_value;
+
+ prefix_key = alloca(sizeof(*prefix_key) + 4);
+ prefix_value = alloca(sizeof(*prefix_value));
+
+ prefix_key->prefixlen = 32;
+ prefix_key->prefixlen = route.dst_len;
+ direct_entry.mac = route.mac & 0xffffffffffff;
+ direct_entry.ifindex = route.iface;
+ direct_entry.arp.mac = 0;
+ direct_entry.arp.dst = 0;
+ if (route.dst_len == 32) {
+ if (nh->nlmsg_type == RTM_DELROUTE) {
+ assert(bpf_map_delete_elem(exact_match_map_fd,
+ &route.dst) == 0);
+ } else {
+ if (bpf_map_lookup_elem(arp_table_map_fd,
+ &route.dst,
+ &direct_entry.arp.mac) == 0)
+ direct_entry.arp.dst = route.dst;
+ assert(bpf_map_update_elem(exact_match_map_fd,
+ &route.dst,
+ &direct_entry, 0) == 0);
+ }
+ }
+ for (i = 0; i < 4; i++)
+ prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
+
+ if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
+ prefix_value) < 0) {
+ for (i = 0; i < 4; i++)
+ prefix_value->prefix[i] = prefix_key->data[i];
+ prefix_value->value = route.mac & 0xffffffffffff;
+ prefix_value->ifindex = route.iface;
+ prefix_value->gw = route.gw;
+ prefix_value->metric = route.metric;
+
+ assert(bpf_map_update_elem(lpm_map_fd,
+ prefix_key,
+ prefix_value, 0
+ ) == 0);
+ } else {
+ if (nh->nlmsg_type == RTM_DELROUTE) {
+ assert(bpf_map_delete_elem(lpm_map_fd,
+ prefix_key
+ ) == 0);
+ /* Rereading the route table to check if
+ * there is an entry with the same
+ * prefix but a different metric as the
+ * deleted entry.
+ */
+ get_route_table(AF_INET);
+ } else if (prefix_key->data[0] ==
+ prefix_value->prefix[0] &&
+ prefix_key->data[1] ==
+ prefix_value->prefix[1] &&
+ prefix_key->data[2] ==
+ prefix_value->prefix[2] &&
+ prefix_key->data[3] ==
+ prefix_value->prefix[3] &&
+ route.metric >= prefix_value->metric) {
+ continue;
+ } else {
+ for (i = 0; i < 4; i++)
+ prefix_value->prefix[i] =
+ prefix_key->data[i];
+ prefix_value->value =
+ route.mac & 0xffffffffffff;
+ prefix_value->ifindex = route.iface;
+ prefix_value->gw = route.gw;
+ prefix_value->metric = route.metric;
+ assert(bpf_map_update_elem(lpm_map_fd,
+ prefix_key,
+ prefix_value,
+ 0) == 0);
+ }
+ }
+ }
+ memset(&route, 0, sizeof(route));
+ memset(dsts, 0, sizeof(dsts));
+ memset(dsts_len, 0, sizeof(dsts_len));
+ memset(gws, 0, sizeof(gws));
+ memset(ifs, 0, sizeof(ifs));
+ memset(&route, 0, sizeof(route));
+ }
+}
+
+/* Function to read the existing route table when the process is launched*/
+static int get_route_table(int rtm_family)
+{
+ struct sockaddr_nl sa;
+ struct nlmsghdr *nh;
+ int sock, seq = 0;
+ struct msghdr msg;
+ struct iovec iov;
+ int ret = 0;
+ int nll;
+
+ struct {
+ struct nlmsghdr nl;
+ struct rtmsg rt;
+ char buf[8192];
+ } req;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return -errno;
+ }
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ ret = -errno;
+ goto cleanup;
+ }
+ memset(&req, 0, sizeof(req));
+ req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+ req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.nl.nlmsg_type = RTM_GETROUTE;
+
+ req.rt.rtm_family = rtm_family;
+ req.rt.rtm_table = RT_TABLE_MAIN;
+ req.nl.nlmsg_pid = 0;
+ req.nl.nlmsg_seq = ++seq;
+ memset(&msg, 0, sizeof(msg));
+ iov.iov_base = (void *)&req.nl;
+ iov.iov_len = req.nl.nlmsg_len;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ ret = sendmsg(sock, &msg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "send to netlink: %s\n", strerror(errno));
+ ret = -errno;
+ goto cleanup;
+ }
+ memset(buf, 0, sizeof(buf));
+ nll = recv_msg(sa, sock);
+ if (nll < 0) {
+ fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
+ ret = nll;
+ goto cleanup;
+ }
+ nh = (struct nlmsghdr *)buf;
+ read_route(nh, nll);
+cleanup:
+ close(sock);
+ return ret;
+}
+
+/* Function to parse the arp entry returned by netlink
+ * Updates the arp entry related map entries
+ */
+static void read_arp(struct nlmsghdr *nh, int nll)
+{
+ struct rtattr *rt_attr;
+ char dsts[24], mac[24];
+ struct ndmsg *rt_msg;
+ int rtl, ndm_family;
+
+ struct arp_table {
+ __be64 mac;
+ __be32 dst;
+ } arp_entry;
+ struct direct_map {
+ struct arp_table arp;
+ int ifindex;
+ __be64 mac;
+ } direct_entry;
+
+ for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
+ rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
+ rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
+ ndm_family = rt_msg->ndm_family;
+ rtl = RTM_PAYLOAD(nh);
+ for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
+ switch (rt_attr->rta_type) {
+ case NDA_DST:
+ sprintf(dsts, "%u",
+ *((__be32 *)RTA_DATA(rt_attr)));
+ break;
+ case NDA_LLADDR:
+ sprintf(mac, "%lld",
+ *((__be64 *)RTA_DATA(rt_attr)));
+ break;
+ default:
+ break;
+ }
+ }
+ arp_entry.dst = atoi(dsts);
+ arp_entry.mac = atol(mac);
+
+ if (ndm_family == AF_INET) {
+ if (bpf_map_lookup_elem(exact_match_map_fd,
+ &arp_entry.dst,
+ &direct_entry) == 0) {
+ if (nh->nlmsg_type == RTM_DELNEIGH) {
+ direct_entry.arp.dst = 0;
+ direct_entry.arp.mac = 0;
+ } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
+ direct_entry.arp.dst = arp_entry.dst;
+ direct_entry.arp.mac = arp_entry.mac;
+ }
+ assert(bpf_map_update_elem(exact_match_map_fd,
+ &arp_entry.dst,
+ &direct_entry, 0
+ ) == 0);
+ memset(&direct_entry, 0, sizeof(direct_entry));
+ }
+ if (nh->nlmsg_type == RTM_DELNEIGH) {
+ assert(bpf_map_delete_elem(arp_table_map_fd,
+ &arp_entry.dst) == 0);
+ } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
+ assert(bpf_map_update_elem(arp_table_map_fd,
+ &arp_entry.dst,
+ &arp_entry.mac, 0
+ ) == 0);
+ }
+ }
+ memset(&arp_entry, 0, sizeof(arp_entry));
+ memset(dsts, 0, sizeof(dsts));
+ }
+}
+
+/* Function to read the existing arp table when the process is launched*/
+static int get_arp_table(int rtm_family)
+{
+ struct sockaddr_nl sa;
+ struct nlmsghdr *nh;
+ int sock, seq = 0;
+ struct msghdr msg;
+ struct iovec iov;
+ int ret = 0;
+ int nll;
+ struct {
+ struct nlmsghdr nl;
+ struct ndmsg rt;
+ char buf[8192];
+ } req;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return -errno;
+ }
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ ret = -errno;
+ goto cleanup;
+ }
+ memset(&req, 0, sizeof(req));
+ req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+ req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.nl.nlmsg_type = RTM_GETNEIGH;
+ req.rt.ndm_state = NUD_REACHABLE;
+ req.rt.ndm_family = rtm_family;
+ req.nl.nlmsg_pid = 0;
+ req.nl.nlmsg_seq = ++seq;
+ memset(&msg, 0, sizeof(msg));
+ iov.iov_base = (void *)&req.nl;
+ iov.iov_len = req.nl.nlmsg_len;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ ret = sendmsg(sock, &msg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "send to netlink: %s\n", strerror(errno));
+ ret = -errno;
+ goto cleanup;
+ }
+ memset(buf, 0, sizeof(buf));
+ nll = recv_msg(sa, sock);
+ if (nll < 0) {
+ fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
+ ret = nll;
+ goto cleanup;
+ }
+ nh = (struct nlmsghdr *)buf;
+ read_arp(nh, nll);
+cleanup:
+ close(sock);
+ return ret;
+}
+
+/* Function to keep track and update changes in route and arp table
+ * Give regular statistics of packets forwarded
+ */
+static void *monitor_routes_thread(void *arg)
+{
+ struct pollfd fds_route, fds_arp;
+ struct sockaddr_nl la, lr;
+ int sock, sock_arp, nll;
+ struct nlmsghdr *nh;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return NULL;
+ }
+
+ fcntl(sock, F_SETFL, O_NONBLOCK);
+ memset(&lr, 0, sizeof(lr));
+ lr.nl_family = AF_NETLINK;
+ lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
+ if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ close(sock);
+ return NULL;
+ }
+
+ fds_route.fd = sock;
+ fds_route.events = POLL_IN;
+
+ sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock_arp < 0) {
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ close(sock);
+ return NULL;
+ }
+
+ fcntl(sock_arp, F_SETFL, O_NONBLOCK);
+ memset(&la, 0, sizeof(la));
+ la.nl_family = AF_NETLINK;
+ la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
+ if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ fds_arp.fd = sock_arp;
+ fds_arp.events = POLL_IN;
+
+ /* dump route and arp tables */
+ if (get_arp_table(AF_INET) < 0) {
+ fprintf(stderr, "Failed reading arp table\n");
+ goto cleanup;
+ }
+
+ if (get_route_table(AF_INET) < 0) {
+ fprintf(stderr, "Failed reading route table\n");
+ goto cleanup;
+ }
+
+ while (!routes_thread_exit) {
+ memset(buf, 0, sizeof(buf));
+ if (poll(&fds_route, 1, 3) == POLL_IN) {
+ nll = recv_msg(lr, sock);
+ if (nll < 0) {
+ fprintf(stderr, "recv from netlink: %s\n",
+ strerror(nll));
+ goto cleanup;
+ }
+
+ nh = (struct nlmsghdr *)buf;
+ read_route(nh, nll);
+ }
+
+ memset(buf, 0, sizeof(buf));
+ if (poll(&fds_arp, 1, 3) == POLL_IN) {
+ nll = recv_msg(la, sock_arp);
+ if (nll < 0) {
+ fprintf(stderr, "recv from netlink: %s\n",
+ strerror(nll));
+ goto cleanup;
+ }
+
+ nh = (struct nlmsghdr *)buf;
+ read_arp(nh, nll);
+ }
+
+ sleep(interval);
+ }
+
+cleanup:
+ close(sock_arp);
+ close(sock);
+ return NULL;
+}
+
+static void usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error,
+ struct bpf_object *obj)
+{
+ sample_usage(argv, long_options, doc, mask, error);
+}
+
+int main(int argc, char **argv)
+{
+ bool error = true, generic = false, force = false;
+ int opt, ret = EXIT_FAIL_BPF;
+ struct xdp_router_ipv4 *skel;
+ int i, total_ifindex = argc - 1;
+ char **ifname_list = argv + 1;
+ pthread_t routes_thread;
+ int longindex = 0;
+
+ if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
+ fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+ strerror(errno));
+ goto end;
+ }
+
+ skel = xdp_router_ipv4__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_router_ipv4__open: %s\n",
+ strerror(errno));
+ goto end;
+ }
+
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n",
+ strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = xdp_router_ipv4__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_router_ipv4__load: %s\n",
+ strerror(errno));
+ goto end_destroy;
+ }
+
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+
+ while ((opt = getopt_long(argc, argv, "si:SFvh",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ total_ifindex -= 2;
+ ifname_list += 2;
+ break;
+ case 'S':
+ generic = true;
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'F':
+ force = true;
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'v':
+ sample_switch_mode();
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'h':
+ error = false;
+ default:
+ usage(argv, long_options, __doc__, mask, error, skel->obj);
+ goto end_destroy;
+ }
+ }
+
+ ret = EXIT_FAIL_OPTION;
+ if (optind == argc) {
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_destroy;
+ }
+
+ lpm_map_fd = bpf_map__fd(skel->maps.lpm_map);
+ if (lpm_map_fd < 0) {
+ fprintf(stderr, "Failed loading lpm_map %s\n",
+ strerror(-lpm_map_fd));
+ goto end_destroy;
+ }
+ arp_table_map_fd = bpf_map__fd(skel->maps.arp_table);
+ if (arp_table_map_fd < 0) {
+ fprintf(stderr, "Failed loading arp_table_map_fd %s\n",
+ strerror(-arp_table_map_fd));
+ goto end_destroy;
+ }
+ exact_match_map_fd = bpf_map__fd(skel->maps.exact_match);
+ if (exact_match_map_fd < 0) {
+ fprintf(stderr, "Failed loading exact_match_map_fd %s\n",
+ strerror(-exact_match_map_fd));
+ goto end_destroy;
+ }
+ tx_port_map_fd = bpf_map__fd(skel->maps.tx_port);
+ if (tx_port_map_fd < 0) {
+ fprintf(stderr, "Failed loading tx_port_map_fd %s\n",
+ strerror(-tx_port_map_fd));
+ goto end_destroy;
+ }
+
+ ret = EXIT_FAIL_XDP;
+ for (i = 0; i < total_ifindex; i++) {
+ int index = if_nametoindex(ifname_list[i]);
+
+ if (!index) {
+ fprintf(stderr, "Interface %s not found %s\n",
+ ifname_list[i], strerror(-tx_port_map_fd));
+ goto end_destroy;
+ }
+ if (sample_install_xdp(skel->progs.xdp_router_ipv4_prog,
+ index, generic, force) < 0)
+ goto end_destroy;
+ }
+
+ ret = pthread_create(&routes_thread, NULL, monitor_routes_thread, NULL);
+ if (ret) {
+ fprintf(stderr, "Failed creating routes_thread: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+
+ ret = sample_run(interval, NULL, NULL);
+ routes_thread_exit = true;
+
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_thread_wait;
+ }
+ ret = EXIT_OK;
+
+end_thread_wait:
+ pthread_join(routes_thread, NULL);
+end_destroy:
+ xdp_router_ipv4__destroy(skel);
+end:
+ sample_exit(ret);
+}
diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c
new file mode 100644
index 000000000000..0eb7e1dcae22
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#include "xdp_sample.bpf.h"
+
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+array_map rx_cnt SEC(".maps");
+array_map redir_err_cnt SEC(".maps");
+array_map cpumap_enqueue_cnt SEC(".maps");
+array_map cpumap_kthread_cnt SEC(".maps");
+array_map exception_cnt SEC(".maps");
+array_map devmap_xmit_cnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 32 * 32);
+ __type(key, u64);
+ __type(value, struct datarec);
+} devmap_xmit_cnt_multi SEC(".maps");
+
+const volatile int nr_cpus = 0;
+
+/* These can be set before loading so that redundant comparisons can be DCE'd by
+ * the verifier, and only actual matches are tried after loading tp_btf program.
+ * This allows sample to filter tracepoint stats based on net_device.
+ */
+const volatile int from_match[32] = {};
+const volatile int to_match[32] = {};
+
+int cpumap_map_id = 0;
+
+/* Find if b is part of set a, but if a is empty set then evaluate to true */
+#define IN_SET(a, b) \
+ ({ \
+ bool __res = !(a)[0]; \
+ for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \
+ __res = (a)[i] == (b); \
+ if (__res) \
+ break; \
+ } \
+ __res; \
+ })
+
+static __always_inline __u32 xdp_get_err_key(int err)
+{
+ switch (err) {
+ case 0:
+ return 0;
+ case -EINVAL:
+ return 2;
+ case -ENETDOWN:
+ return 3;
+ case -EMSGSIZE:
+ return 4;
+ case -EOPNOTSUPP:
+ return 5;
+ case -ENOSPC:
+ return 6;
+ default:
+ return 1;
+ }
+}
+
+static __always_inline int xdp_redirect_collect_stat(int from, int err)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = XDP_REDIRECT_ERROR;
+ struct datarec *rec;
+ u32 idx;
+
+ if (!IN_SET(from_match, from))
+ return 0;
+
+ key = xdp_get_err_key(err);
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
+ if (!rec)
+ return 0;
+ if (key)
+ NO_TEAR_INC(rec->dropped);
+ else
+ NO_TEAR_INC(rec->processed);
+ return 0; /* Indicate event was filtered (no further processing)*/
+ /*
+ * Returning 1 here would allow e.g. a perf-record tracepoint
+ * to see and record these events, but it doesn't work well
+ * in-practice as stopping perf-record also unload this
+ * bpf_prog. Plus, there is additional overhead of doing so.
+ */
+}
+
+SEC("tp_btf/xdp_redirect_err")
+int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map_err")
+int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect")
+int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map")
+int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_cpumap_enqueue")
+int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
+ unsigned int drops, int to_cpu)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 idx;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ idx = to_cpu * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (processed > 0)
+ NO_TEAR_INC(rec->issue);
+ /* Inception: It's possible to detect overload situations, via
+ * this tracepoint. This can be used for creating a feedback
+ * loop to XDP, which can take appropriate actions to mitigate
+ * this overload situation.
+ */
+ return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ struct datarec *rec;
+ u32 cpu;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
+ NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
+ NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
+ /* Count times kthread yielded CPU via schedule call */
+ if (sched)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
+ const struct bpf_prog *xdp, u32 act)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 key = act, idx;
+
+ if (!IN_SET(from_match, dev->ifindex))
+ return 0;
+ if (!IN_SET(to_match, dev->ifindex))
+ return 0;
+
+ if (key > XDP_REDIRECT)
+ key = XDP_REDIRECT + 1;
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&exception_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_INC(rec->dropped);
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u32 cpu;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ NO_TEAR_INC(rec->info);
+ /* Record error cases, where no frame were sent */
+ /* Catch API error of drv ndo_xdp_xmit sent more than count */
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec empty = {};
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u64 idx;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+ idx = idx_in;
+ idx = idx << 32 | idx_out;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
+ if (!rec)
+ return 0;
+
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_INC(rec->info);
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h
new file mode 100644
index 000000000000..fecc41c5df04
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.h
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _XDP_SAMPLE_BPF_H
+#define _XDP_SAMPLE_BPF_H
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#include "net_shared.h"
+#include "xdp_sample_shared.h"
+
+#define EINVAL 22
+#define ENETDOWN 100
+#define EMSGSIZE 90
+#define EOPNOTSUPP 95
+#define ENOSPC 28
+
+typedef struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __type(key, unsigned int);
+ __type(value, struct datarec);
+} array_map;
+
+extern array_map rx_cnt;
+extern const volatile int nr_cpus;
+
+enum {
+ XDP_REDIRECT_SUCCESS = 0,
+ XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline void swap_src_dst_mac(void *data)
+{
+ unsigned short *p = data;
+ unsigned short dst[3];
+
+ dst[0] = p[0];
+ dst[1] = p[1];
+ dst[2] = p[2];
+ p[0] = p[3];
+ p[1] = p[4];
+ p[2] = p[5];
+ p[3] = dst[0];
+ p[4] = dst[1];
+ p[5] = dst[2];
+}
+
+/*
+ * Note: including linux/compiler.h or linux/kernel.h for the macros below
+ * conflicts with vmlinux.h include in BPF files, so we define them here.
+ *
+ * Following functions are taken from kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3 which makes build fail
+ * under gcc 4.4.
+ *
+ * Using extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8 __attribute__((__may_alias__)) __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break;
+ case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+ case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+ case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break;
+ case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+ case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+ case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+/* Add a value using relaxed read and relaxed write. Less expensive than
+ * fetch_add when there is no write concurrency.
+ */
+#define NO_TEAR_ADD(x, val) WRITE_ONCE((x), READ_ONCE(x) + (val))
+#define NO_TEAR_INC(x) NO_TEAR_ADD((x), 1)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#endif
diff --git a/samples/bpf/xdp_sample_shared.h b/samples/bpf/xdp_sample_shared.h
new file mode 100644
index 000000000000..8a7669a5d563
--- /dev/null
+++ b/samples/bpf/xdp_sample_shared.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _XDP_SAMPLE_SHARED_H
+#define _XDP_SAMPLE_SHARED_H
+
+struct datarec {
+ size_t processed;
+ size_t dropped;
+ size_t issue;
+ union {
+ size_t xdp_pass;
+ size_t info;
+ };
+ size_t xdp_drop;
+ size_t xdp_redirect;
+} __attribute__((aligned(64)));
+
+#endif
diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
new file mode 100644
index 000000000000..158682852162
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.c
@@ -0,0 +1,1673 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/ethtool.h>
+#include <linux/hashtable.h>
+#include <linux/if_link.h>
+#include <linux/jhash.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <locale.h>
+#include <math.h>
+#include <net/if.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/signalfd.h>
+#include <sys/sysinfo.h>
+#include <sys/timerfd.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+
+#define __sample_print(fmt, cond, ...) \
+ ({ \
+ if (cond) \
+ printf(fmt, ##__VA_ARGS__); \
+ })
+
+#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__)
+#define print_default(fmt, ...) \
+ __sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__)
+#define __print_err(err, fmt, ...) \
+ ({ \
+ __sample_print(fmt, err > 0 || sample_log_level & LL_DEFAULT, \
+ ##__VA_ARGS__); \
+ sample_err_exp = sample_err_exp ? true : err > 0; \
+ })
+#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__)
+
+#define __COLUMN(x) "%'10" x " %-13s"
+#define FMT_COLUMNf __COLUMN(".0f")
+#define FMT_COLUMNd __COLUMN("d")
+#define FMT_COLUMNl __COLUMN("llu")
+#define RX(rx) rx, "rx/s"
+#define PPS(pps) pps, "pkt/s"
+#define DROP(drop) drop, "drop/s"
+#define ERR(err) err, "error/s"
+#define HITS(hits) hits, "hit/s"
+#define XMIT(xmit) xmit, "xmit/s"
+#define PASS(pass) pass, "pass/s"
+#define REDIR(redir) redir, "redir/s"
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+
+#define XDP_UNKNOWN (XDP_REDIRECT + 1)
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+#define XDP_REDIRECT_ERR_MAX 7
+
+enum map_type {
+ MAP_RX,
+ MAP_REDIRECT_ERR,
+ MAP_CPUMAP_ENQUEUE,
+ MAP_CPUMAP_KTHREAD,
+ MAP_EXCEPTION,
+ MAP_DEVMAP_XMIT,
+ MAP_DEVMAP_XMIT_MULTI,
+ NUM_MAP,
+};
+
+enum log_level {
+ LL_DEFAULT = 1U << 0,
+ LL_SIMPLE = 1U << 1,
+ LL_DEBUG = 1U << 2,
+};
+
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+
+struct map_entry {
+ struct hlist_node node;
+ __u64 pair;
+ struct record val;
+};
+
+struct stats_record {
+ struct record rx_cnt;
+ struct record redir_err[XDP_REDIRECT_ERR_MAX];
+ struct record kthread;
+ struct record exception[XDP_ACTION_MAX];
+ struct record devmap_xmit;
+ DECLARE_HASHTABLE(xmit_map, 5);
+ struct record enq[];
+};
+
+struct sample_output {
+ struct {
+ __u64 rx;
+ __u64 redir;
+ __u64 drop;
+ __u64 drop_xmit;
+ __u64 err;
+ __u64 xmit;
+ } totals;
+ struct {
+ union {
+ __u64 pps;
+ __u64 num;
+ };
+ __u64 drop;
+ __u64 err;
+ } rx_cnt;
+ struct {
+ __u64 suc;
+ __u64 err;
+ } redir_cnt;
+ struct {
+ __u64 hits;
+ } except_cnt;
+ struct {
+ __u64 pps;
+ __u64 drop;
+ __u64 err;
+ double bavg;
+ } xmit_cnt;
+};
+
+struct xdp_desc {
+ int ifindex;
+ __u32 prog_id;
+ int flags;
+} sample_xdp_progs[32];
+
+struct datarec *sample_mmap[NUM_MAP];
+struct bpf_map *sample_map[NUM_MAP];
+size_t sample_map_count[NUM_MAP];
+enum log_level sample_log_level;
+struct sample_output sample_out;
+unsigned long sample_interval;
+bool sample_err_exp;
+int sample_xdp_cnt;
+int sample_n_cpus;
+int sample_sig_fd;
+int sample_mask;
+
+static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = {
+ /* Key=1 keeps unknown errors */
+ "Success",
+ "Unknown",
+ "EINVAL",
+ "ENETDOWN",
+ "EMSGSIZE",
+ "EOPNOTSUPP",
+ "ENOSPC",
+};
+
+/* Keyed from Unknown */
+static const char *xdp_redirect_err_help[XDP_REDIRECT_ERR_MAX - 1] = {
+ "Unknown error",
+ "Invalid redirection",
+ "Device being redirected to is down",
+ "Packet length too large for device",
+ "Operation not supported",
+ "No space in ptr_ring of cpumap kthread",
+};
+
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+ [XDP_UNKNOWN] = "XDP_UNKNOWN",
+};
+
+static __u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ return UINT64_MAX;
+ }
+ return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static const char *action2str(int action)
+{
+ if (action < XDP_ACTION_MAX)
+ return xdp_action_names[action];
+ return NULL;
+}
+
+static void sample_print_help(int mask)
+{
+ printf("Output format description\n\n"
+ "By default, redirect success statistics are disabled, use -s to enable.\n"
+ "The terse output mode is default, verbose mode can be activated using -v\n"
+ "Use SIGQUIT (Ctrl + \\) to switch the mode dynamically at runtime\n\n"
+ "Terse mode displays at most the following fields:\n"
+ " rx/s Number of packets received per second\n"
+ " redir/s Number of packets successfully redirected per second\n"
+ " err,drop/s Aggregated count of errors per second (including dropped packets)\n"
+ " xmit/s Number of packets transmitted on the output device per second\n\n"
+ "Output description for verbose mode:\n"
+ " FIELD DESCRIPTION\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ printf(" receive\t\tDisplays the number of packets received & errors encountered\n"
+ " \t\t\tWhenever an error or packet drop occurs, details of per CPU error\n"
+ " \t\t\tand drop statistics will be expanded inline in terse mode.\n"
+ " \t\t\t\tpkt/s - Packets received per second\n"
+ " \t\t\t\tdrop/s - Packets dropped per second\n"
+ " \t\t\t\terror/s - Errors encountered per second\n\n");
+ }
+ if (mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ printf(" redirect\t\tDisplays the number of packets successfully redirected\n"
+ " \t\t\tErrors encountered are expanded under redirect_err field\n"
+ " \t\t\tNote that passing -s to enable it has a per packet overhead\n"
+ " \t\t\t\tredir/s - Packets redirected successfully per second\n\n"
+ " redirect_err\t\tDisplays the number of packets that failed redirection\n"
+ " \t\t\tThe errno is expanded under this field with per CPU count\n"
+ " \t\t\tThe recognized errors are:\n");
+
+ for (int i = 2; i < XDP_REDIRECT_ERR_MAX; i++)
+ printf("\t\t\t %s: %s\n", xdp_redirect_err_names[i],
+ xdp_redirect_err_help[i - 1]);
+
+ printf(" \n\t\t\t\terror/s - Packets that failed redirection per second\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ printf(" enqueue to cpu N\tDisplays the number of packets enqueued to bulk queue of CPU N\n"
+ " \t\t\tExpands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N\n"
+ " \t\t\tReceived packets can be associated with the CPU redirect program is enqueuing \n"
+ " \t\t\tpackets to.\n"
+ " \t\t\t\tpkt/s - Packets enqueued per second from other CPU to CPU N\n"
+ " \t\t\t\tdrop/s - Packets dropped when trying to enqueue to CPU N\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ printf(" kthread\t\tDisplays the number of packets processed in CPUMAP kthread for each CPU\n"
+ " \t\t\tPackets consumed from ptr_ring in kthread, and its xdp_stats (after calling \n"
+ " \t\t\tCPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and\n"
+ " \t\t\tthen per-CPU to associate it to each CPU's pinned CPUMAP kthread.\n"
+ " \t\t\t\tpkt/s - Packets consumed per second from ptr_ring\n"
+ " \t\t\t\tdrop/s - Packets dropped per second in kthread\n"
+ " \t\t\t\tsched - Number of times kthread called schedule()\n\n"
+ " \t\t\txdp_stats (also expands to per-CPU counts)\n"
+ " \t\t\t\tpass/s - XDP_PASS count for CPUMAP program execution\n"
+ " \t\t\t\tdrop/s - XDP_DROP count for CPUMAP program execution\n"
+ " \t\t\t\tredir/s - XDP_REDIRECT count for CPUMAP program execution\n\n");
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ printf(" xdp_exception\t\tDisplays xdp_exception tracepoint events\n"
+ " \t\t\tThis can occur due to internal driver errors, unrecognized\n"
+ " \t\t\tXDP actions and due to explicit user trigger by use of XDP_ABORTED\n"
+ " \t\t\tEach action is expanded below this field with its count\n"
+ " \t\t\t\thit/s - Number of times the tracepoint was hit per second\n\n");
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ printf(" devmap_xmit\t\tDisplays devmap_xmit tracepoint events\n"
+ " \t\t\tThis tracepoint is invoked for successful transmissions on output\n"
+ " \t\t\tdevice but these statistics are not available for generic XDP mode,\n"
+ " \t\t\thence they will be omitted from the output when using SKB mode\n"
+ " \t\t\t\txmit/s - Number of packets that were transmitted per second\n"
+ " \t\t\t\tdrop/s - Number of packets that failed transmissions per second\n"
+ " \t\t\t\tdrv_err/s - Number of internal driver errors per second\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+}
+
+void sample_usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error)
+{
+ int i;
+
+ if (!error)
+ sample_print_help(mask);
+
+ printf("\n%s\nOption for %s:\n", doc, argv[0]);
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-15s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value: %d)",
+ *long_options[i].flag);
+ else
+ printf("\t short-option: -%c", long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+ unsigned int nr_cpus = libbpf_num_possible_cpus();
+ struct datarec *array;
+
+ array = calloc(nr_cpus, sizeof(*array));
+ if (!array) {
+ fprintf(stderr, "Failed to allocate memory (nr_cpus: %u)\n",
+ nr_cpus);
+ return NULL;
+ }
+ return array;
+}
+
+static int map_entry_init(struct map_entry *e, __u64 pair)
+{
+ e->pair = pair;
+ INIT_HLIST_NODE(&e->node);
+ e->val.timestamp = gettime();
+ e->val.cpu = alloc_record_per_cpu();
+ if (!e->val.cpu)
+ return -ENOMEM;
+ return 0;
+}
+
+static void map_collect_percpu(struct datarec *values, struct record *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = libbpf_num_possible_cpus();
+ __u64 sum_xdp_redirect = 0;
+ __u64 sum_processed = 0;
+ __u64 sum_xdp_pass = 0;
+ __u64 sum_xdp_drop = 0;
+ __u64 sum_dropped = 0;
+ __u64 sum_issue = 0;
+ int i;
+
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = READ_ONCE(values[i].processed);
+ rec->cpu[i].dropped = READ_ONCE(values[i].dropped);
+ rec->cpu[i].issue = READ_ONCE(values[i].issue);
+ rec->cpu[i].xdp_pass = READ_ONCE(values[i].xdp_pass);
+ rec->cpu[i].xdp_drop = READ_ONCE(values[i].xdp_drop);
+ rec->cpu[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect);
+
+ sum_processed += rec->cpu[i].processed;
+ sum_dropped += rec->cpu[i].dropped;
+ sum_issue += rec->cpu[i].issue;
+ sum_xdp_pass += rec->cpu[i].xdp_pass;
+ sum_xdp_drop += rec->cpu[i].xdp_drop;
+ sum_xdp_redirect += rec->cpu[i].xdp_redirect;
+ }
+
+ rec->total.processed = sum_processed;
+ rec->total.dropped = sum_dropped;
+ rec->total.issue = sum_issue;
+ rec->total.xdp_pass = sum_xdp_pass;
+ rec->total.xdp_drop = sum_xdp_drop;
+ rec->total.xdp_redirect = sum_xdp_redirect;
+}
+
+static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ __u32 batch, count = 32;
+ struct datarec *values;
+ bool init = false;
+ __u64 *keys;
+ int i, ret;
+
+ keys = calloc(count, sizeof(__u64));
+ if (!keys)
+ return -ENOMEM;
+ values = calloc(count * nr_cpus, sizeof(struct datarec));
+ if (!values) {
+ free(keys);
+ return -ENOMEM;
+ }
+
+ for (;;) {
+ bool exit = false;
+
+ ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch,
+ keys, values, &count, NULL);
+ if (ret < 0 && errno != ENOENT)
+ break;
+ if (errno == ENOENT)
+ exit = true;
+
+ init = true;
+ for (i = 0; i < count; i++) {
+ struct map_entry *e, *x = NULL;
+ __u64 pair = keys[i];
+ struct datarec *arr;
+
+ arr = &values[i * nr_cpus];
+ hash_for_each_possible(rec->xmit_map, e, node, pair) {
+ if (e->pair == pair) {
+ x = e;
+ break;
+ }
+ }
+ if (!x) {
+ x = calloc(1, sizeof(*x));
+ if (!x)
+ goto cleanup;
+ if (map_entry_init(x, pair) < 0) {
+ free(x);
+ goto cleanup;
+ }
+ hash_add(rec->xmit_map, &x->node, pair);
+ }
+ map_collect_percpu(arr, &x->val);
+ }
+
+ if (exit)
+ break;
+ count = 32;
+ }
+
+ free(values);
+ free(keys);
+ return 0;
+cleanup:
+ free(values);
+ free(keys);
+ return -ENOMEM;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int i;
+
+ rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record));
+ if (!rec) {
+ fprintf(stderr, "Failed to allocate memory\n");
+ return NULL;
+ }
+
+ if (sample_mask & SAMPLE_RX_CNT) {
+ rec->rx_cnt.cpu = alloc_record_per_cpu();
+ if (!rec->rx_cnt.cpu) {
+ fprintf(stderr,
+ "Failed to allocate rx_cnt per-CPU array\n");
+ goto end_rec;
+ }
+ }
+ if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) {
+ rec->redir_err[i].cpu = alloc_record_per_cpu();
+ if (!rec->redir_err[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate redir_err per-CPU array for "
+ "\"%s\" case\n",
+ xdp_redirect_err_names[i]);
+ while (i--)
+ free(rec->redir_err[i].cpu);
+ goto end_rx_cnt;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ rec->kthread.cpu = alloc_record_per_cpu();
+ if (!rec->kthread.cpu) {
+ fprintf(stderr,
+ "Failed to allocate kthread per-CPU array\n");
+ goto end_redir;
+ }
+ }
+ if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ rec->exception[i].cpu = alloc_record_per_cpu();
+ if (!rec->exception[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate exception per-CPU array for "
+ "\"%s\" case\n",
+ action2str(i));
+ while (i--)
+ free(rec->exception[i].cpu);
+ goto end_kthread;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ rec->devmap_xmit.cpu = alloc_record_per_cpu();
+ if (!rec->devmap_xmit.cpu) {
+ fprintf(stderr,
+ "Failed to allocate devmap_xmit per-CPU array\n");
+ goto end_exception;
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ hash_init(rec->xmit_map);
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ for (i = 0; i < sample_n_cpus; i++) {
+ rec->enq[i].cpu = alloc_record_per_cpu();
+ if (!rec->enq[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate enqueue per-CPU array for "
+ "CPU %d\n",
+ i);
+ while (i--)
+ free(rec->enq[i].cpu);
+ goto end_devmap_xmit;
+ }
+ }
+ }
+
+ return rec;
+
+end_devmap_xmit:
+ free(rec->devmap_xmit.cpu);
+end_exception:
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(rec->exception[i].cpu);
+end_kthread:
+ free(rec->kthread.cpu);
+end_redir:
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(rec->redir_err[i].cpu);
+end_rx_cnt:
+ free(rec->rx_cnt.cpu);
+end_rec:
+ free(rec);
+ return NULL;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ struct hlist_node *tmp;
+ struct map_entry *e;
+ int i;
+
+ for (i = 0; i < sample_n_cpus; i++)
+ free(r->enq[i].cpu);
+ hash_for_each_safe(r->xmit_map, i, tmp, e, node) {
+ hash_del(&e->node);
+ free(e->val.cpu);
+ free(e);
+ }
+ free(r->devmap_xmit.cpu);
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(r->exception[i].cpu);
+ free(r->kthread.cpu);
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(r->redir_err[i].cpu);
+ free(r->rx_cnt.cpu);
+ free(r);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double)period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static double sample_round(double val)
+{
+ if (val - floor(val) < 0.5)
+ return floor(val);
+ return ceil(val);
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->processed - p->processed;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->dropped - p->dropped;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->issue - p->issue;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->info - p->info;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass,
+ double *xdp_drop, double *xdp_redirect, double period_)
+{
+ *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+ if (period_ > 0) {
+ *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+ *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+ *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+ }
+}
+
+static void stats_get_rx_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus, struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->rx_cnt;
+ prev = &stats_prev->rx_cnt;
+ t = calc_period(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), ERR(err));
+ }
+
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->rx_cnt.pps = pps;
+ out->rx_cnt.drop = drop;
+ out->rx_cnt.err = err;
+ out->totals.rx += pps;
+ out->totals.drop += drop;
+ out->totals.err += err;
+ }
+}
+
+static void stats_get_cpumap_enqueue(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i, to_cpu;
+
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) {
+ rec = &stats_rec->enq[to_cpu];
+ prev = &stats_prev->enq[to_cpu];
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ if (pps > 0 || drop > 0) {
+ char str[64];
+
+ snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu);
+
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu);
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+ print_default(
+ " %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+ }
+}
+
+static void stats_get_cpumap_remote(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ double xdp_pass, xdp_drop, xdp_redirect;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ if (xdp_pass || xdp_drop || xdp_redirect) {
+ print_err(xdp_drop,
+ " %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ "xdp_stats", PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, &xdp_redirect, t);
+ if (!xdp_pass && !xdp_drop && !xdp_redirect)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+}
+
+static void stats_get_cpumap_kthread(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ print_err(drop, " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err,
+ "sched");
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), err, "sched");
+ }
+}
+
+static void stats_get_redirect_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, pps;
+ int i;
+
+ rec = &stats_rec->redir_err[0];
+ prev = &stats_prev->redir_err[0];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ if (!pps)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf "\n", str, REDIR(pps));
+ }
+
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ out->redir_cnt.suc = pps;
+ out->totals.redir += pps;
+ }
+}
+
+static void stats_get_redirect_err_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, drop, sum = 0;
+ int rec_i, i;
+
+ for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) {
+ char str[64];
+
+ rec = &stats_rec->redir_err[rec_i];
+ prev = &stats_prev->redir_err[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ if (drop > 0 && !out) {
+ snprintf(str, sizeof(str),
+ sample_log_level & LL_DEFAULT ? "%s total" :
+ "%s",
+ xdp_redirect_err_names[rec_i]);
+ print_err(drop, " %-18s " FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ sum += drop;
+ }
+
+ if (out) {
+ out->redir_cnt.err = sum;
+ out->totals.err += sum;
+ }
+}
+
+static void stats_get_exception_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double t, drop, sum = 0;
+ struct record *rec, *prev;
+ int rec_i, i;
+
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ rec = &stats_rec->exception[rec_i];
+ prev = &stats_prev->exception[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ /* Fold out errors after heading */
+ sum += drop;
+
+ if (drop > 0 && !out) {
+ print_always(" %-18s " FMT_COLUMNf "\n",
+ action2str(rec_i), ERR(drop));
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n",
+ str, ERR(drop));
+ }
+ }
+ }
+
+ if (out) {
+ out->except_cnt.hits = sum;
+ out->totals.err += sum;
+ }
+}
+
+static void stats_get_devmap_xmit(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double pps, drop, info, err;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->devmap_xmit;
+ prev = &stats_prev->devmap_xmit;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(r, p, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n",
+ str, XMIT(pps), DROP(drop), err, "drv_err/s",
+ info, "bulk-avg");
+ }
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ info = calc_info_pps(&rec->total, &prev->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->xmit_cnt.pps = pps;
+ out->xmit_cnt.drop = drop;
+ out->xmit_cnt.bavg = info;
+ out->xmit_cnt.err = err;
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ }
+}
+
+static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out,
+ bool xmit_total)
+{
+ double pps, drop, info, err;
+ struct map_entry *entry;
+ struct record *r, *p;
+ double t;
+ int bkt;
+
+ hash_for_each(stats_rec->xmit_map, bkt, entry, node) {
+ struct map_entry *e, *x = NULL;
+ char ifname_from[IFNAMSIZ];
+ char ifname_to[IFNAMSIZ];
+ const char *fstr, *tstr;
+ unsigned long prev_time;
+ struct record beg = {};
+ __u32 from_idx, to_idx;
+ char str[128];
+ __u64 pair;
+ int i;
+
+ prev_time = sample_interval * NANOSEC_PER_SEC;
+
+ pair = entry->pair;
+ from_idx = pair >> 32;
+ to_idx = pair & 0xFFFFFFFF;
+
+ r = &entry->val;
+ beg.timestamp = r->timestamp - prev_time;
+
+ /* Find matching entry from stats_prev map */
+ hash_for_each_possible(stats_prev->xmit_map, e, node, pair) {
+ if (e->pair == pair) {
+ x = e;
+ break;
+ }
+ }
+ if (x)
+ p = &x->val;
+ else
+ p = &beg;
+ t = calc_period(r, p);
+ pps = calc_pps(&r->total, &p->total, t);
+ drop = calc_drop_pps(&r->total, &p->total, t);
+ info = calc_info_pps(&r->total, &p->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&r->total, &p->total, t);
+
+ if (out) {
+ /* We are responsible for filling out totals */
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ continue;
+ }
+
+ fstr = tstr = NULL;
+ if (if_indextoname(from_idx, ifname_from))
+ fstr = ifname_from;
+ if (if_indextoname(to_idx, ifname_to))
+ tstr = ifname_to;
+
+ snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?",
+ tstr ?: "?");
+ /* Skip idle streams of redirection */
+ if (pps || drop || err) {
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop),
+ err, "drv_err/s", info, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *rc = &r->cpu[i];
+ struct datarec *pc, p_beg = {};
+ char str[64];
+
+ pc = p == &beg ? &p_beg : &p->cpu[i];
+
+ pps = calc_pps(rc, pc, t);
+ drop = calc_drop_pps(rc, pc, t);
+ err = calc_errs_pps(rc, pc, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(rc, pc, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps),
+ DROP(drop), err, "drv_err/s", info, "bulk-avg");
+ }
+ }
+}
+
+static void stats_print(const char *prefix, int mask, struct stats_record *r,
+ struct stats_record *p, struct sample_output *out)
+{
+ int nr_cpus = libbpf_num_possible_cpus();
+ const char *str;
+
+ print_always("%-23s", prefix ?: "Summary");
+ if (mask & SAMPLE_RX_CNT)
+ print_always(FMT_COLUMNl, RX(out->totals.rx));
+ if (mask & SAMPLE_REDIRECT_CNT)
+ print_always(FMT_COLUMNl, REDIR(out->totals.redir));
+ printf(FMT_COLUMNl,
+ out->totals.err + out->totals.drop + out->totals.drop_xmit,
+ "err,drop/s");
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT ||
+ mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ printf(FMT_COLUMNl, XMIT(out->totals.xmit));
+ printf("\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ?
+ "receive total" :
+ "receive";
+ print_err((out->rx_cnt.err || out->rx_cnt.drop),
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n",
+ str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop),
+ ERR(out->rx_cnt.err));
+
+ stats_get_rx_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ stats_get_cpumap_enqueue(r, p, nr_cpus);
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ stats_get_cpumap_kthread(r, p, nr_cpus);
+ stats_get_cpumap_remote(r, p, nr_cpus);
+ }
+
+ if (mask & SAMPLE_REDIRECT_CNT) {
+ str = out->redir_cnt.suc ? "redirect total" : "redirect";
+ print_default(" %-20s " FMT_COLUMNl "\n", str,
+ REDIR(out->redir_cnt.suc));
+
+ stats_get_redirect_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ?
+ "redirect_err total" :
+ "redirect_err";
+ print_err(out->redir_cnt.err, " %-20s " FMT_COLUMNl "\n", str,
+ ERR(out->redir_cnt.err));
+
+ stats_get_redirect_err_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ str = out->except_cnt.hits ? "xdp_exception total" :
+ "xdp_exception";
+
+ print_err(out->except_cnt.hits, " %-20s " FMT_COLUMNl "\n", str,
+ HITS(out->except_cnt.hits));
+
+ stats_get_exception_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ?
+ "devmap_xmit total" :
+ "devmap_xmit";
+
+ print_err(out->xmit_cnt.err || out->xmit_cnt.drop,
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl
+ __COLUMN(".2f") "\n",
+ str, XMIT(out->xmit_cnt.pps),
+ DROP(out->xmit_cnt.drop), out->xmit_cnt.err,
+ "drv_err/s", out->xmit_cnt.bavg, "bulk-avg");
+
+ stats_get_devmap_xmit(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+
+ if (sample_log_level & LL_DEFAULT ||
+ ((sample_log_level & LL_SIMPLE) && sample_err_exp)) {
+ sample_err_exp = false;
+ printf("\n");
+ }
+}
+
+int sample_setup_maps(struct bpf_map **maps)
+{
+ sample_n_cpus = libbpf_num_possible_cpus();
+
+ for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+ sample_map[i] = maps[i];
+
+ switch (i) {
+ case MAP_RX:
+ case MAP_CPUMAP_KTHREAD:
+ case MAP_DEVMAP_XMIT:
+ sample_map_count[i] = sample_n_cpus;
+ break;
+ case MAP_REDIRECT_ERR:
+ sample_map_count[i] =
+ XDP_REDIRECT_ERR_MAX * sample_n_cpus;
+ break;
+ case MAP_EXCEPTION:
+ sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus;
+ case MAP_CPUMAP_ENQUEUE:
+ sample_map_count[i] = sample_n_cpus * sample_n_cpus;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (bpf_map__set_max_entries(sample_map[i], sample_map_count[i]) < 0)
+ return -errno;
+ }
+ sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
+ return 0;
+}
+
+static int sample_setup_maps_mappings(void)
+{
+ for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+ size_t size = sample_map_count[i] * sizeof(struct datarec);
+
+ sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, bpf_map__fd(sample_map[i]), 0);
+ if (sample_mmap[i] == MAP_FAILED)
+ return -errno;
+ }
+ return 0;
+}
+
+int __sample_init(int mask)
+{
+ sigset_t st;
+
+ sigemptyset(&st);
+ sigaddset(&st, SIGQUIT);
+ sigaddset(&st, SIGINT);
+ sigaddset(&st, SIGTERM);
+
+ if (sigprocmask(SIG_BLOCK, &st, NULL) < 0)
+ return -errno;
+
+ sample_sig_fd = signalfd(-1, &st, SFD_CLOEXEC | SFD_NONBLOCK);
+ if (sample_sig_fd < 0)
+ return -errno;
+
+ sample_mask = mask;
+
+ return sample_setup_maps_mappings();
+}
+
+static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
+{
+ __u32 cur_prog_id = 0;
+ int ret;
+
+ if (prog_id) {
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
+ if (ret < 0)
+ return -errno;
+
+ if (prog_id != cur_prog_id) {
+ print_always(
+ "Program on ifindex %d does not match installed "
+ "program, skipping unload\n",
+ ifindex);
+ return -ENOENT;
+ }
+ }
+
+ return bpf_xdp_detach(ifindex, xdp_flags, NULL);
+}
+
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+ bool force)
+{
+ int ret, xdp_flags = 0;
+ __u32 prog_id = 0;
+
+ if (sample_xdp_cnt == 32) {
+ fprintf(stderr,
+ "Total limit for installed XDP programs in a sample reached\n");
+ return -ENOTSUP;
+ }
+
+ xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
+ xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+ ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to install program \"%s\" on ifindex %d, mode = %s, "
+ "force = %s: %s\n",
+ bpf_program__name(xdp_prog), ifindex,
+ generic ? "skb" : "native", force ? "true" : "false",
+ strerror(-ret));
+ return ret;
+ }
+
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to get XDP program id for ifindex %d, removing program: %s\n",
+ ifindex, strerror(errno));
+ __sample_remove_xdp(ifindex, 0, xdp_flags);
+ return ret;
+ }
+ sample_xdp_progs[sample_xdp_cnt++] =
+ (struct xdp_desc){ ifindex, prog_id, xdp_flags };
+
+ return 0;
+}
+
+static void sample_summary_print(void)
+{
+ double num = sample_out.rx_cnt.num;
+
+ if (sample_out.totals.rx) {
+ double pkts = sample_out.totals.rx;
+
+ print_always(" Packets received : %'-10llu\n",
+ sample_out.totals.rx);
+ print_always(" Average packets/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+ if (sample_out.totals.redir) {
+ double pkts = sample_out.totals.redir;
+
+ print_always(" Packets redirected : %'-10llu\n",
+ sample_out.totals.redir);
+ print_always(" Average redir/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+ if (sample_out.totals.drop)
+ print_always(" Rx dropped : %'-10llu\n",
+ sample_out.totals.drop);
+ if (sample_out.totals.drop_xmit)
+ print_always(" Tx dropped : %'-10llu\n",
+ sample_out.totals.drop_xmit);
+ if (sample_out.totals.err)
+ print_always(" Errors recorded : %'-10llu\n",
+ sample_out.totals.err);
+ if (sample_out.totals.xmit) {
+ double pkts = sample_out.totals.xmit;
+
+ print_always(" Packets transmitted : %'-10llu\n",
+ sample_out.totals.xmit);
+ print_always(" Average transmit/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+}
+
+void sample_exit(int status)
+{
+ size_t size;
+
+ for (int i = 0; i < NUM_MAP; i++) {
+ size = sample_map_count[i] * sizeof(**sample_mmap);
+ munmap(sample_mmap[i], size);
+ }
+ while (sample_xdp_cnt--) {
+ int i = sample_xdp_cnt, ifindex, xdp_flags;
+ __u32 prog_id;
+
+ prog_id = sample_xdp_progs[i].prog_id;
+ ifindex = sample_xdp_progs[i].ifindex;
+ xdp_flags = sample_xdp_progs[i].flags;
+
+ __sample_remove_xdp(ifindex, prog_id, xdp_flags);
+ }
+ sample_summary_print();
+ close(sample_sig_fd);
+ exit(status);
+}
+
+static int sample_stats_collect(struct stats_record *rec)
+{
+ int i;
+
+ if (sample_mask & SAMPLE_RX_CNT)
+ map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt);
+
+ if (sample_mask & SAMPLE_REDIRECT_CNT)
+ map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR], &rec->redir_err[0]);
+
+ if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) {
+ for (i = 1; i < XDP_REDIRECT_ERR_MAX; i++)
+ map_collect_percpu(&sample_mmap[MAP_REDIRECT_ERR][i * sample_n_cpus],
+ &rec->redir_err[i]);
+ }
+
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ for (i = 0; i < sample_n_cpus; i++)
+ map_collect_percpu(&sample_mmap[MAP_CPUMAP_ENQUEUE][i * sample_n_cpus],
+ &rec->enq[i]);
+
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT)
+ map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD],
+ &rec->kthread);
+
+ if (sample_mask & SAMPLE_EXCEPTION_CNT)
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus],
+ &rec->exception[i]);
+
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT)
+ map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT], &rec->devmap_xmit);
+
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
+ if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void sample_summary_update(struct sample_output *out)
+{
+ sample_out.totals.rx += out->totals.rx;
+ sample_out.totals.redir += out->totals.redir;
+ sample_out.totals.drop += out->totals.drop;
+ sample_out.totals.drop_xmit += out->totals.drop_xmit;
+ sample_out.totals.err += out->totals.err;
+ sample_out.totals.xmit += out->totals.xmit;
+ sample_out.rx_cnt.num++;
+}
+
+static void sample_stats_print(int mask, struct stats_record *cur,
+ struct stats_record *prev, char *prog_name)
+{
+ struct sample_output out = {};
+
+ if (mask & SAMPLE_RX_CNT)
+ stats_get_rx_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_REDIRECT_CNT)
+ stats_get_redirect_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_REDIRECT_ERR_CNT)
+ stats_get_redirect_err_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_EXCEPTION_CNT)
+ stats_get_exception_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT)
+ stats_get_devmap_xmit(cur, prev, 0, &out);
+ else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(cur, prev, 0, &out,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+ sample_summary_update(&out);
+
+ stats_print(prog_name, mask, cur, prev, &out);
+}
+
+void sample_switch_mode(void)
+{
+ sample_log_level ^= LL_DEBUG - 1;
+}
+
+static int sample_signal_cb(void)
+{
+ struct signalfd_siginfo si;
+ int r;
+
+ r = read(sample_sig_fd, &si, sizeof(si));
+ if (r < 0)
+ return -errno;
+
+ switch (si.ssi_signo) {
+ case SIGQUIT:
+ sample_switch_mode();
+ printf("\n");
+ break;
+ default:
+ printf("\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Pointer swap trick */
+static void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static int sample_timer_cb(int timerfd, struct stats_record **rec,
+ struct stats_record **prev)
+{
+ char line[64] = "Summary";
+ int ret;
+ __u64 t;
+
+ ret = read(timerfd, &t, sizeof(t));
+ if (ret < 0)
+ return -errno;
+
+ swap(prev, rec);
+ ret = sample_stats_collect(*rec);
+ if (ret < 0)
+ return ret;
+
+ if (sample_xdp_cnt == 2 && !(sample_mask & SAMPLE_SKIP_HEADING)) {
+ char fi[IFNAMSIZ];
+ char to[IFNAMSIZ];
+ const char *f, *t;
+
+ f = t = NULL;
+ if (if_indextoname(sample_xdp_progs[0].ifindex, fi))
+ f = fi;
+ if (if_indextoname(sample_xdp_progs[1].ifindex, to))
+ t = to;
+
+ snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
+ }
+
+ sample_stats_print(sample_mask, *rec, *prev, line);
+ return 0;
+}
+
+int sample_run(int interval, void (*post_cb)(void *), void *ctx)
+{
+ struct timespec ts = { interval, 0 };
+ struct itimerspec its = { ts, ts };
+ struct stats_record *rec, *prev;
+ struct pollfd pfd[2] = {};
+ int timerfd, ret;
+
+ if (!interval) {
+ fprintf(stderr, "Incorrect interval 0\n");
+ return -EINVAL;
+ }
+ sample_interval = interval;
+ /* Pretty print numbers */
+ setlocale(LC_NUMERIC, "en_US.UTF-8");
+
+ timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+ if (timerfd < 0)
+ return -errno;
+ timerfd_settime(timerfd, 0, &its, NULL);
+
+ pfd[0].fd = sample_sig_fd;
+ pfd[0].events = POLLIN;
+
+ pfd[1].fd = timerfd;
+ pfd[1].events = POLLIN;
+
+ ret = -ENOMEM;
+ rec = alloc_stats_record();
+ if (!rec)
+ goto end;
+ prev = alloc_stats_record();
+ if (!prev)
+ goto end_rec;
+
+ ret = sample_stats_collect(rec);
+ if (ret < 0)
+ goto end_rec_prev;
+
+ for (;;) {
+ ret = poll(pfd, 2, -1);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ else
+ break;
+ }
+
+ if (pfd[0].revents & POLLIN)
+ ret = sample_signal_cb();
+ else if (pfd[1].revents & POLLIN)
+ ret = sample_timer_cb(timerfd, &rec, &prev);
+
+ if (ret)
+ break;
+
+ if (post_cb)
+ post_cb(ctx);
+ }
+
+end_rec_prev:
+ free_stats_record(prev);
+end_rec:
+ free_stats_record(rec);
+end:
+ close(timerfd);
+
+ return ret;
+}
+
+const char *get_driver_name(int ifindex)
+{
+ struct ethtool_drvinfo drv = {};
+ char ifname[IF_NAMESIZE];
+ static char drvname[32];
+ struct ifreq ifr = {};
+ int fd, r = 0;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return "[error]";
+
+ if (!if_indextoname(ifindex, ifname))
+ goto end;
+
+ drv.cmd = ETHTOOL_GDRVINFO;
+ safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ ifr.ifr_data = (void *)&drv;
+
+ r = ioctl(fd, SIOCETHTOOL, &ifr);
+ if (r)
+ goto end;
+
+ safe_strncpy(drvname, drv.driver, sizeof(drvname));
+
+ close(fd);
+ return drvname;
+
+end:
+ r = errno;
+ close(fd);
+ return r == EOPNOTSUPP ? "loopback" : "[error]";
+}
+
+int get_mac_addr(int ifindex, void *mac_addr)
+{
+ char ifname[IF_NAMESIZE];
+ struct ifreq ifr = {};
+ int fd, r;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (!if_indextoname(ifindex, ifname)) {
+ r = -errno;
+ goto end;
+ }
+
+ safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ r = ioctl(fd, SIOCGIFHWADDR, &ifr);
+ if (r) {
+ r = -errno;
+ goto end;
+ }
+
+ memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+
+end:
+ close(fd);
+ return r;
+}
+
+__attribute__((constructor)) static void sample_ctor(void)
+{
+ if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
+ fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+ strerror(errno));
+ /* Just exit, nothing to cleanup right now */
+ exit(EXIT_FAIL_BPF);
+ }
+}
diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h
new file mode 100644
index 000000000000..f45051679977
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.h
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef XDP_SAMPLE_USER_H
+#define XDP_SAMPLE_USER_H
+
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "xdp_sample_shared.h"
+
+enum stats_mask {
+ _SAMPLE_REDIRECT_MAP = 1U << 0,
+ SAMPLE_RX_CNT = 1U << 1,
+ SAMPLE_REDIRECT_ERR_CNT = 1U << 2,
+ SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3,
+ SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4,
+ SAMPLE_EXCEPTION_CNT = 1U << 5,
+ SAMPLE_DEVMAP_XMIT_CNT = 1U << 6,
+ SAMPLE_REDIRECT_CNT = 1U << 7,
+ SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8,
+ SAMPLE_SKIP_HEADING = 1U << 9,
+};
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+int sample_setup_maps(struct bpf_map **maps);
+int __sample_init(int mask);
+void sample_exit(int status);
+int sample_run(int interval, void (*post_cb)(void *), void *ctx);
+
+void sample_switch_mode(void);
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+ bool force);
+void sample_usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error);
+
+const char *get_driver_name(int ifindex);
+int get_mac_addr(int ifindex, void *mac_addr);
+
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+#endif
+__attribute__((unused))
+static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+{
+ if (!size)
+ return dst;
+ strncpy(dst, src, size - 1);
+ dst[size - 1] = '\0';
+ return dst;
+}
+#pragma GCC diagnostic pop
+
+#define __attach_tp(name) \
+ ({ \
+ if (bpf_program__type(skel->progs.name) != BPF_PROG_TYPE_TRACING)\
+ return -EINVAL; \
+ skel->links.name = bpf_program__attach(skel->progs.name); \
+ if (!skel->links.name) \
+ return -errno; \
+ })
+
+#define sample_init_pre_load(skel) \
+ ({ \
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \
+ sample_setup_maps((struct bpf_map *[]){ \
+ skel->maps.rx_cnt, skel->maps.redir_err_cnt, \
+ skel->maps.cpumap_enqueue_cnt, \
+ skel->maps.cpumap_kthread_cnt, \
+ skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \
+ skel->maps.devmap_xmit_cnt_multi }); \
+ })
+
+#define DEFINE_SAMPLE_INIT(name) \
+ static int sample_init(struct name *skel, int mask) \
+ { \
+ int ret; \
+ ret = __sample_init(mask); \
+ if (ret < 0) \
+ return ret; \
+ if (mask & SAMPLE_REDIRECT_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map); \
+ if (mask & SAMPLE_REDIRECT_CNT) \
+ __attach_tp(tp_xdp_redirect); \
+ if (mask & SAMPLE_REDIRECT_ERR_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map_err); \
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) \
+ __attach_tp(tp_xdp_redirect_err); \
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) \
+ __attach_tp(tp_xdp_cpumap_enqueue); \
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) \
+ __attach_tp(tp_xdp_cpumap_kthread); \
+ if (mask & SAMPLE_EXCEPTION_CNT) \
+ __attach_tp(tp_xdp_exception); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) \
+ __attach_tp(tp_xdp_devmap_xmit); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \
+ __attach_tp(tp_xdp_devmap_xmit_multi); \
+ return 0; \
+ }
+
+#endif
diff --git a/samples/bpf/xdp_tx_iptunnel_common.h b/samples/bpf/xdp_tx_iptunnel_common.h
index dd12cc35110f..be839892caff 100644
--- a/samples/bpf/xdp_tx_iptunnel_common.h
+++ b/samples/bpf/xdp_tx_iptunnel_common.h
@@ -1,8 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
#define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index 0f4f6e8c8611..0e2bca3a3fff 100644
--- a/samples/bpf/xdp_tx_iptunnel_kern.c
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -16,22 +16,22 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "xdp_tx_iptunnel_common.h"
-struct bpf_map_def SEC("maps") rxcnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = 256,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 256);
+} rxcnt SEC(".maps");
-struct bpf_map_def SEC("maps") vip2tnl = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct vip),
- .value_size = sizeof(struct iptnl_info),
- .max_entries = MAX_IPTNL_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct vip);
+ __type(value, struct iptnl_info);
+ __uint(max_entries, MAX_IPTNL_ENTRIES);
+} vip2tnl SEC(".maps");
static __always_inline void count_tx(u32 protocol)
{
@@ -212,7 +212,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp.frags")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 715cd12eaca5..7e4b2f7108a6 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/if_link.h>
@@ -12,25 +9,39 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/resource.h>
+#include <net/if.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
-#include "bpf_load.h"
-#include "libbpf.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#include "bpf_util.h"
#include "xdp_tx_iptunnel_common.h"
#define STATS_INTERVAL_S 2U
static int ifindex = -1;
-static __u32 xdp_flags = 0;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int rxcnt_map_fd;
+static __u32 prog_id;
static void int_exit(int sig)
{
- if (ifindex > -1)
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ __u32 curr_prog_id = 0;
+
+ if (ifindex > -1) {
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
+ exit(1);
+ }
+ if (prog_id == curr_prog_id)
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
+ else if (!curr_prog_id)
+ printf("couldn't find a prog id on a given iface\n");
+ else
+ printf("program on interface changed, not removing\n");
+ }
exit(0);
}
@@ -53,7 +64,8 @@ static void poll_stats(unsigned int kill_after_s)
for (proto = 0; proto < nr_protos; proto++) {
__u64 sum = 0;
- assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0);
+ assert(bpf_map_lookup_elem(rxcnt_map_fd, &proto,
+ values) == 0);
for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[proto][i]);
@@ -71,7 +83,7 @@ static void usage(const char *cmd)
"in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
"is used to select packets to encapsulate\n\n");
printf("Usage: %s [...]\n", cmd);
- printf(" -i <ifindex> Interface Index\n");
+ printf(" -i <ifname|ifindex> Interface\n");
printf(" -a <vip-service-address> IPv4 or IPv6\n");
printf(" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n");
printf(" -s <source-ip> Used in the IPTunnel header\n");
@@ -81,6 +93,7 @@ static void usage(const char *cmd)
printf(" -P <IP-Protocol> Default is TCP\n");
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
+ printf(" -F Force loading the XDP prog\n");
printf(" -h Display this help\n");
}
@@ -138,16 +151,19 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port)
int main(int argc, char **argv)
{
+ int min_port = 0, max_port = 0, vip2tnl_map_fd;
+ const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
unsigned char opt_flags[256] = {};
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
- const char *optstr = "i:a:p:s:d:m:T:P:SNh";
- int min_port = 0, max_port = 0;
struct iptnl_info tnl = {};
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_program *prog;
+ struct bpf_object *obj;
struct vip vip = {};
char filename[256];
- int opt;
- int i;
+ int opt, prog_fd;
+ int i, err;
tnl.family = AF_UNSPEC;
vip.protocol = IPPROTO_TCP;
@@ -162,7 +178,9 @@ int main(int argc, char **argv)
switch (opt) {
case 'i':
- ifindex = atoi(optarg);
+ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = atoi(optarg);
break;
case 'a':
vip.family = parse_ipstr(optarg, vip.daddr.v6);
@@ -209,7 +227,10 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
default:
usage(argv[0]);
@@ -218,6 +239,9 @@ int main(int argc, char **argv)
opt_flags[opt] = 0;
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
@@ -226,20 +250,31 @@ int main(int argc, char **argv)
}
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+ if (!ifindex) {
+ fprintf(stderr, "Invalid ifname\n");
return 1;
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("bpf_object__load(): %s\n", strerror(errno));
return 1;
}
+ prog_fd = bpf_program__fd(prog);
- if (!prog_fd[0]) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+ vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl");
+ if (vip2tnl_map_fd < 0 || rxcnt_map_fd < 0) {
+ printf("bpf_object__find_map_fd_by_name failed\n");
return 1;
}
@@ -248,20 +283,28 @@ int main(int argc, char **argv)
while (min_port <= max_port) {
vip.dport = htons(min_port++);
- if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
+ if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl,
+ BPF_NOEXIST)) {
perror("bpf_map_update_elem(&vip2tnl)");
return 1;
}
}
- if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err) {
+ printf("can't get prog info - %s\n", strerror(errno));
+ return err;
+ }
+ prog_id = info.id;
+
poll_stats(kill_after_s);
- set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
return 0;
}