summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/mips/include/uapi/asm/perf_regs.h40
-rw-r--r--tools/arch/x86/include/asm/asm.h193
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h7
-rw-r--r--tools/arch/x86/include/asm/irq_vectors.h7
-rw-r--r--tools/arch/x86/include/asm/nops.h24
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h2
-rw-r--r--tools/bootconfig/include/linux/bootconfig.h4
-rw-r--r--tools/bootconfig/main.c1
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst2
-rw-r--r--tools/bpf/bpftool/Makefile5
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool6
-rw-r--r--tools/bpf/bpftool/cgroup.c3
-rw-r--r--tools/bpf/bpftool/gen.c421
-rw-r--r--tools/bpf/bpftool/main.c11
-rw-r--r--tools/bpf/bpftool/main.h1
-rw-r--r--tools/bpf/bpftool/prog.c109
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c3
-rw-r--r--tools/bpf/resolve_btfids/main.c3
-rw-r--r--tools/build/Makefile.build22
-rwxr-xr-xtools/debugging/kernel-chktaint2
-rw-r--r--tools/include/linux/bits.h2
-rw-r--r--tools/include/linux/const.h8
-rw-r--r--tools/include/uapi/asm-generic/unistd.h3
-rw-r--r--tools/include/uapi/linux/bpf.h82
-rw-r--r--tools/include/uapi/linux/fs.h2
-rw-r--r--tools/include/uapi/linux/in.h3
-rw-r--r--tools/include/uapi/linux/kvm.h5
-rw-r--r--tools/include/uapi/linux/perf_event.h2
-rw-r--r--tools/include/uapi/linux/prctl.h8
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile18
-rw-r--r--tools/lib/bpf/README.rst168
-rw-r--r--tools/lib/bpf/bpf.c179
-rw-r--r--tools/lib/bpf/bpf.h2
-rw-r--r--tools/lib/bpf/bpf_gen_internal.h41
-rw-r--r--tools/lib/bpf/bpf_helpers.h66
-rw-r--r--tools/lib/bpf/bpf_prog_linfo.c18
-rw-r--r--tools/lib/bpf/bpf_tracing.h108
-rw-r--r--tools/lib/bpf/btf.c302
-rw-r--r--tools/lib/bpf/btf_dump.c14
-rw-r--r--tools/lib/bpf/gen_loader.c729
-rw-r--r--tools/lib/bpf/libbpf.c965
-rw-r--r--tools/lib/bpf/libbpf.h68
-rw-r--r--tools/lib/bpf/libbpf.map13
-rw-r--r--tools/lib/bpf/libbpf_errno.c7
-rw-r--r--tools/lib/bpf/libbpf_internal.h66
-rw-r--r--tools/lib/bpf/libbpf_legacy.h59
-rw-r--r--tools/lib/bpf/linker.c41
-rw-r--r--tools/lib/bpf/netlink.c572
-rw-r--r--tools/lib/bpf/nlattr.c2
-rw-r--r--tools/lib/bpf/nlattr.h60
-rw-r--r--tools/lib/bpf/ringbuf.c26
-rw-r--r--tools/lib/bpf/skel_internal.h123
-rw-r--r--tools/lib/bpf/xsk.c2
-rw-r--r--tools/lib/traceevent/plugins/plugin_kvm.c4
-rw-r--r--tools/objtool/arch/x86/decode.c6
-rw-r--r--tools/objtool/arch/x86/include/arch/special.h1
-rw-r--r--tools/objtool/check.c38
-rw-r--r--tools/objtool/elf.c135
-rw-r--r--tools/objtool/include/objtool/elf.h18
-rw-r--r--tools/objtool/include/objtool/objtool.h3
-rw-r--r--tools/objtool/include/objtool/special.h1
-rw-r--r--tools/objtool/special.c14
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt6
-rw-r--r--tools/perf/Documentation/perf-script.txt7
-rw-r--r--tools/perf/Makefile.config1
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--tools/perf/builtin-buildid-list.c3
-rw-r--r--tools/perf/builtin-record.c6
-rw-r--r--tools/perf/builtin-stat.c13
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/perf.c4
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/cache.json30
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/floating_point.json2
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/frontend.json124
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/locks.json4
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/marked.json61
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/memory.json79
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/others.json133
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pipeline.json135
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pmc.json8
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/translation.json22
-rw-r--r--tools/perf/pmu-events/jevents.c2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py12
-rw-r--r--tools/perf/tests/attr/base-record2
-rw-r--r--tools/perf/tests/pfm.c4
-rwxr-xr-xtools/perf/tests/shell/stat_bpf_counters.sh4
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h2
-rw-r--r--tools/perf/util/bpf_counter.c10
-rw-r--r--tools/perf/util/dwarf-aux.c8
-rw-r--r--tools/perf/util/env.c1
-rw-r--r--tools/perf/util/event.h2
-rw-r--r--tools/perf/util/evlist.c3
-rw-r--r--tools/perf/util/evsel.c1
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c6
-rw-r--r--tools/perf/util/intel-pt.c6
-rw-r--r--tools/perf/util/machine.c3
-rw-r--r--tools/perf/util/metricgroup.c14
-rw-r--r--tools/perf/util/parse-events.c13
-rw-r--r--tools/perf/util/parse-events.l1
-rw-r--r--tools/perf/util/perf_api_probe.c10
-rw-r--r--tools/perf/util/perf_api_probe.h1
-rw-r--r--tools/perf/util/pfm.c11
-rw-r--r--tools/perf/util/probe-finder.c3
-rw-r--r--tools/perf/util/session.c1
-rw-r--r--tools/perf/util/stat-display.c8
-rw-r--r--tools/perf/util/symbol-elf.c1
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c18
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c15
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c2
-rw-r--r--tools/power/x86/intel-speed-select/isst.h2
-rw-r--r--tools/scripts/Makefile.include30
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c2
-rw-r--r--tools/testing/selftests/bpf/.gitignore4
-rw-r--r--tools/testing/selftests/bpf/Makefile19
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs3
-rw-r--r--tools/testing/selftests/bpf/README.rst19
-rw-r--r--tools/testing/selftests/bpf/bench.c1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c6
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c2
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hashmap.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_module.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/link_pinning.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c288
-rw-r--r--tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c559
-rw-r--r--tools/testing/selftests/bpf/prog_tests/obj_name.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rdonly_maps.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c57
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/static_linked.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/syscall.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_bpf.c395
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c785
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/udp_limit.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c1
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c4
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/syscall.c121
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall5.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_args.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_and_delete.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_migrate_reuseport.c135
-rw-r--r--tools/testing/selftests/bpf/progs/test_rdonly_maps.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked1.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked2.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_bpf.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh.c33
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_peer.c56
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c6
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c94
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh1
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c8
-rw-r--r--tools/testing/selftests/bpf/test_maps.c185
-rw-r--r--tools/testing/selftests/bpf/test_progs.c3
-rw-r--r--tools/testing/selftests/bpf/test_progs.h9
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_redirect.sh216
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c7
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh204
-rw-r--r--tools/testing/selftests/bpf/verifier/and.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c14
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/jset.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/stack_ptr.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c15
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh3
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_scale.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh69
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh24
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_sample.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh167
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/psample.sh4
-rw-r--r--tools/testing/selftests/exec/Makefile6
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore2
-rw-r--r--tools/testing/selftests/futex/functional/Makefile7
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c136
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c171
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c126
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh6
-rw-r--r--tools/testing/selftests/kvm/.gitignore8
-rw-r--r--tools/testing/selftests/kvm/Makefile16
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c250
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c3
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c439
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c174
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c5
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c34
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h83
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h58
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h12
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/apic.h91
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/evmcs.h (renamed from tools/testing/selftests/kvm/include/evmcs.h)2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/hyperv.h185
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h66
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h11
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c237
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/handlers.S126
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c131
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c2
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c6
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c404
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h17
-rw-r--r--tools/testing/selftests/kvm/lib/perf_test_util.c8
-rw-r--r--tools/testing/selftests/kvm/lib/rbtree.c1
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c17
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c51
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/apic.c45
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c368
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c9
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c52
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c18
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1037
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c6
-rw-r--r--tools/testing/selftests/kvm/steal_time.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/emulator_error_test.c219
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c79
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_cpuid_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_msr_index_features.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c649
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmu_role_test.c147
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c242
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c65
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c2
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rw-r--r--tools/testing/selftests/lib/config1
-rwxr-xr-xtools/testing/selftests/lib/scanf.sh4
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c88
-rw-r--r--tools/testing/selftests/nci/.gitignore1
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py8
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh12
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh364
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh32
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh456
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh458
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh2
-rwxr-xr-xtools/testing/selftests/net/icmp.sh74
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh8
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c125
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh83
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh180
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh4
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh3
-rw-r--r--tools/testing/selftests/net/so_netns_cookie.c61
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh573
-rw-r--r--tools/testing/selftests/net/tls.c87
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh17
-rwxr-xr-xtools/testing/selftests/net/veth.sh5
-rw-r--r--tools/testing/selftests/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh221
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c7
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c14
-rw-r--r--tools/testing/selftests/proc/.gitignore1
-rw-r--r--tools/testing/selftests/rlimits/.gitignore2
-rw-r--r--tools/testing/selftests/rlimits/Makefile6
-rw-r--r--tools/testing/selftests/rlimits/config1
-rw-r--r--tools/testing/selftests/rlimits/rlimits-per-userns.c161
-rw-r--r--tools/testing/selftests/sched/.gitignore1
-rw-r--r--tools/testing/selftests/sched/Makefile14
-rw-r--r--tools/testing/selftests/sched/config1
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c338
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c10
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c78
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py42
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json45
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json28
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json8
-rw-r--r--tools/testing/selftests/vm/gup_test.c96
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh1
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config1
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c491
-rw-r--r--tools/testing/vsock/util.c32
-rw-r--r--tools/testing/vsock/util.h3
-rw-r--r--tools/testing/vsock/vsock_test.c116
-rw-r--r--tools/vm/page_owner_sort.c4
387 files changed, 17463 insertions, 3325 deletions
diff --git a/tools/arch/mips/include/uapi/asm/perf_regs.h b/tools/arch/mips/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..d0f4ecd616cf
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_MIPS_PERF_REGS_H
+#define _ASM_MIPS_PERF_REGS_H
+
+enum perf_event_mips_regs {
+ PERF_REG_MIPS_PC,
+ PERF_REG_MIPS_R1,
+ PERF_REG_MIPS_R2,
+ PERF_REG_MIPS_R3,
+ PERF_REG_MIPS_R4,
+ PERF_REG_MIPS_R5,
+ PERF_REG_MIPS_R6,
+ PERF_REG_MIPS_R7,
+ PERF_REG_MIPS_R8,
+ PERF_REG_MIPS_R9,
+ PERF_REG_MIPS_R10,
+ PERF_REG_MIPS_R11,
+ PERF_REG_MIPS_R12,
+ PERF_REG_MIPS_R13,
+ PERF_REG_MIPS_R14,
+ PERF_REG_MIPS_R15,
+ PERF_REG_MIPS_R16,
+ PERF_REG_MIPS_R17,
+ PERF_REG_MIPS_R18,
+ PERF_REG_MIPS_R19,
+ PERF_REG_MIPS_R20,
+ PERF_REG_MIPS_R21,
+ PERF_REG_MIPS_R22,
+ PERF_REG_MIPS_R23,
+ PERF_REG_MIPS_R24,
+ PERF_REG_MIPS_R25,
+ PERF_REG_MIPS_R26,
+ PERF_REG_MIPS_R27,
+ PERF_REG_MIPS_R28,
+ PERF_REG_MIPS_R29,
+ PERF_REG_MIPS_R30,
+ PERF_REG_MIPS_R31,
+ PERF_REG_MIPS_MAX = PERF_REG_MIPS_R31 + 1,
+};
+#endif /* _ASM_MIPS_PERF_REGS_H */
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
new file mode 100644
index 000000000000..3ad3da9a7d97
--- /dev/null
+++ b/tools/arch/x86/include/asm/asm.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ASM_H
+#define _ASM_X86_ASM_H
+
+#ifdef __ASSEMBLY__
+# define __ASM_FORM(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
+#else
+#include <linux/stringify.h>
+# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " "
+# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__)
+# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) ","
+#endif
+
+#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;)
+
+#ifndef __x86_64__
+/* 32 bit */
+# define __ASM_SEL(a,b) __ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
+#else
+/* 64 bit */
+# define __ASM_SEL(a,b) __ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
+#endif
+
+#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \
+ inst##q##__VA_ARGS__)
+#define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg)
+
+#define _ASM_PTR __ASM_SEL(.long, .quad)
+#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)
+
+#define _ASM_MOV __ASM_SIZE(mov)
+#define _ASM_INC __ASM_SIZE(inc)
+#define _ASM_DEC __ASM_SIZE(dec)
+#define _ASM_ADD __ASM_SIZE(add)
+#define _ASM_SUB __ASM_SIZE(sub)
+#define _ASM_XADD __ASM_SIZE(xadd)
+#define _ASM_MUL __ASM_SIZE(mul)
+
+#define _ASM_AX __ASM_REG(ax)
+#define _ASM_BX __ASM_REG(bx)
+#define _ASM_CX __ASM_REG(cx)
+#define _ASM_DX __ASM_REG(dx)
+#define _ASM_SP __ASM_REG(sp)
+#define _ASM_BP __ASM_REG(bp)
+#define _ASM_SI __ASM_REG(si)
+#define _ASM_DI __ASM_REG(di)
+
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1 _ASM_AX
+#define _ASM_ARG2 _ASM_DX
+#define _ASM_ARG3 _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1 _ASM_DI
+#define _ASM_ARG2 _ASM_SI
+#define _ASM_ARG3 _ASM_DX
+#define _ASM_ARG4 _ASM_CX
+#define _ASM_ARG5 r8
+#define _ASM_ARG6 r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
+/*
+ * Macros to generate condition code outputs from inline assembly,
+ * The output operand must be type "bool".
+ */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
+# define CC_OUT(c) "=@cc" #c
+#else
+# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
+# define CC_OUT(c) [_cc_ ## c] "=qm"
+#endif
+
+#ifdef __KERNEL__
+
+/* Exception table entry */
+#ifdef __ASSEMBLY__
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ .pushsection "__ex_table","a" ; \
+ .balign 4 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
+ .long (handler) - . ; \
+ .popsection
+
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# ifdef CONFIG_KPROBES
+# define _ASM_NOKPROBE(entry) \
+ .pushsection "_kprobe_blacklist","aw" ; \
+ _ASM_ALIGN ; \
+ _ASM_PTR (entry); \
+ .popsection
+# else
+# define _ASM_NOKPROBE(entry)
+# endif
+
+#else /* ! __ASSEMBLY__ */
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 4\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
+ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
+ " .popsection\n"
+
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_X86_ASM_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index b7dd944dc867..8f28fafa98b3 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,11 +56,8 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-#ifdef CONFIG_IOMMU_SUPPORT
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
+/* Force disable because it's broken beyond repair */
+#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f..43dcb9284208 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
* This file enumerates the exact layout of them:
*/
+/* This is used as an interrupt vector when programming the APIC. */
#define NMI_VECTOR 0x02
-#define MCE_VECTOR 0x12
/*
* IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
*/
#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
@@ -114,6 +114,9 @@
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
+#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
/*
* Size the maximum number of interrupts.
*
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index c1e5e818ba16..c5573eaa5bb9 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_NOPS_H
#define _ASM_X86_NOPS_H
+#include <asm/asm.h>
+
/*
* Define nops for use with alternative() and for tracing.
*/
@@ -57,20 +59,14 @@
#endif /* CONFIG_64BIT */
-#ifdef __ASSEMBLY__
-#define _ASM_MK_NOP(x) .byte x
-#else
-#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
-#endif
-
-#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1)
+#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2)
+#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3)
+#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4)
+#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5)
+#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6)
+#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
+#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
#define ASM_NOP_MAX 8
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82..0662f644aad9 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
__u16 flags;
} smm;
+ __u16 pad;
+
__u32 flags;
__u64 preemption_timer_deadline;
};
diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h
index 078cbd2ba651..de7f30f99af3 100644
--- a/tools/bootconfig/include/linux/bootconfig.h
+++ b/tools/bootconfig/include/linux/bootconfig.h
@@ -4,4 +4,8 @@
#include "../../../../include/linux/bootconfig.h"
+#ifndef fallthrough
+# define fallthrough
+#endif
+
#endif
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 7362bef1a368..6cd6080cac04 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -399,6 +399,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
}
/* TODO: Ensure the @path is initramfs/initrd image */
if (fstat(fd, &stat) < 0) {
+ ret = -errno;
pr_err("Failed to get the size of %s\n", path);
goto out;
}
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 790944c35602..baee8591ac76 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -30,7 +30,8 @@ CGROUP COMMANDS
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
-| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
+| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
+| **sock_release** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
@@ -106,6 +107,7 @@ DESCRIPTION
**getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
**getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
**getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
+ **sock_release** closing an userspace inet socket (since 5.9).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 358c7309d419..fe1b38e7e887 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -44,7 +44,7 @@ PROG COMMANDS
| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-| **cgroup/getsockopt** | **cgroup/setsockopt** |
+| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index b3073ae84018..d73232be1e99 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -136,7 +136,7 @@ endif
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
@@ -180,6 +180,9 @@ endif
CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
+$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
+ $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index d67518bcbd44..cc33c5824a2f 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -478,7 +478,7 @@ _bpftool()
cgroup/recvmsg4 cgroup/recvmsg6 \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
- cgroup/setsockopt struct_ops \
+ cgroup/setsockopt cgroup/sock_release struct_ops \
fentry fexit freplace sk_lookup" -- \
"$cur" ) )
return 0
@@ -1021,7 +1021,7 @@ _bpftool()
device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
getpeername4 getpeername6 getsockname4 getsockname6 \
sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
- setsockopt'
+ setsockopt sock_release'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
case $prev in
@@ -1032,7 +1032,7 @@ _bpftool()
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
post_bind4|post_bind6|connect4|connect6|getpeername4|\
getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
- recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
+ recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index d901cc1b904a..6e53b1d393f4 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -28,7 +28,8 @@
" connect6 | getpeername4 | getpeername6 |\n" \
" getsockname4 | getsockname6 | sendmsg4 |\n" \
" sendmsg6 | recvmsg4 | recvmsg6 |\n" \
- " sysctl | getsockopt | setsockopt }"
+ " sysctl | getsockopt | setsockopt |\n" \
+ " sock_release }"
static unsigned int query_flags;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 31ade77f5ef8..1d71ff8c52fa 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -18,6 +18,7 @@
#include <sys/stat.h>
#include <sys/mman.h>
#include <bpf/btf.h>
+#include <bpf/bpf_gen_internal.h>
#include "json_writer.h"
#include "main.h"
@@ -106,8 +107,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
if (strcmp(sec_name, ".data") == 0) {
sec_ident = "data";
+ strip_mods = true;
} else if (strcmp(sec_name, ".bss") == 0) {
sec_ident = "bss";
+ strip_mods = true;
} else if (strcmp(sec_name, ".rodata") == 0) {
sec_ident = "rodata";
strip_mods = true;
@@ -129,6 +132,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
int need_off = sec_var->offset, align_off, align;
__u32 var_type_id = var->type;
+ /* static variables are not exposed through BPF skeleton */
+ if (btf_var(var)->linkage == BTF_VAR_STATIC)
+ continue;
+
if (off > need_off) {
p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n",
sec_name, i, need_off, off);
@@ -268,6 +275,327 @@ static void codegen(const char *template, ...)
free(s);
}
+static void print_hex(const char *data, int data_sz)
+{
+ int i, len;
+
+ for (i = 0, len = 0; i < data_sz; i++) {
+ int w = data[i] ? 4 : 2;
+
+ len += w;
+ if (len > 78) {
+ printf("\\\n");
+ len = w;
+ }
+ if (!data[i])
+ printf("\\0");
+ else
+ printf("\\x%02x", (unsigned char)data[i]);
+ }
+}
+
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+ long page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t map_sz;
+
+ map_sz = (size_t)roundup(bpf_map__value_size(map), 8) * bpf_map__max_entries(map);
+ map_sz = roundup(map_sz, page_sz);
+ return map_sz;
+}
+
+static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name)
+{
+ struct bpf_program *prog;
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *tp_name;
+
+ codegen("\
+ \n\
+ \n\
+ static inline int \n\
+ %1$s__%2$s__attach(struct %1$s *skel) \n\
+ { \n\
+ int prog_fd = skel->progs.%2$s.prog_fd; \n\
+ ", obj_name, bpf_program__name(prog));
+
+ switch (bpf_program__get_type(prog)) {
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ tp_name = strchr(bpf_program__section_name(prog), '/') + 1;
+ printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name);
+ break;
+ case BPF_PROG_TYPE_TRACING:
+ printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n");
+ break;
+ default:
+ printf("\tint fd = ((void)prog_fd, 0); /* auto-attach not supported */\n");
+ break;
+ }
+ codegen("\
+ \n\
+ \n\
+ if (fd > 0) \n\
+ skel->links.%1$s_fd = fd; \n\
+ return fd; \n\
+ } \n\
+ ", bpf_program__name(prog));
+ }
+
+ codegen("\
+ \n\
+ \n\
+ static inline int \n\
+ %1$s__attach(struct %1$s *skel) \n\
+ { \n\
+ int ret = 0; \n\
+ \n\
+ ", obj_name);
+
+ bpf_object__for_each_program(prog, obj) {
+ codegen("\
+ \n\
+ ret = ret < 0 ? ret : %1$s__%2$s__attach(skel); \n\
+ ", obj_name, bpf_program__name(prog));
+ }
+
+ codegen("\
+ \n\
+ return ret < 0 ? ret : 0; \n\
+ } \n\
+ \n\
+ static inline void \n\
+ %1$s__detach(struct %1$s *skel) \n\
+ { \n\
+ ", obj_name);
+
+ bpf_object__for_each_program(prog, obj) {
+ codegen("\
+ \n\
+ skel_closenz(skel->links.%1$s_fd); \n\
+ ", bpf_program__name(prog));
+ }
+
+ codegen("\
+ \n\
+ } \n\
+ ");
+}
+
+static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
+{
+ struct bpf_program *prog;
+ struct bpf_map *map;
+
+ codegen("\
+ \n\
+ static void \n\
+ %1$s__destroy(struct %1$s *skel) \n\
+ { \n\
+ if (!skel) \n\
+ return; \n\
+ %1$s__detach(skel); \n\
+ ",
+ obj_name);
+
+ bpf_object__for_each_program(prog, obj) {
+ codegen("\
+ \n\
+ skel_closenz(skel->progs.%1$s.prog_fd); \n\
+ ", bpf_program__name(prog));
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ const char * ident;
+
+ ident = get_map_ident(map);
+ if (!ident)
+ continue;
+ if (bpf_map__is_internal(map) &&
+ (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ printf("\tmunmap(skel->%1$s, %2$zd);\n",
+ ident, bpf_map_mmap_sz(map));
+ codegen("\
+ \n\
+ skel_closenz(skel->maps.%1$s.map_fd); \n\
+ ", ident);
+ }
+ codegen("\
+ \n\
+ free(skel); \n\
+ } \n\
+ ",
+ obj_name);
+}
+
+static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
+{
+ struct bpf_object_load_attr load_attr = {};
+ DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+ struct bpf_map *map;
+ int err = 0;
+
+ err = bpf_object__gen_loader(obj, &opts);
+ if (err)
+ return err;
+
+ load_attr.obj = obj;
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ load_attr.log_level = 1 + 2 + 4;
+
+ err = bpf_object__load_xattr(&load_attr);
+ if (err) {
+ p_err("failed to load object file");
+ goto out;
+ }
+ /* If there was no error during load then gen_loader_opts
+ * are populated with the loader program.
+ */
+
+ /* finish generating 'struct skel' */
+ codegen("\
+ \n\
+ }; \n\
+ ", obj_name);
+
+
+ codegen_attach_detach(obj, obj_name);
+
+ codegen_destroy(obj, obj_name);
+
+ codegen("\
+ \n\
+ static inline struct %1$s * \n\
+ %1$s__open(void) \n\
+ { \n\
+ struct %1$s *skel; \n\
+ \n\
+ skel = calloc(sizeof(*skel), 1); \n\
+ if (!skel) \n\
+ goto cleanup; \n\
+ skel->ctx.sz = (void *)&skel->links - (void *)skel; \n\
+ ",
+ obj_name, opts.data_sz);
+ bpf_object__for_each_map(map, obj) {
+ const char *ident;
+ const void *mmap_data = NULL;
+ size_t mmap_size = 0;
+
+ ident = get_map_ident(map);
+ if (!ident)
+ continue;
+
+ if (!bpf_map__is_internal(map) ||
+ !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ continue;
+
+ codegen("\
+ \n\
+ skel->%1$s = \n\
+ mmap(NULL, %2$zd, PROT_READ | PROT_WRITE,\n\
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); \n\
+ if (skel->%1$s == (void *) -1) \n\
+ goto cleanup; \n\
+ memcpy(skel->%1$s, (void *)\"\\ \n\
+ ", ident, bpf_map_mmap_sz(map));
+ mmap_data = bpf_map__initial_value(map, &mmap_size);
+ print_hex(mmap_data, mmap_size);
+ printf("\", %2$zd);\n"
+ "\tskel->maps.%1$s.initial_value = (__u64)(long)skel->%1$s;\n",
+ ident, mmap_size);
+ }
+ codegen("\
+ \n\
+ return skel; \n\
+ cleanup: \n\
+ %1$s__destroy(skel); \n\
+ return NULL; \n\
+ } \n\
+ \n\
+ static inline int \n\
+ %1$s__load(struct %1$s *skel) \n\
+ { \n\
+ struct bpf_load_and_run_opts opts = {}; \n\
+ int err; \n\
+ \n\
+ opts.ctx = (struct bpf_loader_ctx *)skel; \n\
+ opts.data_sz = %2$d; \n\
+ opts.data = (void *)\"\\ \n\
+ ",
+ obj_name, opts.data_sz);
+ print_hex(opts.data, opts.data_sz);
+ codegen("\
+ \n\
+ \"; \n\
+ ");
+
+ codegen("\
+ \n\
+ opts.insns_sz = %d; \n\
+ opts.insns = (void *)\"\\ \n\
+ ",
+ opts.insns_sz);
+ print_hex(opts.insns, opts.insns_sz);
+ codegen("\
+ \n\
+ \"; \n\
+ err = bpf_load_and_run(&opts); \n\
+ if (err < 0) \n\
+ return err; \n\
+ ", obj_name);
+ bpf_object__for_each_map(map, obj) {
+ const char *ident, *mmap_flags;
+
+ ident = get_map_ident(map);
+ if (!ident)
+ continue;
+
+ if (!bpf_map__is_internal(map) ||
+ !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+ continue;
+ if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+ mmap_flags = "PROT_READ";
+ else
+ mmap_flags = "PROT_READ | PROT_WRITE";
+
+ printf("\tskel->%1$s =\n"
+ "\t\tmmap(skel->%1$s, %2$zd, %3$s, MAP_SHARED | MAP_FIXED,\n"
+ "\t\t\tskel->maps.%1$s.map_fd, 0);\n",
+ ident, bpf_map_mmap_sz(map), mmap_flags);
+ }
+ codegen("\
+ \n\
+ return 0; \n\
+ } \n\
+ \n\
+ static inline struct %1$s * \n\
+ %1$s__open_and_load(void) \n\
+ { \n\
+ struct %1$s *skel; \n\
+ \n\
+ skel = %1$s__open(); \n\
+ if (!skel) \n\
+ return NULL; \n\
+ if (%1$s__load(skel)) { \n\
+ %1$s__destroy(skel); \n\
+ return NULL; \n\
+ } \n\
+ return skel; \n\
+ } \n\
+ ", obj_name);
+
+ codegen("\
+ \n\
+ \n\
+ #endif /* %s */ \n\
+ ",
+ header_guard);
+ err = 0;
+out:
+ return err;
+}
+
static int do_skeleton(int argc, char **argv)
{
char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
@@ -277,7 +605,7 @@ static int do_skeleton(int argc, char **argv)
struct bpf_object *obj = NULL;
const char *file, *ident;
struct bpf_program *prog;
- int fd, len, err = -1;
+ int fd, err = -1;
struct bpf_map *map;
struct btf *btf;
struct stat st;
@@ -359,7 +687,25 @@ static int do_skeleton(int argc, char **argv)
}
get_header_guard(header_guard, obj_name);
- codegen("\
+ if (use_loader) {
+ codegen("\
+ \n\
+ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\
+ /* THIS FILE IS AUTOGENERATED! */ \n\
+ #ifndef %2$s \n\
+ #define %2$s \n\
+ \n\
+ #include <stdlib.h> \n\
+ #include <bpf/bpf.h> \n\
+ #include <bpf/skel_internal.h> \n\
+ \n\
+ struct %1$s { \n\
+ struct bpf_loader_ctx ctx; \n\
+ ",
+ obj_name, header_guard
+ );
+ } else {
+ codegen("\
\n\
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\
\n\
@@ -367,6 +713,7 @@ static int do_skeleton(int argc, char **argv)
#ifndef %2$s \n\
#define %2$s \n\
\n\
+ #include <errno.h> \n\
#include <stdlib.h> \n\
#include <bpf/libbpf.h> \n\
\n\
@@ -375,7 +722,8 @@ static int do_skeleton(int argc, char **argv)
struct bpf_object *obj; \n\
",
obj_name, header_guard
- );
+ );
+ }
if (map_cnt) {
printf("\tstruct {\n");
@@ -383,7 +731,10 @@ static int do_skeleton(int argc, char **argv)
ident = get_map_ident(map);
if (!ident)
continue;
- printf("\t\tstruct bpf_map *%s;\n", ident);
+ if (use_loader)
+ printf("\t\tstruct bpf_map_desc %s;\n", ident);
+ else
+ printf("\t\tstruct bpf_map *%s;\n", ident);
}
printf("\t} maps;\n");
}
@@ -391,14 +742,22 @@ static int do_skeleton(int argc, char **argv)
if (prog_cnt) {
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
- printf("\t\tstruct bpf_program *%s;\n",
- bpf_program__name(prog));
+ if (use_loader)
+ printf("\t\tstruct bpf_prog_desc %s;\n",
+ bpf_program__name(prog));
+ else
+ printf("\t\tstruct bpf_program *%s;\n",
+ bpf_program__name(prog));
}
printf("\t} progs;\n");
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
- printf("\t\tstruct bpf_link *%s;\n",
- bpf_program__name(prog));
+ if (use_loader)
+ printf("\t\tint %s_fd;\n",
+ bpf_program__name(prog));
+ else
+ printf("\t\tstruct bpf_link *%s;\n",
+ bpf_program__name(prog));
}
printf("\t} links;\n");
}
@@ -409,6 +768,10 @@ static int do_skeleton(int argc, char **argv)
if (err)
goto out;
}
+ if (use_loader) {
+ err = gen_trace(obj, obj_name, header_guard);
+ goto out;
+ }
codegen("\
\n\
@@ -431,18 +794,23 @@ static int do_skeleton(int argc, char **argv)
%1$s__open_opts(const struct bpf_object_open_opts *opts) \n\
{ \n\
struct %1$s *obj; \n\
+ int err; \n\
\n\
obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\
- if (!obj) \n\
+ if (!obj) { \n\
+ errno = ENOMEM; \n\
return NULL; \n\
- if (%1$s__create_skeleton(obj)) \n\
- goto err; \n\
- if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\
- goto err; \n\
+ } \n\
+ \n\
+ err = %1$s__create_skeleton(obj); \n\
+ err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\
+ if (err) \n\
+ goto err_out; \n\
\n\
return obj; \n\
- err: \n\
+ err_out: \n\
%1$s__destroy(obj); \n\
+ errno = -err; \n\
return NULL; \n\
} \n\
\n\
@@ -462,12 +830,15 @@ static int do_skeleton(int argc, char **argv)
%1$s__open_and_load(void) \n\
{ \n\
struct %1$s *obj; \n\
+ int err; \n\
\n\
obj = %1$s__open(); \n\
if (!obj) \n\
return NULL; \n\
- if (%1$s__load(obj)) { \n\
+ err = %1$s__load(obj); \n\
+ if (err) { \n\
%1$s__destroy(obj); \n\
+ errno = -err; \n\
return NULL; \n\
} \n\
return obj; \n\
@@ -498,7 +869,7 @@ static int do_skeleton(int argc, char **argv)
\n\
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
- return -1; \n\
+ goto err; \n\
obj->skeleton = s; \n\
\n\
s->sz = sizeof(*s); \n\
@@ -578,19 +949,7 @@ static int do_skeleton(int argc, char **argv)
file_sz);
/* embed contents of BPF object file */
- for (i = 0, len = 0; i < file_sz; i++) {
- int w = obj_data[i] ? 4 : 2;
-
- len += w;
- if (len > 78) {
- printf("\\\n");
- len = w;
- }
- if (!obj_data[i])
- printf("\\0");
- else
- printf("\\x%02x", (unsigned char)obj_data[i]);
- }
+ print_hex(obj_data, file_sz);
codegen("\
\n\
@@ -599,7 +958,7 @@ static int do_skeleton(int argc, char **argv)
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
- return -1; \n\
+ return -ENOMEM; \n\
} \n\
\n\
#endif /* %s */ \n\
@@ -636,7 +995,7 @@ static int do_object(int argc, char **argv)
while (argc) {
file = GET_ARG();
- err = bpf_linker__add_file(linker, file);
+ err = bpf_linker__add_file(linker, file, NULL);
if (err) {
p_err("failed to link '%s': %s (%d)", file, strerror(err), err);
goto out;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d9afb730136a..3ddfd4843738 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -29,6 +29,7 @@ bool show_pinned;
bool block_mount;
bool verifier_logs;
bool relaxed_maps;
+bool use_loader;
struct btf *base_btf;
struct pinned_obj_table prog_table;
struct pinned_obj_table map_table;
@@ -340,8 +341,10 @@ static int do_batch(int argc, char **argv)
n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
if (!n_argc)
continue;
- if (n_argc < 0)
+ if (n_argc < 0) {
+ err = n_argc;
goto err_close;
+ }
if (json_output) {
jsonw_start_object(json_wtr);
@@ -392,6 +395,7 @@ int main(int argc, char **argv)
{ "mapcompat", no_argument, NULL, 'm' },
{ "nomount", no_argument, NULL, 'n' },
{ "debug", no_argument, NULL, 'd' },
+ { "use-loader", no_argument, NULL, 'L' },
{ "base-btf", required_argument, NULL, 'B' },
{ 0 }
};
@@ -409,7 +413,7 @@ int main(int argc, char **argv)
hash_init(link_table.table);
opterr = 0;
- while ((opt = getopt_long(argc, argv, "VhpjfmndB:",
+ while ((opt = getopt_long(argc, argv, "VhpjfLmndB:",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
@@ -452,6 +456,9 @@ int main(int argc, char **argv)
return -1;
}
break;
+ case 'L':
+ use_loader = true;
+ break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 76e91641262b..c1cf29798b99 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -90,6 +90,7 @@ extern bool show_pids;
extern bool block_mount;
extern bool verifier_logs;
extern bool relaxed_maps;
+extern bool use_loader;
extern struct btf *base_btf;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3f067d2d7584..cc48726740ad 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -16,6 +16,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/syscall.h>
+#include <dirent.h>
#include <linux/err.h>
#include <linux/perf_event.h>
@@ -24,6 +25,8 @@
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
+#include <bpf/bpf_gen_internal.h>
+#include <bpf/skel_internal.h>
#include "cfg.h"
#include "main.h"
@@ -1499,7 +1502,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
set_max_rlimit();
obj = bpf_object__open_file(file, &open_opts);
- if (IS_ERR_OR_NULL(obj)) {
+ if (libbpf_get_error(obj)) {
p_err("failed to open object file");
goto err_free_reuse_maps;
}
@@ -1645,8 +1648,110 @@ err_free_reuse_maps:
return -1;
}
+static int count_open_fds(void)
+{
+ DIR *dp = opendir("/proc/self/fd");
+ struct dirent *de;
+ int cnt = -3;
+
+ if (!dp)
+ return -1;
+
+ while ((de = readdir(dp)))
+ cnt++;
+
+ closedir(dp);
+ return cnt;
+}
+
+static int try_loader(struct gen_loader_opts *gen)
+{
+ struct bpf_load_and_run_opts opts = {};
+ struct bpf_loader_ctx *ctx;
+ int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
+ sizeof(struct bpf_prog_desc));
+ int log_buf_sz = (1u << 24) - 1;
+ int err, fds_before, fd_delta;
+ char *log_buf;
+
+ ctx = alloca(ctx_sz);
+ memset(ctx, 0, ctx_sz);
+ ctx->sz = ctx_sz;
+ ctx->log_level = 1;
+ ctx->log_size = log_buf_sz;
+ log_buf = malloc(log_buf_sz);
+ if (!log_buf)
+ return -ENOMEM;
+ ctx->log_buf = (long) log_buf;
+ opts.ctx = ctx;
+ opts.data = gen->data;
+ opts.data_sz = gen->data_sz;
+ opts.insns = gen->insns;
+ opts.insns_sz = gen->insns_sz;
+ fds_before = count_open_fds();
+ err = bpf_load_and_run(&opts);
+ fd_delta = count_open_fds() - fds_before;
+ if (err < 0) {
+ fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf);
+ if (fd_delta)
+ fprintf(stderr, "loader prog leaked %d FDs\n",
+ fd_delta);
+ }
+ free(log_buf);
+ return err;
+}
+
+static int do_loader(int argc, char **argv)
+{
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+ DECLARE_LIBBPF_OPTS(gen_loader_opts, gen);
+ struct bpf_object_load_attr load_attr = {};
+ struct bpf_object *obj;
+ const char *file;
+ int err = 0;
+
+ if (!REQ_ARGS(1))
+ return -1;
+ file = GET_ARG();
+
+ obj = bpf_object__open_file(file, &open_opts);
+ if (libbpf_get_error(obj)) {
+ p_err("failed to open object file");
+ goto err_close_obj;
+ }
+
+ err = bpf_object__gen_loader(obj, &gen);
+ if (err)
+ goto err_close_obj;
+
+ load_attr.obj = obj;
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ load_attr.log_level = 1 + 2 + 4;
+
+ err = bpf_object__load_xattr(&load_attr);
+ if (err) {
+ p_err("failed to load object file");
+ goto err_close_obj;
+ }
+
+ if (verifier_logs) {
+ struct dump_data dd = {};
+
+ kernel_syms_load(&dd);
+ dump_xlated_plain(&dd, (void *)gen.insns, gen.insns_sz, false, false);
+ kernel_syms_destroy(&dd);
+ }
+ err = try_loader(&gen);
+err_close_obj:
+ bpf_object__close(obj);
+ return err;
+}
+
static int do_load(int argc, char **argv)
{
+ if (use_loader)
+ return do_loader(argc, argv);
return load_with_options(argc, argv, true);
}
@@ -2138,7 +2243,7 @@ static int do_help(int argc, char **argv)
" cgroup/getpeername4 | cgroup/getpeername6 |\n"
" cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
- " cgroup/getsockopt | cgroup/setsockopt |\n"
+ " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 6fc3e6f7f40c..f1f32e21d5cd 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+ else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[idx:%u]+%u", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_FUNC)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"subprog[%+d]", insn->imm);
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 7550fd9c3188..3ad9301b0f00 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -655,6 +655,9 @@ static int symbols_patch(struct object *obj)
if (sets_patch(obj))
return -1;
+ /* Set type to ensure endian translation occurs. */
+ obj->efile.idlist->d_type = ELF_T_WORD;
+
elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
err = elf_update(obj->efile.elf, ELF_C_WRITE);
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index cd72016c3cfa..715092fc6a23 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -51,39 +51,39 @@ subdir-obj-y :=
build-file := $(dir)/Build
-include $(build-file)
-quiet_cmd_flex = FLEX $@
-quiet_cmd_bison = BISON $@
+quiet_cmd_flex = FLEX $@
+quiet_cmd_bison = BISON $@
# Create directory unless it exists
-quiet_cmd_mkdir = MKDIR $(dir $@)
+quiet_cmd_mkdir = MKDIR $(dir $@)
cmd_mkdir = mkdir -p $(dir $@)
rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir))
# Compile command
-quiet_cmd_cc_o_c = CC $@
+quiet_cmd_cc_o_c = CC $@
cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
-quiet_cmd_host_cc_o_c = HOSTCC $@
+quiet_cmd_host_cc_o_c = HOSTCC $@
cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
-quiet_cmd_cxx_o_c = CXX $@
+quiet_cmd_cxx_o_c = CXX $@
cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
-quiet_cmd_cpp_i_c = CPP $@
+quiet_cmd_cpp_i_c = CPP $@
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
-quiet_cmd_cc_s_c = AS $@
+quiet_cmd_cc_s_c = AS $@
cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
-quiet_cmd_gen = GEN $@
+quiet_cmd_gen = GEN $@
# Link agregate command
# If there's nothing to link, create empty $@ object.
-quiet_cmd_ld_multi = LD $@
+quiet_cmd_ld_multi = LD $@
cmd_ld_multi = $(if $(strip $(obj-y)),\
$(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
-quiet_cmd_host_ld_multi = HOSTLD $@
+quiet_cmd_host_ld_multi = HOSTLD $@
cmd_host_ld_multi = $(if $(strip $(obj-y)),\
$(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@)
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 719f18b1edf0..f1af27ce9f20 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -196,7 +196,7 @@ else
fi
echo "For a more detailed explanation of the various taint flags see"
-echo " Documentation/admin-guide/tainted-kernels.rst in the the Linux kernel sources"
+echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources"
echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html"
echo "Raw taint value as int/string: $taint/'$out'"
#EOF#
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7f475d59a097..87d112650dfb 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -22,7 +22,7 @@
#include <linux/build_bug.h>
#define GENMASK_INPUT_CHECK(h, l) \
(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
- __builtin_constant_p((l) > (h)), (l) > (h), 0)))
+ __is_constexpr((l) > (h)), (l) > (h), 0)))
#else
/*
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h
index 81b8aae5a855..435ddd72d2c4 100644
--- a/tools/include/linux/const.h
+++ b/tools/include/linux/const.h
@@ -3,4 +3,12 @@
#include <vdso/const.h>
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
#endif /* _LINUX_CONST_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 6de5a7fc066b..d2a942086fcb 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
#define __NR_mount_setattr 442
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ec6d85a81744..bf9252c7381e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
* Look up an element with the given *key* in the map referred to
* by the file descriptor *fd*, and if found, delete the element.
*
+ * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ * types, the *flags* argument needs to be set to 0, but for other
+ * map types, it may be specified as:
+ *
+ * **BPF_F_LOCK**
+ * Look up and delete the value of a spin-locked map
+ * without returning the lock. This must be specified if
+ * the elements contain a spinlock.
+ *
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
* implement this command as a "pop" operation, deleting the top
* element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
* This command is only valid for the following map types:
* * **BPF_MAP_TYPE_QUEUE**
* * **BPF_MAP_TYPE_STACK**
+ * * **BPF_MAP_TYPE_HASH**
+ * * **BPF_MAP_TYPE_PERCPU_HASH**
+ * * **BPF_MAP_TYPE_LRU_HASH**
+ * * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
@@ -837,6 +850,7 @@ enum bpf_cmd {
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
BPF_PROG_TEST_RUN,
+ BPF_PROG_RUN = BPF_PROG_TEST_RUN,
BPF_PROG_GET_NEXT_ID,
BPF_MAP_GET_NEXT_ID,
BPF_PROG_GET_FD_BY_ID,
@@ -937,6 +951,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
};
enum bpf_attach_type {
@@ -979,6 +994,8 @@ enum bpf_attach_type {
BPF_SK_LOOKUP,
BPF_XDP,
BPF_SK_SKB_VERDICT,
+ BPF_SK_REUSEPORT_SELECT,
+ BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
__MAX_BPF_ATTACH_TYPE
};
@@ -1097,8 +1114,8 @@ enum bpf_link_type {
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
- * insn[0].src_reg: BPF_PSEUDO_MAP_FD
- * insn[0].imm: map fd
+ * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX]
+ * insn[0].imm: map fd or fd_idx
* insn[1].imm: 0
* insn[0].off: 0
* insn[1].off: 0
@@ -1106,15 +1123,19 @@ enum bpf_link_type {
* verifier type: CONST_PTR_TO_MAP
*/
#define BPF_PSEUDO_MAP_FD 1
-/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
- * insn[0].imm: map fd
+#define BPF_PSEUDO_MAP_IDX 5
+
+/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE
+ * insn[0].imm: map fd or fd_idx
* insn[1].imm: offset into value
* insn[0].off: 0
* insn[1].off: 0
* ldimm64 rewrite: address of map[0]+offset
* verifier type: PTR_TO_MAP_VALUE
*/
-#define BPF_PSEUDO_MAP_VALUE 2
+#define BPF_PSEUDO_MAP_VALUE 2
+#define BPF_PSEUDO_MAP_IDX_VALUE 6
+
/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
* insn[0].imm: kernel btd id of VAR
* insn[1].imm: 0
@@ -1314,6 +1335,8 @@ union bpf_attr {
/* or valid module BTF object fd or 0 to attach to vmlinux */
__u32 attach_btf_obj_fd;
};
+ __u32 :32; /* pad */
+ __aligned_u64 fd_array; /* array of FDs */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2534,8 +2557,12 @@ union bpf_attr {
* The lower two bits of *flags* are used as the return code if
* the map lookup fails. This is so that the return value can be
* one of the XDP program return codes up to **XDP_TX**, as chosen
- * by the caller. Any higher bits in the *flags* argument must be
- * unset.
+ * by the caller. The higher bits of *flags* can be set to
+ * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+ *
+ * With BPF_F_BROADCAST the packet will be broadcasted to all the
+ * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress
+ * interface will be excluded when do broadcasting.
*
* See also **bpf_redirect**\ (), which only supports redirecting
* to an ifindex, but doesn't require a map to do so.
@@ -4735,6 +4762,24 @@ union bpf_attr {
* be zero-terminated except when **str_size** is 0.
*
* Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * Description
+ * Execute bpf syscall with given arguments.
+ * Return
+ * A syscall result.
+ *
+ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags)
+ * Description
+ * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
+ * Return
+ * Returns btf_id and btf_obj_fd in lower and upper 32 bits.
+ *
+ * long bpf_sys_close(u32 fd)
+ * Description
+ * Execute close syscall for given FD.
+ * Return
+ * A syscall result.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4903,6 +4948,9 @@ union bpf_attr {
FN(check_mtu), \
FN(for_each_map_elem), \
FN(snprintf), \
+ FN(sys_bpf), \
+ FN(btf_find_by_name_kind), \
+ FN(sys_close), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5080,6 +5128,12 @@ enum {
BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
};
+/* Flags for bpf_redirect_map helper */
+enum {
+ BPF_F_BROADCAST = (1ULL << 3),
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+};
+
#define __bpf_md_ptr(type, name) \
union { \
type name; \
@@ -5364,6 +5418,20 @@ struct sk_reuseport_md {
__u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
__u32 bind_inany; /* Is sock bound to an INANY address? */
__u32 hash; /* A hash of the packet 4 tuples */
+ /* When reuse->migrating_sk is NULL, it is selecting a sk for the
+ * new incoming connection request (e.g. selecting a listen sk for
+ * the received SYN in the TCP case). reuse->sk is one of the sk
+ * in the reuseport group. The bpf prog can use reuse->sk to learn
+ * the local listening ip/port without looking into the skb.
+ *
+ * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+ * reuse->migrating_sk is the socket that needs to be migrated
+ * to another listening socket. migrating_sk could be a fullsock
+ * sk that is fully established or a reqsk that is in-the-middle
+ * of 3-way handshake.
+ */
+ __bpf_md_ptr(struct bpf_sock *, sk);
+ __bpf_md_ptr(struct bpf_sock *, migrating_sk);
};
#define BPF_TAG_SIZE 8
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index f44eb0a04afd..4c32e97dcdf0 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -185,7 +185,7 @@ struct fsxattr {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
/*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
*/
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 7d6687618d80..d1b327036ae4 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -289,6 +289,9 @@ struct sockaddr_in {
/* Address indicating an error return. */
#define INADDR_NONE ((unsigned long int) 0xffffffff)
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define INADDR_DUMMY ((unsigned long int) 0xc0000008)
+
/* Network number for local host loopback. */
#define IN_LOOPBACKNET 127
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 3fd9a7e9d90c..79d9c44d1ad7 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -8,6 +8,7 @@
* Note: you must update KVM_API_VERSION if you change this interface.
*/
+#include <linux/const.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/ioctl.h>
@@ -1879,8 +1880,8 @@ struct kvm_hyperv_eventfd {
* conversion after harvesting an entry. Also, it must not skip any
* dirty bits, so that dirty bits are always harvested in sequence.
*/
-#define KVM_DIRTY_GFN_F_DIRTY BIT(0)
-#define KVM_DIRTY_GFN_F_RESET BIT(1)
+#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0)
+#define KVM_DIRTY_GFN_F_RESET _BITUL(1)
#define KVM_DIRTY_GFN_F_MASK 0x3
/*
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index bf8143505c49..f92880a15645 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -464,7 +464,7 @@ struct perf_event_attr {
/*
* User provided data if sigtrap=1, passed back to user via
- * siginfo_t::si_perf, e.g. to permit user to identify the event.
+ * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
*/
__u64 sig_data;
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 18a9f59dc067..967d9c55323d 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -259,4 +259,12 @@ struct prctl_mm_map {
#define PR_PAC_SET_ENABLED_KEYS 60
#define PR_PAC_GET_ENABLED_KEYS 61
+/* Request the scheduler to share a core */
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 9b057cc7650a..430f6874fa41 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o strset.o linker.o
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index e43e1896cb4b..ec14aa725bb0 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -223,18 +223,14 @@ install_lib: all_cmd
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
+INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
+ bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h \
+ bpf_endian.h bpf_core_read.h skel_internal.h
+
install_headers: $(BPF_HELPER_DEFS)
- $(call QUIET_INSTALL, headers) \
- $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
- $(call do_install,btf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
- $(call do_install,xsk.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
- $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
- $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
+ $(call QUIET_INSTALL, headers) \
+ $(foreach hdr,$(INSTALL_HEADERS), \
+ $(call do_install,$(hdr),$(prefix)/include/bpf,644);)
install_pkgconfig: $(PC_FILE)
$(call QUIET_INSTALL, $(PC_FILE)) \
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
deleted file mode 100644
index 8928f7787f2d..000000000000
--- a/tools/lib/bpf/README.rst
+++ /dev/null
@@ -1,168 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-libbpf API naming convention
-============================
-
-libbpf API provides access to a few logically separated groups of
-functions and types. Every group has its own naming convention
-described here. It's recommended to follow these conventions whenever a
-new function or type is added to keep libbpf API clean and consistent.
-
-All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``perf_buffer_``.
-
-System call wrappers
---------------------
-
-System call wrappers are simple wrappers for commands supported by
-sys_bpf system call. These wrappers should go to ``bpf.h`` header file
-and map one-on-one to corresponding commands.
-
-For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
-command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
-
-Objects
--------
-
-Another class of types and functions provided by libbpf API is "objects"
-and functions to work with them. Objects are high-level abstractions
-such as BPF program or BPF map. They're represented by corresponding
-structures such as ``struct bpf_object``, ``struct bpf_program``,
-``struct bpf_map``, etc.
-
-Structures are forward declared and access to their fields should be
-provided via corresponding getters and setters rather than directly.
-
-These objects are associated with corresponding parts of ELF object that
-contains compiled BPF programs.
-
-For example ``struct bpf_object`` represents ELF object itself created
-from an ELF file or from a buffer, ``struct bpf_program`` represents a
-program in ELF object and ``struct bpf_map`` is a map.
-
-Functions that work with an object have names built from object name,
-double underscore and part that describes function purpose.
-
-For example ``bpf_object__open`` consists of the name of corresponding
-object, ``bpf_object``, double underscore and ``open`` that defines the
-purpose of the function to open ELF file and create ``bpf_object`` from
-it.
-
-Another example: ``bpf_program__load`` is named for corresponding
-object, ``bpf_program``, that is separated from other part of the name
-by double underscore.
-
-All objects and corresponding functions other than BTF related should go
-to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
-
-Auxiliary functions
--------------------
-
-Auxiliary functions and types that don't fit well in any of categories
-described above should have ``libbpf_`` prefix, e.g.
-``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
-
-AF_XDP functions
--------------------
-
-AF_XDP functions should have an ``xsk_`` prefix, e.g.
-``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
-of both low-level ring access functions and high-level configuration
-functions. These can be mixed and matched. Note that these functions
-are not reentrant for performance reasons.
-
-Please take a look at Documentation/networking/af_xdp.rst in the Linux
-kernel source tree on how to use XDP sockets and for some common
-mistakes in case you do not get any traffic up to user space.
-
-libbpf ABI
-==========
-
-libbpf can be both linked statically or used as DSO. To avoid possible
-conflicts with other libraries an application is linked with, all
-non-static libbpf symbols should have one of the prefixes mentioned in
-API documentation above. See API naming convention to choose the right
-name for a new symbol.
-
-Symbol visibility
------------------
-
-libbpf follow the model when all global symbols have visibility "hidden"
-by default and to make a symbol visible it has to be explicitly
-attributed with ``LIBBPF_API`` macro. For example:
-
-.. code-block:: c
-
- LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
-
-This prevents from accidentally exporting a symbol, that is not supposed
-to be a part of ABI what, in turn, improves both libbpf developer- and
-user-experiences.
-
-ABI versionning
----------------
-
-To make future ABI extensions possible libbpf ABI is versioned.
-Versioning is implemented by ``libbpf.map`` version script that is
-passed to linker.
-
-Version name is ``LIBBPF_`` prefix + three-component numeric version,
-starting from ``0.0.1``.
-
-Every time ABI is being changed, e.g. because a new symbol is added or
-semantic of existing symbol is changed, ABI version should be bumped.
-This bump in ABI version is at most once per kernel development cycle.
-
-For example, if current state of ``libbpf.map`` is:
-
-.. code-block::
- LIBBPF_0.0.1 {
- global:
- bpf_func_a;
- bpf_func_b;
- local:
- \*;
- };
-
-, and a new symbol ``bpf_func_c`` is being introduced, then
-``libbpf.map`` should be changed like this:
-
-.. code-block::
- LIBBPF_0.0.1 {
- global:
- bpf_func_a;
- bpf_func_b;
- local:
- \*;
- };
- LIBBPF_0.0.2 {
- global:
- bpf_func_c;
- } LIBBPF_0.0.1;
-
-, where new version ``LIBBPF_0.0.2`` depends on the previous
-``LIBBPF_0.0.1``.
-
-Format of version script and ways to handle ABI changes, including
-incompatible ones, described in details in [1].
-
-Stand-alone build
-=================
-
-Under https://github.com/libbpf/libbpf there is a (semi-)automated
-mirror of the mainline's version of libbpf for a stand-alone build.
-
-However, all changes to libbpf's code base must be upstreamed through
-the mainline kernel tree.
-
-License
-=======
-
-libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
-
-Links
-=====
-
-[1] https://www.akkadia.org/drepper/dsohowto.pdf
- (Chapter 3. Maintaining APIs and ABIs).
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index bba48ff4c5c0..86dcac44f32f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -80,6 +80,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
union bpf_attr attr;
+ int fd;
memset(&attr, '\0', sizeof(attr));
@@ -102,7 +103,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
else
attr.inner_map_fd = create_attr->inner_map_fd;
- return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
@@ -160,6 +162,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
__u32 map_flags, int node)
{
union bpf_attr attr;
+ int fd;
memset(&attr, '\0', sizeof(attr));
@@ -178,7 +181,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
attr.numa_node = node;
}
- return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
@@ -222,10 +226,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
int fd;
if (!load_attr->log_buf != !load_attr->log_buf_sz)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
@@ -281,8 +285,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
load_attr->func_info_cnt,
load_attr->func_info_rec_size,
attr.func_info_rec_size);
- if (!finfo)
+ if (!finfo) {
+ errno = E2BIG;
goto done;
+ }
attr.func_info = ptr_to_u64(finfo);
attr.func_info_rec_size = load_attr->func_info_rec_size;
@@ -293,8 +299,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
load_attr->line_info_cnt,
load_attr->line_info_rec_size,
attr.line_info_rec_size);
- if (!linfo)
+ if (!linfo) {
+ errno = E2BIG;
goto done;
+ }
attr.line_info = ptr_to_u64(linfo);
attr.line_info_rec_size = load_attr->line_info_rec_size;
@@ -318,9 +326,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
fd = sys_bpf_prog_load(&attr, sizeof(attr));
done:
+ /* free() doesn't affect errno, so we don't need to restore it */
free(finfo);
free(linfo);
- return fd;
+ return libbpf_err_errno(fd);
}
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
@@ -329,7 +338,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
struct bpf_prog_load_params p = {};
if (!load_attr || !log_buf != !log_buf_sz)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
p.prog_type = load_attr->prog_type;
p.expected_attach_type = load_attr->expected_attach_type;
@@ -391,6 +400,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
int log_level)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.prog_type = type;
@@ -404,13 +414,15 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
attr.kern_version = kern_version;
attr.prog_flags = prog_flags;
- return sys_bpf_prog_load(&attr, sizeof(attr));
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
@@ -418,24 +430,28 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
attr.value = ptr_to_u64(value);
attr.flags = flags;
- return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
- return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
@@ -443,17 +459,33 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
attr.value = ptr_to_u64(value);
attr.flags = flags;
- return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
{
union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+
+ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
+}
+
+int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+ union bpf_attr attr;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
+ attr.flags = flags;
return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
}
@@ -461,34 +493,40 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
int bpf_map_delete_elem(int fd, const void *key)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
- return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.next_key = ptr_to_u64(next_key);
- return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_map_freeze(int fd)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
- return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
@@ -500,7 +538,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
int ret;
if (!OPTS_VALID(opts, bpf_map_batch_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.batch.map_fd = fd;
@@ -515,7 +553,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
ret = sys_bpf(cmd, &attr, sizeof(attr));
*count = attr.batch.count;
- return ret;
+ return libbpf_err_errno(ret);
}
int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
@@ -552,22 +590,26 @@ int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.pathname = ptr_to_u64((void *)pathname);
attr.bpf_fd = fd;
- return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_obj_get(const char *pathname)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.pathname = ptr_to_u64((void *)pathname);
- return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
@@ -585,9 +627,10 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
const struct bpf_prog_attach_opts *opts)
{
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
@@ -596,30 +639,35 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
attr.attach_flags = OPTS_GET(opts, flags, 0);
attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
- return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_type = type;
- return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
- return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_link_create(int prog_fd, int target_fd,
@@ -628,15 +676,16 @@ int bpf_link_create(int prog_fd, int target_fd,
{
__u32 target_btf_id, iter_info_len;
union bpf_attr attr;
+ int fd;
if (!OPTS_VALID(opts, bpf_link_create_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
iter_info_len = OPTS_GET(opts, iter_info_len, 0);
target_btf_id = OPTS_GET(opts, target_btf_id, 0);
if (iter_info_len && target_btf_id)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.link_create.prog_fd = prog_fd;
@@ -652,26 +701,30 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.target_btf_id = target_btf_id;
}
- return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_link_detach(int link_fd)
{
union bpf_attr attr;
+ int ret;
memset(&attr, 0, sizeof(attr));
attr.link_detach.link_fd = link_fd;
- return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_link_update_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.link_update.link_fd = link_fd;
@@ -679,17 +732,20 @@ int bpf_link_update(int link_fd, int new_prog_fd,
attr.link_update.flags = OPTS_GET(opts, flags, 0);
attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
- return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
int bpf_iter_create(int link_fd)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.iter_create.link_fd = link_fd;
- return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
@@ -706,10 +762,12 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
attr.query.prog_ids = ptr_to_u64(prog_ids);
ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+
if (attach_flags)
*attach_flags = attr.query.attach_flags;
*prog_cnt = attr.query.prog_cnt;
- return ret;
+
+ return libbpf_err_errno(ret);
}
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
@@ -727,13 +785,15 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
attr.test.repeat = repeat;
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
if (size_out)
*size_out = attr.test.data_size_out;
if (retval)
*retval = attr.test.retval;
if (duration)
*duration = attr.test.duration;
- return ret;
+
+ return libbpf_err_errno(ret);
}
int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
@@ -742,7 +802,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
int ret;
if (!test_attr->data_out && test_attr->data_size_out > 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.test.prog_fd = test_attr->prog_fd;
@@ -757,11 +817,13 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
attr.test.repeat = test_attr->repeat;
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
test_attr->data_size_out = attr.test.data_size_out;
test_attr->ctx_size_out = attr.test.ctx_size_out;
test_attr->retval = attr.test.retval;
test_attr->duration = attr.test.duration;
- return ret;
+
+ return libbpf_err_errno(ret);
}
int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
@@ -770,7 +832,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
int ret;
if (!OPTS_VALID(opts, bpf_test_run_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.test.prog_fd = prog_fd;
@@ -788,11 +850,13 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
OPTS_SET(opts, data_size_out, attr.test.data_size_out);
OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
OPTS_SET(opts, duration, attr.test.duration);
OPTS_SET(opts, retval, attr.test.retval);
- return ret;
+
+ return libbpf_err_errno(ret);
}
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
@@ -807,7 +871,7 @@ static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
if (!err)
*next_id = attr.next_id;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
@@ -833,41 +897,49 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
int bpf_prog_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.prog_id = id;
- return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_map_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.map_id = id;
- return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_btf_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.btf_id = id;
- return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_link_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.link_id = id;
- return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
@@ -881,21 +953,24 @@ int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
attr.info.info = ptr_to_u64(info);
err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+
if (!err)
*info_len = attr.info.info_len;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.raw_tracepoint.name = ptr_to_u64(name);
attr.raw_tracepoint.prog_fd = prog_fd;
- return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
@@ -915,12 +990,13 @@ retry:
}
fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
- if (fd == -1 && !do_log && log_buf && log_buf_size) {
+
+ if (fd < 0 && !do_log && log_buf && log_buf_size) {
do_log = true;
goto retry;
}
- return fd;
+ return libbpf_err_errno(fd);
}
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
@@ -937,37 +1013,42 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
attr.task_fd_query.buf_len = *buf_len;
err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+
*buf_len = attr.task_fd_query.buf_len;
*prog_id = attr.task_fd_query.prog_id;
*fd_type = attr.task_fd_query.fd_type;
*probe_offset = attr.task_fd_query.probe_offset;
*probe_addr = attr.task_fd_query.probe_addr;
- return err;
+ return libbpf_err_errno(err);
}
int bpf_enable_stats(enum bpf_stats_type type)
{
union bpf_attr attr;
+ int fd;
memset(&attr, 0, sizeof(attr));
attr.enable_stats.type = type;
- return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+ fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+ return libbpf_err_errno(fd);
}
int bpf_prog_bind_map(int prog_fd, int map_fd,
const struct bpf_prog_bind_opts *opts)
{
union bpf_attr attr;
+ int ret;
if (!OPTS_VALID(opts, bpf_prog_bind_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memset(&attr, 0, sizeof(attr));
attr.prog_bind_map.prog_fd = prog_fd;
attr.prog_bind_map.map_fd = map_fd;
attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
- return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+ return libbpf_err_errno(ret);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 875dde20d56e..4f758f8f50cd 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -124,6 +124,8 @@ LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
__u64 flags);
LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
+ void *value, __u64 flags);
LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
LIBBPF_API int bpf_map_freeze(int fd);
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
new file mode 100644
index 000000000000..615400391e57
--- /dev/null
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __BPF_GEN_INTERNAL_H
+#define __BPF_GEN_INTERNAL_H
+
+struct ksym_relo_desc {
+ const char *name;
+ int kind;
+ int insn_idx;
+};
+
+struct bpf_gen {
+ struct gen_loader_opts *opts;
+ void *data_start;
+ void *data_cur;
+ void *insn_start;
+ void *insn_cur;
+ ssize_t cleanup_label;
+ __u32 nr_progs;
+ __u32 nr_maps;
+ int log_level;
+ int error;
+ struct ksym_relo_desc *relos;
+ int relo_cnt;
+ char attach_target[128];
+ int attach_kind;
+};
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level);
+int bpf_gen__finish(struct bpf_gen *gen);
+void bpf_gen__free(struct bpf_gen *gen);
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
+void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx);
+struct bpf_prog_load_params;
+void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx);
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx);
+
+#endif
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 9720dc0b4605..b9987c3efa3c 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -158,4 +158,70 @@ enum libbpf_tristate {
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
+#ifndef ___bpf_concat
+#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
+#define ___bpf_narg(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
+/*
+ * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
+ * in a structure.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_snprintf(out, out_size, ___fmt, \
+ ___param, sizeof(___param)); \
+})
+
#endif
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index 3ed1a27b5f7c..5c503096ef43 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -106,7 +106,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
nr_linfo = info->nr_line_info;
if (!nr_linfo)
- return NULL;
+ return errno = EINVAL, NULL;
/*
* The min size that bpf_prog_linfo has to access for
@@ -114,11 +114,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
*/
if (info->line_info_rec_size <
offsetof(struct bpf_line_info, file_name_off))
- return NULL;
+ return errno = EINVAL, NULL;
prog_linfo = calloc(1, sizeof(*prog_linfo));
if (!prog_linfo)
- return NULL;
+ return errno = ENOMEM, NULL;
/* Copy xlated line_info */
prog_linfo->nr_linfo = nr_linfo;
@@ -174,7 +174,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
err_free:
bpf_prog_linfo__free(prog_linfo);
- return NULL;
+ return errno = EINVAL, NULL;
}
const struct bpf_line_info *
@@ -186,11 +186,11 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
const __u64 *jited_linfo;
if (func_idx >= prog_linfo->nr_jited_func)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
if (nr_skip >= nr_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
jited_rec_size = prog_linfo->jited_rec_size;
@@ -198,7 +198,7 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
(start * jited_rec_size);
jited_linfo = raw_jited_linfo;
if (addr < *jited_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo -= nr_skip;
rec_size = prog_linfo->rec_size;
@@ -225,13 +225,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
nr_linfo = prog_linfo->nr_linfo;
if (nr_skip >= nr_linfo)
- return NULL;
+ return errno = ENOENT, NULL;
rec_size = prog_linfo->rec_size;
raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
linfo = raw_linfo;
if (insn_off < linfo->insn_off)
- return NULL;
+ return errno = ENOENT, NULL;
nr_linfo -= nr_skip;
for (i = 0; i < nr_linfo; i++) {
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 8c954ebc0c7c..d6bfbe009296 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -25,26 +25,35 @@
#define bpf_target_sparc
#define bpf_target_defined
#else
- #undef bpf_target_defined
-#endif
/* Fall back to what the compiler says */
-#ifndef bpf_target_defined
#if defined(__x86_64__)
#define bpf_target_x86
+ #define bpf_target_defined
#elif defined(__s390__)
#define bpf_target_s390
+ #define bpf_target_defined
#elif defined(__arm__)
#define bpf_target_arm
+ #define bpf_target_defined
#elif defined(__aarch64__)
#define bpf_target_arm64
+ #define bpf_target_defined
#elif defined(__mips__)
#define bpf_target_mips
+ #define bpf_target_defined
#elif defined(__powerpc__)
#define bpf_target_powerpc
+ #define bpf_target_defined
#elif defined(__sparc__)
#define bpf_target_sparc
+ #define bpf_target_defined
+#endif /* no compiler target */
+
#endif
+
+#ifndef __BPF_TARGET_MISSING
+#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\""
#endif
#if defined(bpf_target_x86)
@@ -287,7 +296,7 @@ struct pt_regs;
#elif defined(bpf_target_sparc)
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
-#else
+#elif defined(bpf_target_defined)
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
@@ -295,13 +304,48 @@ struct pt_regs;
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
+#if !defined(bpf_target_defined)
+
+#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#endif /* !defined(bpf_target_defined) */
+
+#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
#define ___bpf_narg(...) \
___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
- ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+#endif
#define ___bpf_ctx_cast0() ctx
#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
@@ -413,56 +457,4 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-#define ___bpf_fill0(arr, p, x) do {} while (0)
-#define ___bpf_fill1(arr, p, x) arr[p] = x
-#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
-#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
-#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
-#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
-#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
-#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
-#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
-#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
-#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
-#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
-#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
-#define ___bpf_fill(arr, args...) \
- ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
-
-/*
- * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
- * in a structure.
- */
-#define BPF_SEQ_PRINTF(seq, fmt, args...) \
-({ \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[___bpf_narg(args)]; \
- \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- ___bpf_fill(___param, args); \
- _Pragma("GCC diagnostic pop") \
- \
- bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
- ___param, sizeof(___param)); \
-})
-
-/*
- * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
- * an array of u64.
- */
-#define BPF_SNPRINTF(out, out_size, fmt, args...) \
-({ \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[___bpf_narg(args)]; \
- \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- ___bpf_fill(___param, args); \
- _Pragma("GCC diagnostic pop") \
- \
- bpf_snprintf(out, out_size, ___fmt, \
- ___param, sizeof(___param)); \
-})
-
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d57e13a13798..b46760b93bb4 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -443,7 +443,7 @@ struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
{
if (type_id >= btf->start_id + btf->nr_types)
- return NULL;
+ return errno = EINVAL, NULL;
return btf_type_by_id((struct btf *)btf, type_id);
}
@@ -510,7 +510,7 @@ size_t btf__pointer_size(const struct btf *btf)
int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
{
if (ptr_sz != 4 && ptr_sz != 8)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
btf->ptr_sz = ptr_sz;
return 0;
}
@@ -537,7 +537,7 @@ enum btf_endianness btf__endianness(const struct btf *btf)
int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
{
if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
if (!btf->swapped_endian) {
@@ -568,8 +568,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
int i;
t = btf__type_by_id(btf, type_id);
- for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
- i++) {
+ for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) {
switch (btf_kind(t)) {
case BTF_KIND_INT:
case BTF_KIND_STRUCT:
@@ -592,12 +591,12 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_ARRAY:
array = btf_array(t);
if (nelems && array->nelems > UINT32_MAX / nelems)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
nelems *= array->nelems;
type_id = array->type;
break;
default:
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
t = btf__type_by_id(btf, type_id);
@@ -605,9 +604,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
done:
if (size < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (nelems && size > UINT32_MAX / nelems)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
return nelems * size;
}
@@ -640,7 +639,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
for (i = 0; i < vlen; i++, m++) {
align = btf__align_of(btf, m->type);
if (align <= 0)
- return align;
+ return libbpf_err(align);
max_align = max(max_align, align);
}
@@ -648,7 +647,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
}
default:
pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
- return 0;
+ return errno = EINVAL, 0;
}
}
@@ -667,7 +666,7 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
}
if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return type_id;
}
@@ -687,7 +686,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
return i;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
@@ -709,7 +708,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
return i;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
static bool btf_is_modifiable(const struct btf *btf)
@@ -785,12 +784,12 @@ static struct btf *btf_new_empty(struct btf *base_btf)
struct btf *btf__new_empty(void)
{
- return btf_new_empty(NULL);
+ return libbpf_ptr(btf_new_empty(NULL));
}
struct btf *btf__new_empty_split(struct btf *base_btf)
{
- return btf_new_empty(base_btf);
+ return libbpf_ptr(btf_new_empty(base_btf));
}
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
@@ -846,7 +845,7 @@ done:
struct btf *btf__new(const void *data, __u32 size)
{
- return btf_new(data, size, NULL);
+ return libbpf_ptr(btf_new(data, size, NULL));
}
static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
@@ -937,7 +936,8 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
goto done;
}
btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
- if (IS_ERR(btf))
+ err = libbpf_get_error(btf);
+ if (err)
goto done;
switch (gelf_getclass(elf)) {
@@ -953,9 +953,9 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}
if (btf_ext && btf_ext_data) {
- *btf_ext = btf_ext__new(btf_ext_data->d_buf,
- btf_ext_data->d_size);
- if (IS_ERR(*btf_ext))
+ *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+ err = libbpf_get_error(*btf_ext);
+ if (err)
goto done;
} else if (btf_ext) {
*btf_ext = NULL;
@@ -965,30 +965,24 @@ done:
elf_end(elf);
close(fd);
- if (err)
- return ERR_PTR(err);
- /*
- * btf is always parsed before btf_ext, so no need to clean up
- * btf_ext, if btf loading failed
- */
- if (IS_ERR(btf))
+ if (!err)
return btf;
- if (btf_ext && IS_ERR(*btf_ext)) {
- btf__free(btf);
- err = PTR_ERR(*btf_ext);
- return ERR_PTR(err);
- }
- return btf;
+
+ if (btf_ext)
+ btf_ext__free(*btf_ext);
+ btf__free(btf);
+
+ return ERR_PTR(err);
}
struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
{
- return btf_parse_elf(path, NULL, btf_ext);
+ return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext));
}
struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
{
- return btf_parse_elf(path, base_btf, NULL);
+ return libbpf_ptr(btf_parse_elf(path, base_btf, NULL));
}
static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
@@ -1056,36 +1050,39 @@ err_out:
struct btf *btf__parse_raw(const char *path)
{
- return btf_parse_raw(path, NULL);
+ return libbpf_ptr(btf_parse_raw(path, NULL));
}
struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
{
- return btf_parse_raw(path, base_btf);
+ return libbpf_ptr(btf_parse_raw(path, base_btf));
}
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
struct btf *btf;
+ int err;
if (btf_ext)
*btf_ext = NULL;
btf = btf_parse_raw(path, base_btf);
- if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+ err = libbpf_get_error(btf);
+ if (!err)
return btf;
-
+ if (err != -EPROTO)
+ return ERR_PTR(err);
return btf_parse_elf(path, base_btf, btf_ext);
}
struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
{
- return btf_parse(path, NULL, btf_ext);
+ return libbpf_ptr(btf_parse(path, NULL, btf_ext));
}
struct btf *btf__parse_split(const char *path, struct btf *base_btf)
{
- return btf_parse(path, base_btf, NULL);
+ return libbpf_ptr(btf_parse(path, base_btf, NULL));
}
static int compare_vsi_off(const void *_a, const void *_b)
@@ -1178,7 +1175,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
}
}
- return err;
+ return libbpf_err(err);
}
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
@@ -1191,13 +1188,13 @@ int btf__load(struct btf *btf)
int err = 0;
if (btf->fd >= 0)
- return -EEXIST;
+ return libbpf_err(-EEXIST);
retry_load:
if (log_buf_size) {
log_buf = malloc(log_buf_size);
if (!log_buf)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
*log_buf = 0;
}
@@ -1229,7 +1226,7 @@ retry_load:
done:
free(log_buf);
- return err;
+ return libbpf_err(err);
}
int btf__fd(const struct btf *btf)
@@ -1305,7 +1302,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
if (!data)
- return NULL;
+ return errno = -ENOMEM, NULL;
btf->raw_size = data_sz;
if (btf->swapped_endian)
@@ -1323,7 +1320,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
else if (offset - btf->start_str_off < btf->hdr->str_len)
return btf_strs_data(btf) + (offset - btf->start_str_off);
else
- return NULL;
+ return errno = EINVAL, NULL;
}
const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@@ -1388,17 +1385,20 @@ exit_free:
int btf__get_from_id(__u32 id, struct btf **btf)
{
struct btf *res;
- int btf_fd;
+ int err, btf_fd;
*btf = NULL;
btf_fd = bpf_btf_get_fd_by_id(id);
if (btf_fd < 0)
- return -errno;
+ return libbpf_err(-errno);
res = btf_get_from_fd(btf_fd, NULL);
+ err = libbpf_get_error(res);
+
close(btf_fd);
- if (IS_ERR(res))
- return PTR_ERR(res);
+
+ if (err)
+ return libbpf_err(err);
*btf = res;
return 0;
@@ -1415,31 +1415,30 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__s64 key_size, value_size;
__s32 container_id;
- if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
- max_name) {
+ if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
pr_warn("map:%s length of '____btf_map_%s' is too long\n",
map_name, map_name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
container_id = btf__find_by_name(btf, container_name);
if (container_id < 0) {
pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
map_name, container_name);
- return container_id;
+ return libbpf_err(container_id);
}
container_type = btf__type_by_id(btf, container_id);
if (!container_type) {
pr_warn("map:%s cannot find BTF type for container_id:%u\n",
map_name, container_id);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
pr_warn("map:%s container_name:%s is an invalid container struct\n",
map_name, container_name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
key = btf_members(container_type);
@@ -1448,25 +1447,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
key_size = btf__resolve_size(btf, key->type);
if (key_size < 0) {
pr_warn("map:%s invalid BTF key_type_size\n", map_name);
- return key_size;
+ return libbpf_err(key_size);
}
if (expected_key_size != key_size) {
pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
map_name, (__u32)key_size, expected_key_size);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
value_size = btf__resolve_size(btf, value->type);
if (value_size < 0) {
pr_warn("map:%s invalid BTF value_type_size\n", map_name);
- return value_size;
+ return libbpf_err(value_size);
}
if (expected_value_size != value_size) {
pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
map_name, (__u32)value_size, expected_value_size);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
*key_type_id = key->type;
@@ -1563,11 +1562,11 @@ int btf__find_str(struct btf *btf, const char *s)
/* BTF needs to be in a modifiable state to build string lookup index */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
off = strset__find_str(btf->strs_set, s);
if (off < 0)
- return off;
+ return libbpf_err(off);
return btf->start_str_off + off;
}
@@ -1588,11 +1587,11 @@ int btf__add_str(struct btf *btf, const char *s)
}
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
off = strset__add_str(btf->strs_set, s);
if (off < 0)
- return off;
+ return libbpf_err(off);
btf->hdr->str_len = strset__data_size(btf->strs_set);
@@ -1616,7 +1615,7 @@ static int btf_commit_type(struct btf *btf, int data_sz)
err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
if (err)
- return err;
+ return libbpf_err(err);
btf->hdr->type_len += data_sz;
btf->hdr->str_off += data_sz;
@@ -1653,21 +1652,21 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
sz = btf_type_size(src_type);
if (sz < 0)
- return sz;
+ return libbpf_err(sz);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
memcpy(t, src_type, sz);
err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
if (err)
- return err;
+ return libbpf_err(err);
return btf_commit_type(btf, sz);
}
@@ -1688,21 +1687,21 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(int);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
/* if something goes wrong later, we might end up with an extra string,
* but that shouldn't be a problem, because BTF can't be constructed
@@ -1736,20 +1735,20 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* byte_sz must be one of the explicitly allowed values */
if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
byte_sz != 16)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -1780,15 +1779,15 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
int sz, name_off = 0;
if (validate_type_id(ref_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1831,15 +1830,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
int sz;
if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(struct btf_array);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
t->name_off = 0;
t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
@@ -1860,12 +1859,12 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
int sz, name_off = 0;
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -1943,30 +1942,30 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
/* last type should be union/struct */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_composite(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* best-effort bit field offset/size enforcement */
is_bitfield = bit_size || (bit_offset % 8 != 0);
if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* only offset 0 is allowed for unions */
if (btf_is_union(t) && bit_offset)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_member);
m = btf_add_type_mem(btf, sz);
if (!m)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -2008,15 +2007,15 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -2048,25 +2047,25 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
/* last type should be BTF_KIND_ENUM */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_enum(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (value < INT_MIN || value > UINT_MAX)
- return -E2BIG;
+ return libbpf_err(-E2BIG);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_enum);
v = btf_add_type_mem(btf, sz);
if (!v)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2096,7 +2095,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
{
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
switch (fwd_kind) {
case BTF_FWD_STRUCT:
@@ -2117,7 +2116,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
*/
return btf__add_enum(btf, name, sizeof(int));
default:
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
}
@@ -2132,7 +2131,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
{
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
}
@@ -2187,10 +2186,10 @@ int btf__add_func(struct btf *btf, const char *name,
int id;
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
linkage != BTF_FUNC_EXTERN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
if (id > 0) {
@@ -2198,7 +2197,7 @@ int btf__add_func(struct btf *btf, const char *name,
t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
}
- return id;
+ return libbpf_err(id);
}
/*
@@ -2219,15 +2218,15 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
int sz;
if (validate_type_id(ret_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
/* start out with vlen=0; this will be adjusted when adding enum
* values, if necessary
@@ -2254,23 +2253,23 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
int sz, name_off = 0;
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* last type should be BTF_KIND_FUNC_PROTO */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_func_proto(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_param);
p = btf_add_type_mem(btf, sz);
if (!p)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (name && name[0]) {
name_off = btf__add_str(btf, name);
@@ -2308,21 +2307,21 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
linkage != BTF_VAR_GLOBAL_EXTERN)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type) + sizeof(struct btf_var);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2357,15 +2356,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
/* non-empty name */
if (!name || !name[0])
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_type);
t = btf_add_type_mem(btf, sz);
if (!t)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
name_off = btf__add_str(btf, name);
if (name_off < 0)
@@ -2397,22 +2396,22 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
/* last type should be BTF_KIND_DATASEC */
if (btf->nr_types == 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
t = btf_last_type(btf);
if (!btf_is_datasec(t))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (validate_type_id(var_type_id))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* decompose and invalidate raw data */
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
sz = sizeof(struct btf_var_secinfo);
v = btf_add_type_mem(btf, sz);
if (!v)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
v->type = var_type_id;
v->offset = offset;
@@ -2614,11 +2613,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
err = btf_ext_parse_hdr(data, size);
if (err)
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
btf_ext = calloc(1, sizeof(struct btf_ext));
if (!btf_ext)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
btf_ext->data_size = size;
btf_ext->data = malloc(size);
@@ -2628,9 +2627,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
}
memcpy(btf_ext->data, data, size);
- if (btf_ext->hdr->hdr_len <
- offsetofend(struct btf_ext_header, line_info_len))
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+ err = -EINVAL;
goto done;
+ }
+
err = btf_ext_setup_func_info(btf_ext);
if (err)
goto done;
@@ -2639,8 +2640,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (err)
goto done;
- if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
+ err = -EINVAL;
goto done;
+ }
+
err = btf_ext_setup_core_relos(btf_ext);
if (err)
goto done;
@@ -2648,7 +2652,7 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
done:
if (err) {
btf_ext__free(btf_ext);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
return btf_ext;
@@ -2687,7 +2691,7 @@ static int btf_ext_reloc_info(const struct btf *btf,
existing_len = (*cnt) * record_size;
data = realloc(*info, existing_len + records_len);
if (!data)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
memcpy(data + existing_len, sinfo->data, records_len);
/* adjust insn_off only, the rest data will be passed
@@ -2697,15 +2701,14 @@ static int btf_ext_reloc_info(const struct btf *btf,
__u32 *insn_off;
insn_off = data + existing_len + (i * record_size);
- *insn_off = *insn_off / sizeof(struct bpf_insn) +
- insns_cnt;
+ *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
}
*info = data;
*cnt += sinfo->num_info;
return 0;
}
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
int btf_ext__reloc_func_info(const struct btf *btf,
@@ -2894,11 +2897,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
if (IS_ERR(d)) {
pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (btf_ensure_modifiable(btf))
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
err = btf_dedup_prep(d);
if (err) {
@@ -2938,7 +2941,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
done:
btf_dedup_free(d);
- return err;
+ return libbpf_err(err);
}
#define BTF_UNPROCESSED_ID ((__u32)-1)
@@ -4411,7 +4414,7 @@ struct btf *libbpf_find_kernel_btf(void)
char path[PATH_MAX + 1];
struct utsname buf;
struct btf *btf;
- int i;
+ int i, err;
uname(&buf);
@@ -4425,17 +4428,16 @@ struct btf *libbpf_find_kernel_btf(void)
btf = btf__parse_raw(path);
else
btf = btf__parse_elf(path, NULL);
-
- pr_debug("loading kernel BTF '%s': %ld\n",
- path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
- if (IS_ERR(btf))
+ err = libbpf_get_error(btf);
+ pr_debug("loading kernel BTF '%s': %d\n", path, err);
+ if (err)
continue;
return btf;
}
pr_warn("failed to find valid kernel BTF\n");
- return ERR_PTR(-ESRCH);
+ return libbpf_err_ptr(-ESRCH);
}
int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 5e2809d685bf..5dc6b5172bb3 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -128,7 +128,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
d = calloc(1, sizeof(struct btf_dump));
if (!d)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
d->btf = btf;
d->btf_ext = btf_ext;
@@ -156,7 +156,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
return d;
err:
btf_dump__free(d);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
static int btf_dump_resize(struct btf_dump *d)
@@ -236,16 +236,16 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
int err, i;
if (id > btf__get_nr_types(d->btf))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = btf_dump_resize(d);
if (err)
- return err;
+ return libbpf_err(err);
d->emit_queue_cnt = 0;
err = btf_dump_order_type(d, id, false);
if (err < 0)
- return err;
+ return libbpf_err(err);
for (i = 0; i < d->emit_queue_cnt; i++)
btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
@@ -1075,11 +1075,11 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
int lvl, err;
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = btf_dump_resize(d);
if (err)
- return -EINVAL;
+ return libbpf_err(err);
fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
new file mode 100644
index 000000000000..8df718a6b142
--- /dev/null
+++ b/tools/lib/bpf/gen_loader.c
@@ -0,0 +1,729 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/filter.h>
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+#include "bpf_gen_internal.h"
+#include "skel_internal.h"
+
+#define MAX_USED_MAPS 64
+#define MAX_USED_PROGS 32
+
+/* The following structure describes the stack layout of the loader program.
+ * In addition R6 contains the pointer to context.
+ * R7 contains the result of the last sys_bpf command (typically error or FD).
+ * R9 contains the result of the last sys_close command.
+ *
+ * Naming convention:
+ * ctx - bpf program context
+ * stack - bpf program stack
+ * blob - bpf_attr-s, strings, insns, map data.
+ * All the bytes that loader prog will use for read/write.
+ */
+struct loader_stack {
+ __u32 btf_fd;
+ __u32 map_fd[MAX_USED_MAPS];
+ __u32 prog_fd[MAX_USED_PROGS];
+ __u32 inner_map_fd;
+};
+
+#define stack_off(field) \
+ (__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field))
+
+#define attr_field(attr, field) (attr + offsetof(union bpf_attr, field))
+
+static int realloc_insn_buf(struct bpf_gen *gen, __u32 size)
+{
+ size_t off = gen->insn_cur - gen->insn_start;
+ void *insn_start;
+
+ if (gen->error)
+ return gen->error;
+ if (size > INT32_MAX || off + size > INT32_MAX) {
+ gen->error = -ERANGE;
+ return -ERANGE;
+ }
+ insn_start = realloc(gen->insn_start, off + size);
+ if (!insn_start) {
+ gen->error = -ENOMEM;
+ free(gen->insn_start);
+ gen->insn_start = NULL;
+ return -ENOMEM;
+ }
+ gen->insn_start = insn_start;
+ gen->insn_cur = insn_start + off;
+ return 0;
+}
+
+static int realloc_data_buf(struct bpf_gen *gen, __u32 size)
+{
+ size_t off = gen->data_cur - gen->data_start;
+ void *data_start;
+
+ if (gen->error)
+ return gen->error;
+ if (size > INT32_MAX || off + size > INT32_MAX) {
+ gen->error = -ERANGE;
+ return -ERANGE;
+ }
+ data_start = realloc(gen->data_start, off + size);
+ if (!data_start) {
+ gen->error = -ENOMEM;
+ free(gen->data_start);
+ gen->data_start = NULL;
+ return -ENOMEM;
+ }
+ gen->data_start = data_start;
+ gen->data_cur = data_start + off;
+ return 0;
+}
+
+static void emit(struct bpf_gen *gen, struct bpf_insn insn)
+{
+ if (realloc_insn_buf(gen, sizeof(insn)))
+ return;
+ memcpy(gen->insn_cur, &insn, sizeof(insn));
+ gen->insn_cur += sizeof(insn);
+}
+
+static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn insn2)
+{
+ emit(gen, insn1);
+ emit(gen, insn2);
+}
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level)
+{
+ size_t stack_sz = sizeof(struct loader_stack);
+ int i;
+
+ gen->log_level = log_level;
+ /* save ctx pointer into R6 */
+ emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
+
+ /* bzero stack */
+ emit(gen, BPF_MOV64_REG(BPF_REG_1, BPF_REG_10));
+ emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -stack_sz));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, stack_sz));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));
+
+ /* jump over cleanup code */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0,
+ /* size of cleanup code below */
+ (stack_sz / 4) * 3 + 2));
+
+ /* remember the label where all error branches will jump to */
+ gen->cleanup_label = gen->insn_cur - gen->insn_start;
+ /* emit cleanup code: close all temp FDs */
+ for (i = 0; i < stack_sz; i += 4) {
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i));
+ emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+ }
+ /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
+ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ emit(gen, BPF_EXIT_INSN());
+}
+
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
+{
+ void *prev;
+
+ if (realloc_data_buf(gen, size))
+ return 0;
+ prev = gen->data_cur;
+ memcpy(gen->data_cur, data, size);
+ gen->data_cur += size;
+ return prev - gen->data_start;
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+ switch (sz) {
+ case 8: return BPF_DW;
+ case 4: return BPF_W;
+ case 2: return BPF_H;
+ case 1: return BPF_B;
+ default: return -1;
+ }
+}
+
+/* *(u64 *)(blob + off) = (u64)(void *)(blob + data) */
+static void emit_rel_store(struct bpf_gen *gen, int off, int data)
+{
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, data));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+}
+
+/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */
+static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off)
+{
+ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10));
+ emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
+ bool check_non_zero)
+{
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_6, ctx_off));
+ if (check_non_zero)
+ /* If value in ctx is zero don't update the blob.
+ * For example: when ctx->map.max_entries == 0, keep default max_entries from bpf.c
+ */
+ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off)
+{
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, off));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off)
+{
+ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
+ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
+}
+
+static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size)
+{
+ emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, attr));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf));
+ /* remember the result in R7 */
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+}
+
+static bool is_simm16(__s64 value)
+{
+ return value == (__s64)(__s16)value;
+}
+
+static void emit_check_err(struct bpf_gen *gen)
+{
+ __s64 off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+
+ /* R7 contains result of last sys_bpf command.
+ * if (R7 < 0) goto cleanup;
+ */
+ if (is_simm16(off)) {
+ emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off));
+ } else {
+ gen->error = -ERANGE;
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+ }
+}
+
+/* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */
+static void emit_debug(struct bpf_gen *gen, int reg1, int reg2,
+ const char *fmt, va_list args)
+{
+ char buf[1024];
+ int addr, len, ret;
+
+ if (!gen->log_level)
+ return;
+ ret = vsnprintf(buf, sizeof(buf), fmt, args);
+ if (ret < 1024 - 7 && reg1 >= 0 && reg2 < 0)
+ /* The special case to accommodate common debug_ret():
+ * to avoid specifying BPF_REG_7 and adding " r=%%d" to
+ * prints explicitly.
+ */
+ strcat(buf, " r=%d");
+ len = strlen(buf) + 1;
+ addr = add_data(gen, buf, len);
+
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, addr));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+ if (reg1 >= 0)
+ emit(gen, BPF_MOV64_REG(BPF_REG_3, reg1));
+ if (reg2 >= 0)
+ emit(gen, BPF_MOV64_REG(BPF_REG_4, reg2));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_trace_printk));
+}
+
+static void debug_regs(struct bpf_gen *gen, int reg1, int reg2, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ emit_debug(gen, reg1, reg2, fmt, args);
+ va_end(args);
+}
+
+static void debug_ret(struct bpf_gen *gen, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ emit_debug(gen, BPF_REG_7, -1, fmt, args);
+ va_end(args);
+}
+
+static void __emit_sys_close(struct bpf_gen *gen)
+{
+ emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0,
+ /* 2 is the number of the following insns
+ * * 6 is additional insns in debug_regs
+ */
+ 2 + (gen->log_level ? 6 : 0)));
+ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_1));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+ debug_regs(gen, BPF_REG_9, BPF_REG_0, "close(%%d) = %%d");
+}
+
+static void emit_sys_close_stack(struct bpf_gen *gen, int stack_off)
+{
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, stack_off));
+ __emit_sys_close(gen);
+}
+
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
+{
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, blob_off));
+ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0));
+ __emit_sys_close(gen);
+}
+
+int bpf_gen__finish(struct bpf_gen *gen)
+{
+ int i;
+
+ emit_sys_close_stack(gen, stack_off(btf_fd));
+ for (i = 0; i < gen->nr_progs; i++)
+ move_stack2ctx(gen,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * gen->nr_maps +
+ sizeof(struct bpf_prog_desc) * i +
+ offsetof(struct bpf_prog_desc, prog_fd), 4,
+ stack_off(prog_fd[i]));
+ for (i = 0; i < gen->nr_maps; i++)
+ move_stack2ctx(gen,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * i +
+ offsetof(struct bpf_map_desc, map_fd), 4,
+ stack_off(map_fd[i]));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
+ emit(gen, BPF_EXIT_INSN());
+ pr_debug("gen: finish %d\n", gen->error);
+ if (!gen->error) {
+ struct gen_loader_opts *opts = gen->opts;
+
+ opts->insns = gen->insn_start;
+ opts->insns_sz = gen->insn_cur - gen->insn_start;
+ opts->data = gen->data_start;
+ opts->data_sz = gen->data_cur - gen->data_start;
+ }
+ return gen->error;
+}
+
+void bpf_gen__free(struct bpf_gen *gen)
+{
+ if (!gen)
+ return;
+ free(gen->data_start);
+ free(gen->insn_start);
+ free(gen);
+}
+
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
+ __u32 btf_raw_size)
+{
+ int attr_size = offsetofend(union bpf_attr, btf_log_level);
+ int btf_data, btf_load_attr;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: load_btf: size %d\n", btf_raw_size);
+ btf_data = add_data(gen, btf_raw_data, btf_raw_size);
+
+ attr.btf_size = btf_raw_size;
+ btf_load_attr = add_data(gen, &attr, attr_size);
+
+ /* populate union bpf_attr with user provided log details */
+ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4,
+ offsetof(struct bpf_loader_ctx, log_level), false);
+ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_size), 4,
+ offsetof(struct bpf_loader_ctx, log_size), false);
+ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_buf), 8,
+ offsetof(struct bpf_loader_ctx, log_buf), false);
+ /* populate union bpf_attr with a pointer to the BTF data */
+ emit_rel_store(gen, attr_field(btf_load_attr, btf), btf_data);
+ /* emit BTF_LOAD command */
+ emit_sys_bpf(gen, BPF_BTF_LOAD, btf_load_attr, attr_size);
+ debug_ret(gen, "btf_load size %d", btf_raw_size);
+ emit_check_err(gen);
+ /* remember btf_fd in the stack, if successful */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, stack_off(btf_fd)));
+}
+
+void bpf_gen__map_create(struct bpf_gen *gen,
+ struct bpf_create_map_attr *map_attr, int map_idx)
+{
+ int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
+ bool close_inner_map_fd = false;
+ int map_create_attr;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ attr.map_type = map_attr->map_type;
+ attr.key_size = map_attr->key_size;
+ attr.value_size = map_attr->value_size;
+ attr.map_flags = map_attr->map_flags;
+ memcpy(attr.map_name, map_attr->name,
+ min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1));
+ attr.numa_node = map_attr->numa_node;
+ attr.map_ifindex = map_attr->map_ifindex;
+ attr.max_entries = map_attr->max_entries;
+ switch (attr.map_type) {
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_STACK_TRACE:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ case BPF_MAP_TYPE_DEVMAP:
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ case BPF_MAP_TYPE_CPUMAP:
+ case BPF_MAP_TYPE_XSKMAP:
+ case BPF_MAP_TYPE_SOCKMAP:
+ case BPF_MAP_TYPE_SOCKHASH:
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ case BPF_MAP_TYPE_RINGBUF:
+ break;
+ default:
+ attr.btf_key_type_id = map_attr->btf_key_type_id;
+ attr.btf_value_type_id = map_attr->btf_value_type_id;
+ }
+
+ pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n",
+ attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id);
+
+ map_create_attr = add_data(gen, &attr, attr_size);
+ if (attr.btf_value_type_id)
+ /* populate union bpf_attr with btf_fd saved in the stack earlier */
+ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4,
+ stack_off(btf_fd));
+ switch (attr.map_type) {
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4,
+ stack_off(inner_map_fd));
+ close_inner_map_fd = true;
+ break;
+ default:
+ break;
+ }
+ /* conditionally update max_entries */
+ if (map_idx >= 0)
+ move_ctx2blob(gen, attr_field(map_create_attr, max_entries), 4,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * map_idx +
+ offsetof(struct bpf_map_desc, max_entries),
+ true /* check that max_entries != 0 */);
+ /* emit MAP_CREATE command */
+ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size);
+ debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d",
+ attr.map_name, map_idx, map_attr->map_type, attr.value_size,
+ attr.btf_value_type_id);
+ emit_check_err(gen);
+ /* remember map_fd in the stack, if successful */
+ if (map_idx < 0) {
+ /* This bpf_gen__map_create() function is called with map_idx >= 0
+ * for all maps that libbpf loading logic tracks.
+ * It's called with -1 to create an inner map.
+ */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+ stack_off(inner_map_fd)));
+ } else if (map_idx != gen->nr_maps) {
+ gen->error = -EDOM; /* internal bug */
+ return;
+ } else {
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+ stack_off(map_fd[map_idx])));
+ gen->nr_maps++;
+ }
+ if (close_inner_map_fd)
+ emit_sys_close_stack(gen, stack_off(inner_map_fd));
+}
+
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
+ enum bpf_attach_type type)
+{
+ const char *prefix;
+ int kind, ret;
+
+ btf_get_kernel_prefix_kind(type, &prefix, &kind);
+ gen->attach_kind = kind;
+ ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
+ prefix, attach_name);
+ if (ret == sizeof(gen->attach_target))
+ gen->error = -ENOSPC;
+}
+
+static void emit_find_attach_target(struct bpf_gen *gen)
+{
+ int name, len = strlen(gen->attach_target) + 1;
+
+ pr_debug("gen: find_attach_tgt %s %d\n", gen->attach_target, gen->attach_kind);
+ name = add_data(gen, gen->attach_target, len);
+
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, name));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, gen->attach_kind));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ debug_ret(gen, "find_by_name_kind(%s,%d)",
+ gen->attach_target, gen->attach_kind);
+ emit_check_err(gen);
+ /* if successful, btf_id is in lower 32-bit of R7 and
+ * btf_obj_fd is in upper 32-bit
+ */
+}
+
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
+ int insn_idx)
+{
+ struct ksym_relo_desc *relo;
+
+ relo = libbpf_reallocarray(gen->relos, gen->relo_cnt + 1, sizeof(*relo));
+ if (!relo) {
+ gen->error = -ENOMEM;
+ return;
+ }
+ gen->relos = relo;
+ relo += gen->relo_cnt;
+ relo->name = name;
+ relo->kind = kind;
+ relo->insn_idx = insn_idx;
+ gen->relo_cnt++;
+}
+
+static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
+{
+ int name, insn, len = strlen(relo->name) + 1;
+
+ pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx);
+ name = add_data(gen, relo->name, len);
+
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, name));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
+ emit_check_err(gen);
+ /* store btf_id into insn[insn_idx].imm */
+ insn = insns + sizeof(struct bpf_insn) * relo->insn_idx +
+ offsetof(struct bpf_insn, imm);
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, insn));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0));
+ if (relo->kind == BTF_KIND_VAR) {
+ /* store btf_obj_fd into insn[insn_idx + 1].imm */
+ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+ sizeof(struct bpf_insn)));
+ }
+}
+
+static void emit_relos(struct bpf_gen *gen, int insns)
+{
+ int i;
+
+ for (i = 0; i < gen->relo_cnt; i++)
+ emit_relo(gen, gen->relos + i, insns);
+}
+
+static void cleanup_relos(struct bpf_gen *gen, int insns)
+{
+ int i, insn;
+
+ for (i = 0; i < gen->relo_cnt; i++) {
+ if (gen->relos[i].kind != BTF_KIND_VAR)
+ continue;
+ /* close fd recorded in insn[insn_idx + 1].imm */
+ insn = insns +
+ sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) +
+ offsetof(struct bpf_insn, imm);
+ emit_sys_close_blob(gen, insn);
+ }
+ if (gen->relo_cnt) {
+ free(gen->relos);
+ gen->relo_cnt = 0;
+ gen->relos = NULL;
+ }
+}
+
+void bpf_gen__prog_load(struct bpf_gen *gen,
+ struct bpf_prog_load_params *load_attr, int prog_idx)
+{
+ int attr_size = offsetofend(union bpf_attr, fd_array);
+ int prog_load_attr, license, insns, func_info, line_info;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: prog_load: type %d insns_cnt %zd\n",
+ load_attr->prog_type, load_attr->insn_cnt);
+ /* add license string to blob of bytes */
+ license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1);
+ /* add insns to blob of bytes */
+ insns = add_data(gen, load_attr->insns,
+ load_attr->insn_cnt * sizeof(struct bpf_insn));
+
+ attr.prog_type = load_attr->prog_type;
+ attr.expected_attach_type = load_attr->expected_attach_type;
+ attr.attach_btf_id = load_attr->attach_btf_id;
+ attr.prog_ifindex = load_attr->prog_ifindex;
+ attr.kern_version = 0;
+ attr.insn_cnt = (__u32)load_attr->insn_cnt;
+ attr.prog_flags = load_attr->prog_flags;
+
+ attr.func_info_rec_size = load_attr->func_info_rec_size;
+ attr.func_info_cnt = load_attr->func_info_cnt;
+ func_info = add_data(gen, load_attr->func_info,
+ attr.func_info_cnt * attr.func_info_rec_size);
+
+ attr.line_info_rec_size = load_attr->line_info_rec_size;
+ attr.line_info_cnt = load_attr->line_info_cnt;
+ line_info = add_data(gen, load_attr->line_info,
+ attr.line_info_cnt * attr.line_info_rec_size);
+
+ memcpy(attr.prog_name, load_attr->name,
+ min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+ prog_load_attr = add_data(gen, &attr, attr_size);
+
+ /* populate union bpf_attr with a pointer to license */
+ emit_rel_store(gen, attr_field(prog_load_attr, license), license);
+
+ /* populate union bpf_attr with a pointer to instructions */
+ emit_rel_store(gen, attr_field(prog_load_attr, insns), insns);
+
+ /* populate union bpf_attr with a pointer to func_info */
+ emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info);
+
+ /* populate union bpf_attr with a pointer to line_info */
+ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info);
+
+ /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */
+ emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array),
+ stack_off(map_fd[0]));
+
+ /* populate union bpf_attr with user provided log details */
+ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4,
+ offsetof(struct bpf_loader_ctx, log_level), false);
+ move_ctx2blob(gen, attr_field(prog_load_attr, log_size), 4,
+ offsetof(struct bpf_loader_ctx, log_size), false);
+ move_ctx2blob(gen, attr_field(prog_load_attr, log_buf), 8,
+ offsetof(struct bpf_loader_ctx, log_buf), false);
+ /* populate union bpf_attr with btf_fd saved in the stack earlier */
+ move_stack2blob(gen, attr_field(prog_load_attr, prog_btf_fd), 4,
+ stack_off(btf_fd));
+ if (gen->attach_kind) {
+ emit_find_attach_target(gen);
+ /* populate union bpf_attr with btf_id and btf_obj_fd found by helper */
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, prog_load_attr));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+ offsetof(union bpf_attr, attach_btf_id)));
+ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+ offsetof(union bpf_attr, attach_btf_obj_fd)));
+ }
+ emit_relos(gen, insns);
+ /* emit PROG_LOAD command */
+ emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size);
+ debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);
+ /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */
+ cleanup_relos(gen, insns);
+ if (gen->attach_kind)
+ emit_sys_close_blob(gen,
+ attr_field(prog_load_attr, attach_btf_obj_fd));
+ emit_check_err(gen);
+ /* remember prog_fd in the stack, if successful */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+ stack_off(prog_fd[gen->nr_progs])));
+ gen->nr_progs++;
+}
+
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
+ __u32 value_size)
+{
+ int attr_size = offsetofend(union bpf_attr, flags);
+ int map_update_attr, value, key;
+ union bpf_attr attr;
+ int zero = 0;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: map_update_elem: idx %d\n", map_idx);
+
+ value = add_data(gen, pvalue, value_size);
+ key = add_data(gen, &zero, sizeof(zero));
+
+ /* if (map_desc[map_idx].initial_value)
+ * copy_from_user(value, initial_value, value_size);
+ */
+ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,
+ sizeof(struct bpf_loader_ctx) +
+ sizeof(struct bpf_map_desc) * map_idx +
+ offsetof(struct bpf_map_desc, initial_value)));
+ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4));
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+ 0, 0, 0, value));
+ emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size));
+ emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user));
+
+ map_update_attr = add_data(gen, &attr, attr_size);
+ move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4,
+ stack_off(map_fd[map_idx]));
+ emit_rel_store(gen, attr_field(map_update_attr, key), key);
+ emit_rel_store(gen, attr_field(map_update_attr, value), value);
+ /* emit MAP_UPDATE_ELEM command */
+ emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
+ debug_ret(gen, "update_elem idx %d value_size %d", map_idx, value_size);
+ emit_check_err(gen);
+}
+
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
+{
+ int attr_size = offsetofend(union bpf_attr, map_fd);
+ int map_freeze_attr;
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_size);
+ pr_debug("gen: map_freeze: idx %d\n", map_idx);
+ map_freeze_attr = add_data(gen, &attr, attr_size);
+ move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4,
+ stack_off(map_fd[map_idx]));
+ /* emit MAP_FREEZE command */
+ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size);
+ debug_ret(gen, "map_freeze");
+ emit_check_err(gen);
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e2a3cf437814..1e04ce724240 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -54,6 +54,7 @@
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
+#include "bpf_gen_internal.h"
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
@@ -150,6 +151,23 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
+/* this goes away in libbpf 1.0 */
+enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
+
+int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
+{
+ /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to
+ * get all possible values we compensate last +1, and then (2*x - 1)
+ * to get the bit mask
+ */
+ if (mode != LIBBPF_STRICT_ALL
+ && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
+ return errno = EINVAL, -EINVAL;
+
+ libbpf_mode = mode;
+ return 0;
+}
+
enum kern_feature_id {
/* v4.14: kernel support for program & map names. */
FEAT_PROG_NAME,
@@ -178,7 +196,7 @@ enum kern_feature_id {
__FEAT_CNT,
};
-static bool kernel_supports(enum kern_feature_id feat_id);
+static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
enum reloc_type {
RELO_LD64,
@@ -432,6 +450,8 @@ struct bpf_object {
bool loaded;
bool has_subcalls;
+ struct bpf_gen *gen_loader;
+
/*
* Information when doing elf related work. Only valid if fd
* is valid.
@@ -677,6 +697,11 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
return -LIBBPF_ERRNO__FORMAT;
}
+ if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+ pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
+ return -ENOTSUP;
+ }
+
pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
@@ -700,13 +725,14 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
if (err)
return err;
- /* if function is a global/weak symbol, but has hidden
- * visibility (STV_HIDDEN), mark its BTF FUNC as static to
- * enable more permissive BPF verification mode with more
- * outside context available to BPF verifier
+ /* if function is a global/weak symbol, but has restricted
+ * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
+ * as static to enable more permissive BPF verification mode
+ * with more outside context available to BPF verifier
*/
if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
- && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN)
+ && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN
+ || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL))
prog->mark_btf_static = true;
nr_progs++;
@@ -1794,7 +1820,6 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (!symbols)
return -EINVAL;
-
scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
data = elf_sec_data(obj, scn);
if (!scn || !data) {
@@ -1854,6 +1879,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
return -LIBBPF_ERRNO__FORMAT;
}
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION
+ || GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+ pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
+ return -ENOTSUP;
+ }
+
map->libbpf_type = LIBBPF_MAP_UNSPEC;
map->sec_idx = sym.st_shndx;
map->sec_offset = sym.st_value;
@@ -2261,6 +2292,16 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
pr_debug("map '%s': found inner map definition.\n", map->name);
}
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+ switch (linkage) {
+ case BTF_VAR_STATIC: return "static";
+ case BTF_VAR_GLOBAL_ALLOCATED: return "global";
+ case BTF_VAR_GLOBAL_EXTERN: return "extern";
+ default: return "unknown";
+ }
+}
+
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
const struct btf_type *sec,
int var_idx, int sec_idx,
@@ -2293,10 +2334,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
map_name, btf_kind_str(var));
return -EINVAL;
}
- if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
- var_extra->linkage != BTF_VAR_STATIC) {
- pr_warn("map '%s': unsupported var linkage %u.\n",
- map_name, var_extra->linkage);
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
+ pr_warn("map '%s': unsupported map linkage %s.\n",
+ map_name, btf_var_linkage_str(var_extra->linkage));
return -EOPNOTSUPP;
}
@@ -2425,10 +2465,8 @@ static int bpf_object__init_maps(struct bpf_object *obj,
err = err ?: bpf_object__init_global_data_maps(obj);
err = err ?: bpf_object__init_kconfig_map(obj);
err = err ?: bpf_object__init_struct_ops_maps(obj);
- if (err)
- return err;
- return 0;
+ return err;
}
static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -2443,20 +2481,20 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
static bool btf_needs_sanitization(struct bpf_object *obj)
{
- bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
- bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
- bool has_float = kernel_supports(FEAT_BTF_FLOAT);
- bool has_func = kernel_supports(FEAT_BTF_FUNC);
+ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
return !has_func || !has_datasec || !has_func_global || !has_float;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
- bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
- bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
- bool has_float = kernel_supports(FEAT_BTF_FLOAT);
- bool has_func = kernel_supports(FEAT_BTF_FUNC);
+ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
@@ -2539,16 +2577,14 @@ static int bpf_object__init_btf(struct bpf_object *obj,
if (btf_data) {
obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
- if (IS_ERR(obj->btf)) {
- err = PTR_ERR(obj->btf);
+ err = libbpf_get_error(obj->btf);
+ if (err) {
obj->btf = NULL;
- pr_warn("Error loading ELF section %s: %d.\n",
- BTF_ELF_SEC, err);
+ pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
goto out;
}
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
- err = 0;
}
if (btf_ext_data) {
if (!obj->btf) {
@@ -2556,11 +2592,11 @@ static int bpf_object__init_btf(struct bpf_object *obj,
BTF_EXT_ELF_SEC, BTF_ELF_SEC);
goto out;
}
- obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
- btf_ext_data->d_size);
- if (IS_ERR(obj->btf_ext)) {
- pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
- BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+ obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+ err = libbpf_get_error(obj->btf_ext);
+ if (err) {
+ pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
+ BTF_EXT_ELF_SEC, err);
obj->btf_ext = NULL;
goto out;
}
@@ -2637,15 +2673,15 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
int err;
/* btf_vmlinux could be loaded earlier */
- if (obj->btf_vmlinux)
+ if (obj->btf_vmlinux || obj->gen_loader)
return 0;
if (!force && !obj_needs_vmlinux_btf(obj))
return 0;
obj->btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(obj->btf_vmlinux)) {
- err = PTR_ERR(obj->btf_vmlinux);
+ err = libbpf_get_error(obj->btf_vmlinux);
+ if (err) {
pr_warn("Error loading vmlinux BTF: %d\n", err);
obj->btf_vmlinux = NULL;
return err;
@@ -2662,7 +2698,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
if (!obj->btf)
return 0;
- if (!kernel_supports(FEAT_BTF)) {
+ if (!kernel_supports(obj, FEAT_BTF)) {
if (kernel_needs_btf(obj)) {
err = -EOPNOTSUPP;
goto report;
@@ -2711,15 +2747,29 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
/* clone BTF to sanitize a copy and leave the original intact */
raw_data = btf__get_raw_data(obj->btf, &sz);
kern_btf = btf__new(raw_data, sz);
- if (IS_ERR(kern_btf))
- return PTR_ERR(kern_btf);
+ err = libbpf_get_error(kern_btf);
+ if (err)
+ return err;
/* enforce 8-byte pointers for BPF-targeted BTFs */
btf__set_pointer_size(obj->btf, 8);
bpf_object__sanitize_btf(obj, kern_btf);
}
- err = btf__load(kern_btf);
+ if (obj->gen_loader) {
+ __u32 raw_size = 0;
+ const void *raw_data = btf__get_raw_data(kern_btf, &raw_size);
+
+ if (!raw_data)
+ return -ENOMEM;
+ bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
+ /* Pretend to have valid FD to pass various fd >= 0 checks.
+ * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
+ */
+ btf__set_fd(kern_btf, 0);
+ } else {
+ err = btf__load(kern_btf);
+ }
if (sanitize) {
if (!err) {
/* move fd to libbpf's BTF */
@@ -3216,6 +3266,9 @@ static int add_dummy_ksym_var(struct btf *btf)
const struct btf_var_secinfo *vs;
const struct btf_type *sec;
+ if (!btf)
+ return 0;
+
sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
BTF_KIND_DATASEC);
if (sec_btf_id < 0)
@@ -3470,7 +3523,7 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
if (pos->sec_name && !strcmp(pos->sec_name, title))
return pos;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
static bool prog_is_subprog(const struct bpf_object *obj,
@@ -3503,7 +3556,7 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
if (!strcmp(prog->name, name))
return prog;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
@@ -3850,11 +3903,11 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
err = bpf_obj_get_info_by_fd(fd, &info, &len);
if (err)
- return err;
+ return libbpf_err(err);
new_name = strdup(info.name);
if (!new_name)
- return -errno;
+ return libbpf_err(-errno);
new_fd = open("/", O_RDONLY | O_CLOEXEC);
if (new_fd < 0) {
@@ -3892,7 +3945,7 @@ err_close_new_fd:
close(new_fd);
err_free_new_name:
free(new_name);
- return err;
+ return libbpf_err(err);
}
__u32 bpf_map__max_entries(const struct bpf_map *map)
@@ -3903,7 +3956,7 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
{
if (!bpf_map_type__is_map_in_map(map->def.type))
- return NULL;
+ return errno = EINVAL, NULL;
return map->inner_map;
}
@@ -3911,7 +3964,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.max_entries = max_entries;
return 0;
}
@@ -3919,7 +3972,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
{
if (!map || !max_entries)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
return bpf_map__set_max_entries(map, max_entries);
}
@@ -3935,6 +3988,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
};
int ret;
+ if (obj->gen_loader)
+ return 0;
+
/* make sure basic loading works */
memset(&attr, 0, sizeof(attr));
@@ -3945,6 +4001,10 @@ bpf_object__probe_loading(struct bpf_object *obj)
ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret < 0) {
+ attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
+ }
+ if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
@@ -4290,11 +4350,17 @@ static struct kern_feature_desc {
},
};
-static bool kernel_supports(enum kern_feature_id feat_id)
+static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
struct kern_feature_desc *feat = &feature_probes[feat_id];
int ret;
+ if (obj->gen_loader)
+ /* To generate loader program assume the latest kernel
+ * to avoid doing extra prog_load, map_create syscalls.
+ */
+ return true;
+
if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
ret = feat->probe();
if (ret > 0) {
@@ -4377,6 +4443,13 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
char *cp, errmsg[STRERR_BUFSIZE];
int err, zero = 0;
+ if (obj->gen_loader) {
+ bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
+ map->mmaped, map->def.value_size);
+ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
+ bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
+ return 0;
+ }
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
if (err) {
err = -errno;
@@ -4402,14 +4475,14 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
static void bpf_map__destroy(struct bpf_map *map);
-static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
struct bpf_create_map_attr create_attr;
struct bpf_map_def *def = &map->def;
memset(&create_attr, 0, sizeof(create_attr));
- if (kernel_supports(FEAT_PROG_NAME))
+ if (kernel_supports(obj, FEAT_PROG_NAME))
create_attr.name = map->name;
create_attr.map_ifindex = map->map_ifindex;
create_attr.map_type = def->type;
@@ -4450,7 +4523,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
if (map->inner_map) {
int err;
- err = bpf_object__create_map(obj, map->inner_map);
+ err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
map->name, err);
@@ -4462,7 +4535,15 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
create_attr.inner_map_fd = map->inner_map_fd;
}
- map->fd = bpf_create_map_xattr(&create_attr);
+ if (obj->gen_loader) {
+ bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps);
+ /* Pretend to have valid FD to pass various fd >= 0 checks.
+ * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
+ */
+ map->fd = 0;
+ } else {
+ map->fd = bpf_create_map_xattr(&create_attr);
+ }
if (map->fd < 0 && (create_attr.btf_key_type_id ||
create_attr.btf_value_type_id)) {
char *cp, errmsg[STRERR_BUFSIZE];
@@ -4483,6 +4564,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
return -errno;
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+ if (obj->gen_loader)
+ map->inner_map->fd = -1;
bpf_map__destroy(map->inner_map);
zfree(&map->inner_map);
}
@@ -4490,11 +4573,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
-static int init_map_slots(struct bpf_map *map)
+static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
{
const struct bpf_map *targ_map;
unsigned int i;
- int fd, err;
+ int fd, err = 0;
for (i = 0; i < map->init_slots_sz; i++) {
if (!map->init_slots[i])
@@ -4502,7 +4585,13 @@ static int init_map_slots(struct bpf_map *map)
targ_map = map->init_slots[i];
fd = bpf_map__fd(targ_map);
- err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ if (obj->gen_loader) {
+ pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
+ map - obj->maps, i, targ_map - obj->maps);
+ return -ENOTSUP;
+ } else {
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ }
if (err) {
err = -errno;
pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
@@ -4544,7 +4633,7 @@ bpf_object__create_maps(struct bpf_object *obj)
pr_debug("map '%s': skipping creation (preset fd=%d)\n",
map->name, map->fd);
} else {
- err = bpf_object__create_map(obj, map);
+ err = bpf_object__create_map(obj, map, false);
if (err)
goto err_out;
@@ -4560,7 +4649,7 @@ bpf_object__create_maps(struct bpf_object *obj)
}
if (map->init_slots_sz) {
- err = init_map_slots(map);
+ err = init_map_slots(obj, map);
if (err < 0) {
zclose(map->fd);
goto err_out;
@@ -4970,11 +5059,14 @@ static int load_module_btfs(struct bpf_object *obj)
if (obj->btf_modules_loaded)
return 0;
+ if (obj->gen_loader)
+ return 0;
+
/* don't do this again, even if we find no module BTFs */
obj->btf_modules_loaded = true;
/* kernel too old to support module BTFs */
- if (!kernel_supports(FEAT_MODULE_BTF))
+ if (!kernel_supports(obj, FEAT_MODULE_BTF))
return 0;
while (true) {
@@ -5015,10 +5107,10 @@ static int load_module_btfs(struct bpf_object *obj)
}
btf = btf_get_from_fd(fd, obj->btf_vmlinux);
- if (IS_ERR(btf)) {
- pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
- name, id, PTR_ERR(btf));
- err = PTR_ERR(btf);
+ err = libbpf_get_error(btf);
+ if (err) {
+ pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
+ name, id, err);
goto err_out;
}
@@ -6117,6 +6209,12 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
if (str_is_empty(spec_str))
return -EINVAL;
+ if (prog->obj->gen_loader) {
+ pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+ prog - prog->obj->programs, relo->insn_off / 8,
+ local_name, spec_str, relo->kind);
+ return -ENOTSUP;
+ }
err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
if (err) {
pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
@@ -6272,8 +6370,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
if (targ_btf_path) {
obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
- if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
- err = PTR_ERR(obj->btf_vmlinux_override);
+ err = libbpf_get_error(obj->btf_vmlinux_override);
+ if (err) {
pr_warn("failed to parse target BTF: %d\n", err);
return err;
}
@@ -6368,19 +6466,34 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
switch (relo->type) {
case RELO_LD64:
- insn[0].src_reg = BPF_PSEUDO_MAP_FD;
- insn[0].imm = obj->maps[relo->map_idx].fd;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
+ insn[0].imm = relo->map_idx;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ }
break;
case RELO_DATA:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
insn[1].imm = insn[0].imm + relo->sym_off;
- insn[0].imm = obj->maps[relo->map_idx].fd;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+ insn[0].imm = relo->map_idx;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ }
break;
case RELO_EXTERN_VAR:
ext = &obj->externs[relo->sym_off];
if (ext->type == EXT_KCFG) {
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ if (obj->gen_loader) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+ insn[0].imm = obj->kconfig_map_idx;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ }
insn[1].imm = ext->kcfg.data_off;
} else /* EXT_KSYM */ {
if (ext->ksym.type_id) { /* typed ksyms */
@@ -6399,11 +6512,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[0].imm = ext->ksym.kernel_btf_id;
break;
case RELO_SUBPROG_ADDR:
- insn[0].src_reg = BPF_PSEUDO_FUNC;
- /* will be handled as a follow up pass */
+ if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
+ pr_warn("prog '%s': relo #%d: bad insn\n",
+ prog->name, i);
+ return -EINVAL;
+ }
+ /* handled already */
break;
case RELO_CALL:
- /* will be handled as a follow up pass */
+ /* handled already */
break;
default:
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
@@ -6494,7 +6611,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj,
/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
* supprot func/line info
*/
- if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
+ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
return 0;
/* only attempt func info relocation if main program's func_info
@@ -6572,6 +6689,30 @@ static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, si
sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
}
+static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+ int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
+ struct reloc_desc *relos;
+ int i;
+
+ if (main_prog == subprog)
+ return 0;
+ relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
+ if (!relos)
+ return -ENOMEM;
+ memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
+ sizeof(*relos) * subprog->nr_reloc);
+
+ for (i = main_prog->nr_reloc; i < new_cnt; i++)
+ relos[i].insn_idx += subprog->sub_insn_off;
+ /* After insn_idx adjustment the 'relos' array is still sorted
+ * by insn_idx and doesn't break bsearch.
+ */
+ main_prog->reloc_desc = relos;
+ main_prog->nr_reloc = new_cnt;
+ return 0;
+}
+
static int
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
struct bpf_program *prog)
@@ -6592,6 +6733,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
continue;
relo = find_prog_insn_relo(prog, insn_idx);
+ if (relo && relo->type == RELO_EXTERN_FUNC)
+ /* kfunc relocations will be handled later
+ * in bpf_object__relocate_data()
+ */
+ continue;
if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type);
@@ -6666,6 +6812,10 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
main_prog->name, subprog->insns_cnt, subprog->name);
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
err = bpf_object__reloc_code(obj, main_prog, subprog);
if (err)
return err;
@@ -6795,11 +6945,25 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
return 0;
}
+static void
+bpf_object__free_relocs(struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+ int i;
+
+ /* free up relocation descriptors */
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ }
+}
+
static int
bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
{
struct bpf_program *prog;
- size_t i;
+ size_t i, j;
int err;
if (obj->btf_ext) {
@@ -6810,23 +6974,32 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- /* relocate data references first for all programs and sub-programs,
- * as they don't change relative to code locations, so subsequent
- * subprogram processing won't need to re-calculate any of them
+
+ /* Before relocating calls pre-process relocations and mark
+ * few ld_imm64 instructions that points to subprogs.
+ * Otherwise bpf_object__reloc_code() later would have to consider
+ * all ld_imm64 insns as relocation candidates. That would
+ * reduce relocation speed, since amount of find_prog_insn_relo()
+ * would increase and most of them will fail to find a relo.
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- err = bpf_object__relocate_data(obj, prog);
- if (err) {
- pr_warn("prog '%s': failed to relocate data references: %d\n",
- prog->name, err);
- return err;
+ for (j = 0; j < prog->nr_reloc; j++) {
+ struct reloc_desc *relo = &prog->reloc_desc[j];
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+
+ /* mark the insn, so it's recognized by insn_is_pseudo_func() */
+ if (relo->type == RELO_SUBPROG_ADDR)
+ insn[0].src_reg = BPF_PSEUDO_FUNC;
}
}
- /* now relocate subprogram calls and append used subprograms to main
+
+ /* relocate subprogram calls and append used subprograms to main
* programs; each copy of subprogram code needs to be relocated
* differently for each main program, because its code location might
- * have changed
+ * have changed.
+ * Append subprog relos to main programs to allow data relos to be
+ * processed after text is completely relocated.
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
@@ -6843,12 +7016,20 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- /* free up relocation descriptors */
+ /* Process data relos for main programs */
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- zfree(&prog->reloc_desc);
- prog->nr_reloc = 0;
+ if (prog_is_subprog(obj, prog))
+ continue;
+ err = bpf_object__relocate_data(obj, prog);
+ if (err) {
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
+ return err;
+ }
}
+ if (!obj->gen_loader)
+ bpf_object__free_relocs(obj);
return 0;
}
@@ -7037,6 +7218,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program
enum bpf_func_id func_id;
int i;
+ if (obj->gen_loader)
+ return 0;
+
for (i = 0; i < prog->insns_cnt; i++, insn++) {
if (!insn_is_helper_call(insn, &func_id))
continue;
@@ -7048,12 +7232,12 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program
switch (func_id) {
case BPF_FUNC_probe_read_kernel:
case BPF_FUNC_probe_read_user:
- if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
insn->imm = BPF_FUNC_probe_read;
break;
case BPF_FUNC_probe_read_kernel_str:
case BPF_FUNC_probe_read_user_str:
- if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
insn->imm = BPF_FUNC_probe_read_str;
break;
default:
@@ -7088,12 +7272,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.prog_type = prog->type;
/* old kernels might not support specifying expected_attach_type */
- if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
+ if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
prog->sec_def->is_exp_attach_type_optional)
load_attr.expected_attach_type = 0;
else
load_attr.expected_attach_type = prog->expected_attach_type;
- if (kernel_supports(FEAT_PROG_NAME))
+ if (kernel_supports(prog->obj, FEAT_PROG_NAME))
load_attr.name = prog->name;
load_attr.insns = insns;
load_attr.insn_cnt = insns_cnt;
@@ -7109,7 +7293,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
/* specify func_info/line_info only if kernel supports them */
btf_fd = bpf_object__btf_fd(prog->obj);
- if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
+ if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) {
load_attr.prog_btf_fd = btf_fd;
load_attr.func_info = prog->func_info;
load_attr.func_info_rec_size = prog->func_info_rec_size;
@@ -7121,6 +7305,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.log_level = prog->log_level;
load_attr.prog_flags = prog->prog_flags;
+ if (prog->obj->gen_loader) {
+ bpf_gen__prog_load(prog->obj->gen_loader, &load_attr,
+ prog - prog->obj->programs);
+ *pfd = -1;
+ return 0;
+ }
retry_load:
if (log_buf_size) {
log_buf = malloc(log_buf_size);
@@ -7139,7 +7329,7 @@ retry_load:
pr_debug("verifier log:\n%s", log_buf);
if (prog->obj->rodata_map_idx >= 0 &&
- kernel_supports(FEAT_PROG_BIND_MAP)) {
+ kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) {
struct bpf_map *rodata_map =
&prog->obj->maps[prog->obj->rodata_map_idx];
@@ -7198,6 +7388,38 @@ out:
return ret;
}
+static int bpf_program__record_externs(struct bpf_program *prog)
+{
+ struct bpf_object *obj = prog->obj;
+ int i;
+
+ for (i = 0; i < prog->nr_reloc; i++) {
+ struct reloc_desc *relo = &prog->reloc_desc[i];
+ struct extern_desc *ext = &obj->externs[relo->sym_off];
+
+ switch (relo->type) {
+ case RELO_EXTERN_VAR:
+ if (ext->type != EXT_KSYM)
+ continue;
+ if (!ext->ksym.type_id) {
+ pr_warn("typeless ksym %s is not supported yet\n",
+ ext->name);
+ return -ENOTSUP;
+ }
+ bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR,
+ relo->insn_idx);
+ break;
+ case RELO_EXTERN_FUNC:
+ bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC,
+ relo->insn_idx);
+ break;
+ default:
+ continue;
+ }
+ }
+ return 0;
+}
+
static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
@@ -7206,7 +7428,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (prog->obj->loaded) {
pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if ((prog->type == BPF_PROG_TYPE_TRACING ||
@@ -7216,7 +7438,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
if (err)
- return err;
+ return libbpf_err(err);
prog->attach_btf_obj_fd = btf_obj_fd;
prog->attach_btf_id = btf_type_id;
@@ -7226,13 +7448,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (prog->preprocessor) {
pr_warn("Internal error: can't load program '%s'\n",
prog->name);
- return -LIBBPF_ERRNO__INTERNAL;
+ return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
}
prog->instances.fds = malloc(sizeof(int));
if (!prog->instances.fds) {
pr_warn("Not enough memory for BPF fds\n");
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
}
prog->instances.nr = 1;
prog->instances.fds[0] = -1;
@@ -7243,6 +7465,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
prog->name, prog->instances.nr);
}
+ if (prog->obj->gen_loader)
+ bpf_program__record_externs(prog);
err = load_program(prog, prog->insns, prog->insns_cnt,
license, kern_ver, &fd);
if (!err)
@@ -7289,7 +7513,7 @@ out:
pr_warn("failed to load program '%s'\n", prog->name);
zfree(&prog->insns);
prog->insns_cnt = 0;
- return err;
+ return libbpf_err(err);
}
static int
@@ -7319,6 +7543,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
if (err)
return err;
}
+ if (obj->gen_loader)
+ bpf_object__free_relocs(obj);
return 0;
}
@@ -7420,7 +7646,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
{
- return __bpf_object__open_xattr(attr, 0);
+ return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
}
struct bpf_object *bpf_object__open(const char *path)
@@ -7430,18 +7656,18 @@ struct bpf_object *bpf_object__open(const char *path)
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
- return bpf_object__open_xattr(&attr);
+ return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
}
struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
if (!path)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
pr_debug("loading %s\n", path);
- return __bpf_object__open(path, NULL, 0, opts);
+ return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
}
struct bpf_object *
@@ -7449,9 +7675,9 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
if (!obj_buf || obj_buf_sz == 0)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
- return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+ return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
}
struct bpf_object *
@@ -7466,9 +7692,9 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
/* returning NULL is wrong, but backwards-compatible */
if (!obj_buf || obj_buf_sz == 0)
- return NULL;
+ return errno = EINVAL, NULL;
- return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+ return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
}
int bpf_object__unload(struct bpf_object *obj)
@@ -7476,7 +7702,7 @@ int bpf_object__unload(struct bpf_object *obj)
size_t i;
if (!obj)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
for (i = 0; i < obj->nr_maps; i++) {
zclose(obj->maps[i].fd);
@@ -7497,11 +7723,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
bpf_object__for_each_map(m, obj) {
if (!bpf_map__is_internal(m))
continue;
- if (!kernel_supports(FEAT_GLOBAL_DATA)) {
+ if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) {
pr_warn("kernel doesn't support global data\n");
return -ENOTSUP;
}
- if (!kernel_supports(FEAT_ARRAY_MMAP))
+ if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
m->def.map_flags ^= BPF_F_MMAPABLE;
}
@@ -7699,6 +7925,12 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
if (ext->type != EXT_KSYM || !ext->ksym.type_id)
continue;
+ if (obj->gen_loader) {
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = 0;
+ ext->ksym.kernel_btf_id = 0;
+ continue;
+ }
t = btf__type_by_id(obj->btf, ext->btf_id);
if (btf_is_var(t))
err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
@@ -7803,16 +8035,19 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
int err, i;
if (!attr)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
obj = attr->obj;
if (!obj)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (obj->loaded) {
pr_warn("object '%s': load can't be attempted twice\n", obj->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
+ if (obj->gen_loader)
+ bpf_gen__init(obj->gen_loader, attr->log_level);
+
err = bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
@@ -7823,6 +8058,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
err = err ? : bpf_object__load_progs(obj, attr->log_level);
+ if (obj->gen_loader) {
+ /* reset FDs */
+ btf__set_fd(obj->btf, -1);
+ for (i = 0; i < obj->nr_maps; i++)
+ obj->maps[i].fd = -1;
+ if (!err)
+ err = bpf_gen__finish(obj->gen_loader);
+ }
+
/* clean up module BTFs */
for (i = 0; i < obj->btf_module_cnt; i++) {
close(obj->btf_modules[i].fd);
@@ -7849,7 +8093,7 @@ out:
bpf_object__unload(obj);
pr_warn("failed to load object '%s'\n", obj->path);
- return err;
+ return libbpf_err(err);
}
int bpf_object__load(struct bpf_object *obj)
@@ -7921,28 +8165,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
instance, prog->name, prog->instances.nr);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("failed to pin program: %s\n", cp);
- return err;
+ return libbpf_err(err);
}
pr_debug("pinned program '%s'\n", path);
@@ -7956,22 +8200,23 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
instance, prog->name, prog->instances.nr);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
err = unlink(path);
if (err != 0)
- return -errno;
+ return libbpf_err(-errno);
+
pr_debug("unpinned program '%s'\n", path);
return 0;
@@ -7983,20 +8228,20 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr <= 0) {
pr_warn("no instances of prog %s to pin\n", prog->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr == 1) {
@@ -8040,7 +8285,7 @@ err_unpin:
rmdir(path);
- return err;
+ return libbpf_err(err);
}
int bpf_program__unpin(struct bpf_program *prog, const char *path)
@@ -8049,16 +8294,16 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
if (prog == NULL) {
pr_warn("invalid program pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr <= 0) {
pr_warn("no instances of prog %s to pin\n", prog->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (prog->instances.nr == 1) {
@@ -8072,9 +8317,9 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
if (len < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ return libbpf_err(-ENAMETOOLONG);
err = bpf_program__unpin_instance(prog, buf, i);
if (err)
@@ -8083,7 +8328,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
err = rmdir(path);
if (err)
- return -errno;
+ return libbpf_err(-errno);
return 0;
}
@@ -8095,14 +8340,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
if (map == NULL) {
pr_warn("invalid map pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->pin_path) {
if (path && strcmp(path, map->pin_path)) {
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
bpf_map__name(map), map->pin_path, path);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
} else if (map->pinned) {
pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
bpf_map__name(map), map->pin_path);
@@ -8112,10 +8357,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
if (!path) {
pr_warn("missing a path to pin map '%s' at\n",
bpf_map__name(map));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
} else if (map->pinned) {
pr_warn("map '%s' already pinned\n", bpf_map__name(map));
- return -EEXIST;
+ return libbpf_err(-EEXIST);
}
map->pin_path = strdup(path);
@@ -8127,11 +8372,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
err = make_parent_dir(map->pin_path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(map->pin_path);
if (err)
- return err;
+ return libbpf_err(err);
if (bpf_obj_pin(map->fd, map->pin_path)) {
err = -errno;
@@ -8146,7 +8391,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
out_err:
cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
pr_warn("failed to pin map: %s\n", cp);
- return err;
+ return libbpf_err(err);
}
int bpf_map__unpin(struct bpf_map *map, const char *path)
@@ -8155,29 +8400,29 @@ int bpf_map__unpin(struct bpf_map *map, const char *path)
if (map == NULL) {
pr_warn("invalid map pointer\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->pin_path) {
if (path && strcmp(path, map->pin_path)) {
pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
bpf_map__name(map), map->pin_path, path);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
path = map->pin_path;
} else if (!path) {
pr_warn("no path to unpin map '%s' from\n",
bpf_map__name(map));
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
err = unlink(path);
if (err != 0)
- return -errno;
+ return libbpf_err(-errno);
map->pinned = false;
pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
@@ -8192,7 +8437,7 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
if (path) {
new = strdup(path);
if (!new)
- return -errno;
+ return libbpf_err(-errno);
}
free(map->pin_path);
@@ -8226,11 +8471,11 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
if (!obj->loaded) {
pr_warn("object not yet loaded; load it first\n");
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
bpf_object__for_each_map(map, obj) {
@@ -8270,7 +8515,7 @@ err_unpin_maps:
bpf_map__unpin(map, NULL);
}
- return err;
+ return libbpf_err(err);
}
int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
@@ -8279,7 +8524,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
bpf_object__for_each_map(map, obj) {
char *pin_path = NULL;
@@ -8291,9 +8536,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
len = snprintf(buf, PATH_MAX, "%s/%s", path,
bpf_map__name(map));
if (len < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ return libbpf_err(-ENAMETOOLONG);
sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
@@ -8302,7 +8547,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
err = bpf_map__unpin(map, pin_path);
if (err)
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -8314,11 +8559,11 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
if (!obj->loaded) {
pr_warn("object not yet loaded; load it first\n");
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
bpf_object__for_each_program(prog, obj) {
@@ -8357,7 +8602,7 @@ err_unpin_programs:
bpf_program__unpin(prog, buf);
}
- return err;
+ return libbpf_err(err);
}
int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
@@ -8366,7 +8611,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
int err;
if (!obj)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
bpf_object__for_each_program(prog, obj) {
char buf[PATH_MAX];
@@ -8375,13 +8620,13 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
len = snprintf(buf, PATH_MAX, "%s/%s", path,
prog->pin_name);
if (len < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ return libbpf_err(-ENAMETOOLONG);
err = bpf_program__unpin(prog, buf);
if (err)
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -8393,12 +8638,12 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
err = bpf_object__pin_maps(obj, path);
if (err)
- return err;
+ return libbpf_err(err);
err = bpf_object__pin_programs(obj, path);
if (err) {
bpf_object__unpin_maps(obj, path);
- return err;
+ return libbpf_err(err);
}
return 0;
@@ -8448,6 +8693,7 @@ void bpf_object__close(struct bpf_object *obj)
if (obj->clear_priv)
obj->clear_priv(obj, obj->priv);
+ bpf_gen__free(obj->gen_loader);
bpf_object__elf_finish(obj);
bpf_object__unload(obj);
btf__free(obj->btf);
@@ -8494,7 +8740,7 @@ bpf_object__next(struct bpf_object *prev)
const char *bpf_object__name(const struct bpf_object *obj)
{
- return obj ? obj->name : ERR_PTR(-EINVAL);
+ return obj ? obj->name : libbpf_err_ptr(-EINVAL);
}
unsigned int bpf_object__kversion(const struct bpf_object *obj)
@@ -8515,7 +8761,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
{
if (obj->loaded)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
obj->kern_version = kern_version;
@@ -8535,7 +8781,23 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
void *bpf_object__priv(const struct bpf_object *obj)
{
- return obj ? obj->priv : ERR_PTR(-EINVAL);
+ return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
+}
+
+int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
+{
+ struct bpf_gen *gen;
+
+ if (!opts)
+ return -EFAULT;
+ if (!OPTS_VALID(opts, gen_loader_opts))
+ return -EINVAL;
+ gen = calloc(sizeof(*gen), 1);
+ if (!gen)
+ return -ENOMEM;
+ gen->opts = opts;
+ obj->gen_loader = gen;
+ return 0;
}
static struct bpf_program *
@@ -8555,7 +8817,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
if (p->obj != obj) {
pr_warn("error: program handler doesn't match object\n");
- return NULL;
+ return errno = EINVAL, NULL;
}
idx = (p - obj->programs) + (forward ? 1 : -1);
@@ -8601,7 +8863,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
void *bpf_program__priv(const struct bpf_program *prog)
{
- return prog ? prog->priv : ERR_PTR(-EINVAL);
+ return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
}
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
@@ -8628,7 +8890,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
title = strdup(title);
if (!title) {
pr_warn("failed to strdup program title\n");
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
}
@@ -8643,7 +8905,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
{
if (prog->obj->loaded)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
prog->load = autoload;
return 0;
@@ -8665,17 +8927,17 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
int *instances_fds;
if (nr_instances <= 0 || !prep)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (prog->instances.nr > 0 || prog->instances.fds) {
pr_warn("Can't set pre-processor after loading\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
instances_fds = malloc(sizeof(int) * nr_instances);
if (!instances_fds) {
pr_warn("alloc memory failed for fds\n");
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
}
/* fill all fd with -1 */
@@ -8692,19 +8954,19 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
int fd;
if (!prog)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (n >= prog->instances.nr || n < 0) {
pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
n, prog->name, prog->instances.nr);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
fd = prog->instances.fds[n];
if (fd < 0) {
pr_warn("%dth instance of program '%s' is invalid\n",
n, prog->name);
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
return fd;
@@ -8730,7 +8992,7 @@ static bool bpf_program__is_type(const struct bpf_program *prog,
int bpf_program__set_##NAME(struct bpf_program *prog) \
{ \
if (!prog) \
- return -EINVAL; \
+ return libbpf_err(-EINVAL); \
bpf_program__set_type(prog, TYPE); \
return 0; \
} \
@@ -8820,7 +9082,10 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
- BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT),
+ BPF_EAPROG_SEC("sk_reuseport/migrate", BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_SK_REUSEPORT_SELECT_OR_MIGRATE),
+ BPF_EAPROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_SK_REUSEPORT_SELECT),
SEC_DEF("kprobe/", KPROBE,
.attach_fn = attach_kprobe),
BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
@@ -8884,6 +9149,8 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true,
.attach_fn = attach_iter),
+ SEC_DEF("syscall", SYSCALL,
+ .is_sleepable = true),
BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
@@ -9015,7 +9282,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
char *type_names;
if (!name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
sec_def = find_sec_def(name);
if (sec_def) {
@@ -9031,7 +9298,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
free(type_names);
}
- return -ESRCH;
+ return libbpf_err(-ESRCH);
}
static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
@@ -9173,6 +9440,28 @@ invalid_prog:
#define BTF_ITER_PREFIX "bpf_iter_"
#define BTF_MAX_NAME_SIZE 128
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+ const char **prefix, int *kind)
+{
+ switch (attach_type) {
+ case BPF_TRACE_RAW_TP:
+ *prefix = BTF_TRACE_PREFIX;
+ *kind = BTF_KIND_TYPEDEF;
+ break;
+ case BPF_LSM_MAC:
+ *prefix = BTF_LSM_PREFIX;
+ *kind = BTF_KIND_FUNC;
+ break;
+ case BPF_TRACE_ITER:
+ *prefix = BTF_ITER_PREFIX;
+ *kind = BTF_KIND_FUNC;
+ break;
+ default:
+ *prefix = "";
+ *kind = BTF_KIND_FUNC;
+ }
+}
+
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
const char *name, __u32 kind)
{
@@ -9193,21 +9482,11 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
static inline int find_attach_btf_id(struct btf *btf, const char *name,
enum bpf_attach_type attach_type)
{
- int err;
+ const char *prefix;
+ int kind;
- if (attach_type == BPF_TRACE_RAW_TP)
- err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
- BTF_KIND_TYPEDEF);
- else if (attach_type == BPF_LSM_MAC)
- err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
- BTF_KIND_FUNC);
- else if (attach_type == BPF_TRACE_ITER)
- err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
- BTF_KIND_FUNC);
- else
- err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
-
- return err;
+ btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
+ return find_btf_by_prefix_kind(btf, prefix, name, kind);
}
int libbpf_find_vmlinux_btf_id(const char *name,
@@ -9217,9 +9496,10 @@ int libbpf_find_vmlinux_btf_id(const char *name,
int err;
btf = libbpf_find_kernel_btf();
- if (IS_ERR(btf)) {
+ err = libbpf_get_error(btf);
+ if (err) {
pr_warn("vmlinux BTF is not found\n");
- return -EINVAL;
+ return libbpf_err(err);
}
err = find_attach_btf_id(btf, name, attach_type);
@@ -9227,7 +9507,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
pr_warn("%s is not found in vmlinux BTF\n", name);
btf__free(btf);
- return err;
+ return libbpf_err(err);
}
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -9238,10 +9518,11 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
int err = -EINVAL;
info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
- if (IS_ERR_OR_NULL(info_linear)) {
+ err = libbpf_get_error(info_linear);
+ if (err) {
pr_warn("failed get_prog_info_linear for FD %d\n",
attach_prog_fd);
- return -EINVAL;
+ return err;
}
info = &info_linear->info;
if (!info->btf_id) {
@@ -9306,7 +9587,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
__u32 attach_prog_fd = prog->attach_prog_fd;
const char *name = prog->sec_name, *attach_name;
const struct bpf_sec_def *sec = NULL;
- int i, err;
+ int i, err = 0;
if (!name)
return -EINVAL;
@@ -9341,7 +9622,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
}
/* kernel/module BTF ID */
- err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+ if (prog->obj->gen_loader) {
+ bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
+ *btf_obj_fd = 0;
+ *btf_type_id = 1;
+ } else {
+ err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+ }
if (err) {
pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
return err;
@@ -9356,13 +9643,13 @@ int libbpf_attach_type_by_name(const char *name,
int i;
if (!name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
if (strncmp(name, section_defs[i].sec, section_defs[i].len))
continue;
if (!section_defs[i].is_attachable)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
*attach_type = section_defs[i].expected_attach_type;
return 0;
}
@@ -9373,17 +9660,17 @@ int libbpf_attach_type_by_name(const char *name,
free(type_names);
}
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
int bpf_map__fd(const struct bpf_map *map)
{
- return map ? map->fd : -EINVAL;
+ return map ? map->fd : libbpf_err(-EINVAL);
}
const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
{
- return map ? &map->def : ERR_PTR(-EINVAL);
+ return map ? &map->def : libbpf_err_ptr(-EINVAL);
}
const char *bpf_map__name(const struct bpf_map *map)
@@ -9399,7 +9686,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map)
int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.type = type;
return 0;
}
@@ -9412,7 +9699,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map)
int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.map_flags = flags;
return 0;
}
@@ -9425,7 +9712,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map)
int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->numa_node = numa_node;
return 0;
}
@@ -9438,7 +9725,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map)
int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.key_size = size;
return 0;
}
@@ -9451,7 +9738,7 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->def.value_size = size;
return 0;
}
@@ -9470,7 +9757,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv)
{
if (!map)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (map->priv) {
if (map->clear_priv)
@@ -9484,7 +9771,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
void *bpf_map__priv(const struct bpf_map *map)
{
- return map ? map->priv : ERR_PTR(-EINVAL);
+ return map ? map->priv : libbpf_err_ptr(-EINVAL);
}
int bpf_map__set_initial_value(struct bpf_map *map,
@@ -9492,12 +9779,20 @@ int bpf_map__set_initial_value(struct bpf_map *map,
{
if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
size != map->def.value_size || map->fd >= 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
memcpy(map->mmaped, data, size);
return 0;
}
+const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+{
+ if (!map->mmaped)
+ return NULL;
+ *psize = map->def.value_size;
+ return map->mmaped;
+}
+
bool bpf_map__is_offload_neutral(const struct bpf_map *map)
{
return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
@@ -9516,7 +9811,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map)
int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
{
if (map->fd >= 0)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
map->map_ifindex = ifindex;
return 0;
}
@@ -9525,11 +9820,11 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
{
if (!bpf_map_type__is_map_in_map(map->def.type)) {
pr_warn("error: unsupported map type\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
if (map->inner_map_fd != -1) {
pr_warn("error: inner_map_fd already specified\n");
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
zfree(&map->inner_map);
map->inner_map_fd = fd;
@@ -9543,7 +9838,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
struct bpf_map *s, *e;
if (!obj || !obj->maps)
- return NULL;
+ return errno = EINVAL, NULL;
s = obj->maps;
e = obj->maps + obj->nr_maps;
@@ -9551,7 +9846,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
if ((m < s) || (m >= e)) {
pr_warn("error in %s: map handler doesn't belong to object\n",
__func__);
- return NULL;
+ return errno = EINVAL, NULL;
}
idx = (m - obj->maps) + i;
@@ -9590,7 +9885,7 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
if (pos->name && !strcmp(pos->name, name))
return pos;
}
- return NULL;
+ return errno = ENOENT, NULL;
}
int
@@ -9602,12 +9897,23 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
struct bpf_map *
bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
{
- return ERR_PTR(-ENOTSUP);
+ return libbpf_err_ptr(-ENOTSUP);
}
long libbpf_get_error(const void *ptr)
{
- return PTR_ERR_OR_ZERO(ptr);
+ if (!IS_ERR_OR_NULL(ptr))
+ return 0;
+
+ if (IS_ERR(ptr))
+ errno = -PTR_ERR(ptr);
+
+ /* If ptr == NULL, then errno should be already set by the failing
+ * API, because libbpf never returns NULL on success and it now always
+ * sets errno on error. So no extra errno handling for ptr == NULL
+ * case.
+ */
+ return -errno;
}
int bpf_prog_load(const char *file, enum bpf_prog_type type,
@@ -9633,16 +9939,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
int err;
if (!attr)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (!attr->file)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
open_attr.file = attr->file;
open_attr.prog_type = attr->prog_type;
obj = bpf_object__open_xattr(&open_attr);
- if (IS_ERR_OR_NULL(obj))
- return -ENOENT;
+ err = libbpf_get_error(obj);
+ if (err)
+ return libbpf_err(-ENOENT);
bpf_object__for_each_program(prog, obj) {
enum bpf_attach_type attach_type = attr->expected_attach_type;
@@ -9662,7 +9969,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
* didn't provide a fallback type, too bad...
*/
bpf_object__close(obj);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
prog->prog_ifindex = attr->ifindex;
@@ -9680,13 +9987,13 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
if (!first_prog) {
pr_warn("object file doesn't contain bpf program\n");
bpf_object__close(obj);
- return -ENOENT;
+ return libbpf_err(-ENOENT);
}
err = bpf_object__load(obj);
if (err) {
bpf_object__close(obj);
- return err;
+ return libbpf_err(err);
}
*pobj = obj;
@@ -9705,7 +10012,10 @@ struct bpf_link {
/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
- return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+ int ret;
+
+ ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+ return libbpf_err_errno(ret);
}
/* Release "ownership" of underlying BPF resource (typically, BPF program
@@ -9738,7 +10048,7 @@ int bpf_link__destroy(struct bpf_link *link)
free(link->pin_path);
free(link);
- return err;
+ return libbpf_err(err);
}
int bpf_link__fd(const struct bpf_link *link)
@@ -9753,7 +10063,7 @@ const char *bpf_link__pin_path(const struct bpf_link *link)
static int bpf_link__detach_fd(struct bpf_link *link)
{
- return close(link->fd);
+ return libbpf_err_errno(close(link->fd));
}
struct bpf_link *bpf_link__open(const char *path)
@@ -9765,13 +10075,13 @@ struct bpf_link *bpf_link__open(const char *path)
if (fd < 0) {
fd = -errno;
pr_warn("failed to open link at %s: %d\n", path, fd);
- return ERR_PTR(fd);
+ return libbpf_err_ptr(fd);
}
link = calloc(1, sizeof(*link));
if (!link) {
close(fd);
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
link->detach = &bpf_link__detach_fd;
link->fd = fd;
@@ -9779,7 +10089,7 @@ struct bpf_link *bpf_link__open(const char *path)
link->pin_path = strdup(path);
if (!link->pin_path) {
bpf_link__destroy(link);
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
}
return link;
@@ -9795,22 +10105,22 @@ int bpf_link__pin(struct bpf_link *link, const char *path)
int err;
if (link->pin_path)
- return -EBUSY;
+ return libbpf_err(-EBUSY);
err = make_parent_dir(path);
if (err)
- return err;
+ return libbpf_err(err);
err = check_path(path);
if (err)
- return err;
+ return libbpf_err(err);
link->pin_path = strdup(path);
if (!link->pin_path)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
if (bpf_obj_pin(link->fd, link->pin_path)) {
err = -errno;
zfree(&link->pin_path);
- return err;
+ return libbpf_err(err);
}
pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
@@ -9822,11 +10132,11 @@ int bpf_link__unpin(struct bpf_link *link)
int err;
if (!link->pin_path)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = unlink(link->pin_path);
if (err != 0)
- return -errno;
+ return libbpf_err_errno(err);
pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
zfree(&link->pin_path);
@@ -9842,11 +10152,10 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
err = -errno;
close(link->fd);
- return err;
+ return libbpf_err(err);
}
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
- int pfd)
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
{
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
@@ -9855,18 +10164,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
if (pfd < 0) {
pr_warn("prog '%s': invalid perf event FD %d\n",
prog->name, pfd);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_perf_event;
link->fd = pfd;
@@ -9878,14 +10187,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
if (err == -EPROTO)
pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
prog->name, pfd);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
free(link);
pr_warn("prog '%s': failed to enable pfd %d: %s\n",
prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10009,16 +10318,16 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10051,17 +10360,17 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10129,16 +10438,16 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
prog->name, tp_category, tp_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link = bpf_program__attach_perf_event(prog, pfd);
- if (IS_ERR(link)) {
+ err = libbpf_get_error(link);
+ if (err) {
close(pfd);
- err = PTR_ERR(link);
pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
prog->name, tp_category, tp_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return link;
+ return libbpf_err_ptr(err);
}
return link;
}
@@ -10151,20 +10460,19 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
sec_name = strdup(prog->sec_name);
if (!sec_name)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
/* extract "tp/<category>/<name>" */
tp_cat = sec_name + sec->len;
tp_name = strchr(tp_cat, '/');
if (!tp_name) {
- link = ERR_PTR(-EINVAL);
- goto out;
+ free(sec_name);
+ return libbpf_err_ptr(-EINVAL);
}
*tp_name = '\0';
tp_name++;
link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
-out:
free(sec_name);
return link;
}
@@ -10179,12 +10487,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
@@ -10193,7 +10501,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
free(link);
pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link->fd = pfd;
return link;
@@ -10217,12 +10525,12 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
@@ -10231,7 +10539,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
free(link);
pr_warn("prog '%s': failed to attach: %s\n",
prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
- return ERR_PTR(pfd);
+ return libbpf_err_ptr(pfd);
}
link->fd = pfd;
return (struct bpf_link *)link;
@@ -10259,12 +10567,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
return bpf_program__attach_lsm(prog);
}
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
- struct bpf_program *prog)
-{
- return bpf_program__attach_iter(prog, NULL);
-}
-
static struct bpf_link *
bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
const char *target_name)
@@ -10279,12 +10581,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
attach_type = bpf_program__get_expected_attach_type(prog);
@@ -10295,7 +10597,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
pr_warn("prog '%s': failed to attach to %s: %s\n",
prog->name, target_name,
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
- return ERR_PTR(link_fd);
+ return libbpf_err_ptr(link_fd);
}
link->fd = link_fd;
return link;
@@ -10328,19 +10630,19 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
if (!!target_fd != !!attach_func_name) {
pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
if (prog->type != BPF_PROG_TYPE_EXT) {
pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
if (target_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
if (btf_id < 0)
- return ERR_PTR(btf_id);
+ return libbpf_err_ptr(btf_id);
return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
} else {
@@ -10362,7 +10664,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
__u32 target_fd = 0;
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
@@ -10370,12 +10672,12 @@ bpf_program__attach_iter(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("prog '%s': can't attach before loaded\n", prog->name);
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
}
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
@@ -10385,19 +10687,25 @@ bpf_program__attach_iter(struct bpf_program *prog,
free(link);
pr_warn("prog '%s': failed to attach to iterator: %s\n",
prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
- return ERR_PTR(link_fd);
+ return libbpf_err_ptr(link_fd);
}
link->fd = link_fd;
return link;
}
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_iter(prog, NULL);
+}
+
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
- return ERR_PTR(-ESRCH);
+ return libbpf_err_ptr(-ESRCH);
return sec_def->attach_fn(sec_def, prog);
}
@@ -10420,11 +10728,11 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
int err;
if (!bpf_map__is_struct_ops(map) || map->fd == -1)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
link = calloc(1, sizeof(*link));
if (!link)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
st_ops = map->st_ops;
for (i = 0; i < btf_vlen(st_ops->type); i++) {
@@ -10444,7 +10752,7 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
if (err) {
err = -errno;
free(link);
- return ERR_PTR(err);
+ return libbpf_err_ptr(err);
}
link->detach = bpf_link__detach_struct_ops;
@@ -10498,7 +10806,7 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
}
ring_buffer_write_tail(header, data_tail);
- return ret;
+ return libbpf_err(ret);
}
struct perf_buffer;
@@ -10651,7 +10959,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
p.lost_cb = opts ? opts->lost_cb : NULL;
p.ctx = opts ? opts->ctx : NULL;
- return __perf_buffer__new(map_fd, page_cnt, &p);
+ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
struct perf_buffer *
@@ -10667,7 +10975,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
p.cpus = opts->cpus;
p.map_keys = opts->map_keys;
- return __perf_buffer__new(map_fd, page_cnt, &p);
+ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
@@ -10888,16 +11196,19 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
int i, cnt, err;
cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+ if (cnt < 0)
+ return libbpf_err_errno(cnt);
+
for (i = 0; i < cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
pr_warn("error while processing records: %d\n", err);
- return err;
+ return libbpf_err(err);
}
}
- return cnt < 0 ? -errno : cnt;
+ return cnt;
}
/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
@@ -10918,11 +11229,11 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
struct perf_cpu_buf *cpu_buf;
if (buf_idx >= pb->cpu_cnt)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
cpu_buf = pb->cpu_bufs[buf_idx];
if (!cpu_buf)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
return cpu_buf->fd;
}
@@ -10940,11 +11251,11 @@ int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
struct perf_cpu_buf *cpu_buf;
if (buf_idx >= pb->cpu_cnt)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
cpu_buf = pb->cpu_bufs[buf_idx];
if (!cpu_buf)
- return -ENOENT;
+ return libbpf_err(-ENOENT);
return perf_buffer__process_records(pb, cpu_buf);
}
@@ -10962,7 +11273,7 @@ int perf_buffer__consume(struct perf_buffer *pb)
err = perf_buffer__process_records(pb, cpu_buf);
if (err) {
pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
- return err;
+ return libbpf_err(err);
}
}
return 0;
@@ -11074,13 +11385,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
void *ptr;
if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
- return ERR_PTR(-EINVAL);
+ return libbpf_err_ptr(-EINVAL);
/* step 1: get array dimensions */
err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
if (err) {
pr_debug("can't get prog info: %s", strerror(errno));
- return ERR_PTR(-EFAULT);
+ return libbpf_err_ptr(-EFAULT);
}
/* step 2: calculate total size of all arrays */
@@ -11112,7 +11423,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
data_len = roundup(data_len, sizeof(__u64));
info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
if (!info_linear)
- return ERR_PTR(-ENOMEM);
+ return libbpf_err_ptr(-ENOMEM);
/* step 4: fill data to info_linear->info */
info_linear->arrays = arrays;
@@ -11144,7 +11455,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
if (err) {
pr_debug("can't get prog info: %s", strerror(errno));
free(info_linear);
- return ERR_PTR(-EFAULT);
+ return libbpf_err_ptr(-EFAULT);
}
/* step 6: verify the data */
@@ -11223,26 +11534,26 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
int btf_obj_fd = 0, btf_id = 0, err;
if (!prog || attach_prog_fd < 0 || !attach_func_name)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (prog->obj->loaded)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (attach_prog_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name,
attach_prog_fd);
if (btf_id < 0)
- return btf_id;
+ return libbpf_err(btf_id);
} else {
/* load btf_vmlinux, if not yet */
err = bpf_object__load_vmlinux_btf(prog->obj, true);
if (err)
- return err;
+ return libbpf_err(err);
err = find_kernel_btf_id(prog->obj, attach_func_name,
prog->expected_attach_type,
&btf_obj_fd, &btf_id);
if (err)
- return err;
+ return libbpf_err(err);
}
prog->attach_btf_id = btf_id;
@@ -11341,7 +11652,7 @@ int libbpf_num_possible_cpus(void)
err = parse_cpu_mask_file(fcpu, &mask, &n);
if (err)
- return err;
+ return libbpf_err(err);
tmp_cpus = 0;
for (i = 0; i < n; i++) {
@@ -11361,7 +11672,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
.object_name = s->name,
);
struct bpf_object *obj;
- int i;
+ int i, err;
/* Attempt to preserve opts->object_name, unless overriden by user
* explicitly. Overwriting object name for skeletons is discouraged,
@@ -11376,10 +11687,11 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
}
obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
- if (IS_ERR(obj)) {
- pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
- s->name, PTR_ERR(obj));
- return PTR_ERR(obj);
+ err = libbpf_get_error(obj);
+ if (err) {
+ pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
+ s->name, err);
+ return libbpf_err(err);
}
*s->obj = obj;
@@ -11392,7 +11704,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
*map = bpf_object__find_map_by_name(obj, name);
if (!*map) {
pr_warn("failed to find skeleton map '%s'\n", name);
- return -ESRCH;
+ return libbpf_err(-ESRCH);
}
/* externs shouldn't be pre-setup from user code */
@@ -11407,7 +11719,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
*prog = bpf_object__find_program_by_name(obj, name);
if (!*prog) {
pr_warn("failed to find skeleton program '%s'\n", name);
- return -ESRCH;
+ return libbpf_err(-ESRCH);
}
}
@@ -11421,7 +11733,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
err = bpf_object__load(*s->obj);
if (err) {
pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
- return err;
+ return libbpf_err(err);
}
for (i = 0; i < s->map_cnt; i++) {
@@ -11460,7 +11772,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
*mmaped = NULL;
pr_warn("failed to re-mmap() map '%s': %d\n",
bpf_map__name(map), err);
- return err;
+ return libbpf_err(err);
}
}
@@ -11469,7 +11781,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
- int i;
+ int i, err;
for (i = 0; i < s->prog_cnt; i++) {
struct bpf_program *prog = *s->progs[i].prog;
@@ -11484,10 +11796,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
continue;
*link = sec_def->attach_fn(sec_def, prog);
- if (IS_ERR(*link)) {
- pr_warn("failed to auto-attach program '%s': %ld\n",
- bpf_program__name(prog), PTR_ERR(*link));
- return PTR_ERR(*link);
+ err = libbpf_get_error(*link);
+ if (err) {
+ pr_warn("failed to auto-attach program '%s': %d\n",
+ bpf_program__name(prog), err);
+ return libbpf_err(err);
}
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index bec4e6a6e31d..6e61342ba56c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -18,6 +18,7 @@
#include <linux/bpf.h>
#include "libbpf_common.h"
+#include "libbpf_legacy.h"
#ifdef __cplusplus
extern "C" {
@@ -471,6 +472,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
+LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
@@ -498,6 +500,7 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
+/* XDP related API */
struct xdp_link_info {
__u32 prog_id;
__u32 drv_prog_id;
@@ -520,6 +523,49 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
+/* TC related API */
+enum bpf_tc_attach_point {
+ BPF_TC_INGRESS = 1 << 0,
+ BPF_TC_EGRESS = 1 << 1,
+ BPF_TC_CUSTOM = 1 << 2,
+};
+
+#define BPF_TC_PARENT(a, b) \
+ ((((a) << 16) & 0xFFFF0000U) | ((b) & 0x0000FFFFU))
+
+enum bpf_tc_flags {
+ BPF_TC_F_REPLACE = 1 << 0,
+};
+
+struct bpf_tc_hook {
+ size_t sz;
+ int ifindex;
+ enum bpf_tc_attach_point attach_point;
+ __u32 parent;
+ size_t :0;
+};
+#define bpf_tc_hook__last_field parent
+
+struct bpf_tc_opts {
+ size_t sz;
+ int prog_fd;
+ __u32 flags;
+ __u32 prog_id;
+ __u32 handle;
+ __u32 priority;
+ size_t :0;
+};
+#define bpf_tc_opts__last_field priority
+
+LIBBPF_API int bpf_tc_hook_create(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_hook_destroy(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_attach(const struct bpf_tc_hook *hook,
+ struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
+ struct bpf_tc_opts *opts);
+
/* Ring buffer APIs */
struct ring_buffer;
@@ -756,6 +802,18 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s);
+struct gen_loader_opts {
+ size_t sz; /* size of this struct, for forward/backward compatiblity */
+ const char *data;
+ const char *insns;
+ __u32 data_sz;
+ __u32 insns_sz;
+};
+
+#define gen_loader_opts__last_field insns_sz
+LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
+ struct gen_loader_opts *opts);
+
enum libbpf_tristate {
TRI_NO = 0,
TRI_YES = 1,
@@ -768,10 +826,18 @@ struct bpf_linker_opts {
};
#define bpf_linker_opts__last_field sz
+struct bpf_linker_file_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+};
+#define bpf_linker_file_opts__last_field sz
+
struct bpf_linker;
LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
-LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename);
+LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker,
+ const char *filename,
+ const struct bpf_linker_file_opts *opts);
LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index b9b29baf1df8..944c99d1ded3 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -361,4 +361,17 @@ LIBBPF_0.4.0 {
bpf_linker__new;
bpf_map__inner_map;
bpf_object__set_kversion;
+ bpf_tc_attach;
+ bpf_tc_detach;
+ bpf_tc_hook_create;
+ bpf_tc_hook_destroy;
+ bpf_tc_query;
} LIBBPF_0.3.0;
+
+LIBBPF_0.5.0 {
+ global:
+ bpf_map__initial_value;
+ bpf_map_lookup_and_delete_elem_flags;
+ bpf_object__gen_loader;
+ libbpf_set_strict_mode;
+} LIBBPF_0.4.0;
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
index 0afb51f7a919..96f67a772a1b 100644
--- a/tools/lib/bpf/libbpf_errno.c
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -12,6 +12,7 @@
#include <string.h>
#include "libbpf.h"
+#include "libbpf_internal.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -39,7 +40,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
int libbpf_strerror(int err, char *buf, size_t size)
{
if (!buf || !size)
- return -1;
+ return libbpf_err(-EINVAL);
err = err > 0 ? err : -err;
@@ -48,7 +49,7 @@ int libbpf_strerror(int err, char *buf, size_t size)
ret = strerror_r(err, buf, size);
buf[size - 1] = '\0';
- return ret;
+ return libbpf_err_errno(ret);
}
if (err < __LIBBPF_ERRNO__END) {
@@ -62,5 +63,5 @@ int libbpf_strerror(int err, char *buf, size_t size)
snprintf(buf, size, "Unknown libbpf error %d", err);
buf[size - 1] = '\0';
- return -1;
+ return libbpf_err(-ENOENT);
}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index ee426226928f..016ca7cb4f8a 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -11,6 +11,9 @@
#include <stdlib.h>
#include <limits.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "libbpf_legacy.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -28,6 +31,12 @@
#ifndef R_BPF_64_64
#define R_BPF_64_64 1
#endif
+#ifndef R_BPF_64_ABS64
+#define R_BPF_64_ABS64 2
+#endif
+#ifndef R_BPF_64_ABS32
+#define R_BPF_64_ABS32 3
+#endif
#ifndef R_BPF_64_32
#define R_BPF_64_32 10
#endif
@@ -41,6 +50,11 @@
#define ELF_C_READ_MMAP ELF_C_READ
#endif
+/* Older libelf all end up in this expression, for both 32 and 64 bit */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
@@ -258,6 +272,8 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+ const char **prefix, int *kind);
struct btf_ext_info {
/*
@@ -428,4 +444,54 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct
int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
+extern enum libbpf_strict_mode libbpf_mode;
+
+/* handle direct returned errors */
+static inline int libbpf_err(int ret)
+{
+ if (ret < 0)
+ errno = -ret;
+ return ret;
+}
+
+/* handle errno-based (e.g., syscall or libc) errors according to libbpf's
+ * strict mode settings
+ */
+static inline int libbpf_err_errno(int ret)
+{
+ if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
+ /* errno is already assumed to be set on error */
+ return ret < 0 ? -errno : ret;
+
+ /* legacy: on error return -1 directly and don't touch errno */
+ return ret;
+}
+
+/* handle error for pointer-returning APIs, err is assumed to be < 0 always */
+static inline void *libbpf_err_ptr(int err)
+{
+ /* set errno on error, this doesn't break anything */
+ errno = -err;
+
+ if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+ return NULL;
+
+ /* legacy: encode err as ptr */
+ return ERR_PTR(err);
+}
+
+/* handle pointer-returning APIs' error handling */
+static inline void *libbpf_ptr(void *ret)
+{
+ /* set errno on error, this doesn't break anything */
+ if (IS_ERR(ret))
+ errno = -PTR_ERR(ret);
+
+ if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+ return IS_ERR(ret) ? NULL : ret;
+
+ /* legacy: pass-through original pointer */
+ return ret;
+}
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
new file mode 100644
index 000000000000..df0d03dcffab
--- /dev/null
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0])
+ *
+ * [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY
+ *
+ * Copyright (C) 2021 Facebook
+ */
+#ifndef __LIBBPF_LEGACY_BPF_H
+#define __LIBBPF_LEGACY_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum libbpf_strict_mode {
+ /* Turn on all supported strict features of libbpf to simulate libbpf
+ * v1.0 behavior.
+ * This will be the default behavior in libbpf v1.0.
+ */
+ LIBBPF_STRICT_ALL = 0xffffffff,
+
+ /*
+ * Disable any libbpf 1.0 behaviors. This is the default before libbpf
+ * v1.0. It won't be supported anymore in v1.0, please update your
+ * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0.
+ */
+ LIBBPF_STRICT_NONE = 0x00,
+ /*
+ * Return NULL pointers on error, not ERR_PTR(err).
+ * Additionally, libbpf also always sets errno to corresponding Exx
+ * (positive) error code.
+ */
+ LIBBPF_STRICT_CLEAN_PTRS = 0x01,
+ /*
+ * Return actual error codes from low-level APIs directly, not just -1.
+ * Additionally, libbpf also always sets errno to corresponding Exx
+ * (positive) error code.
+ */
+ LIBBPF_STRICT_DIRECT_ERRS = 0x02,
+
+ __LIBBPF_STRICT_LAST,
+};
+
+LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LEGACY_BPF_H */
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 9de084b1c699..10911a8cad0f 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -158,7 +158,9 @@ struct bpf_linker {
static int init_output_elf(struct bpf_linker *linker, const char *file);
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts,
+ struct src_obj *obj);
static int linker_sanity_check_elf(struct src_obj *obj);
static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
@@ -218,16 +220,16 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
int err;
if (!OPTS_VALID(opts, bpf_linker_opts))
- return NULL;
+ return errno = EINVAL, NULL;
if (elf_version(EV_CURRENT) == EV_NONE) {
pr_warn_elf("libelf initialization failed");
- return NULL;
+ return errno = EINVAL, NULL;
}
linker = calloc(1, sizeof(*linker));
if (!linker)
- return NULL;
+ return errno = ENOMEM, NULL;
linker->fd = -1;
@@ -239,7 +241,7 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
err_out:
bpf_linker__free(linker);
- return NULL;
+ return errno = -err, NULL;
}
static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
@@ -435,15 +437,19 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
return 0;
}
-int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts)
{
struct src_obj obj = {};
int err = 0;
+ if (!OPTS_VALID(opts, bpf_linker_file_opts))
+ return libbpf_err(-EINVAL);
+
if (!linker->elf)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
- err = err ?: linker_load_obj_file(linker, filename, &obj);
+ err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
err = err ?: linker_append_sec_data(linker, &obj);
err = err ?: linker_append_elf_syms(linker, &obj);
err = err ?: linker_append_elf_relos(linker, &obj);
@@ -461,7 +467,7 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
if (obj.fd >= 0)
close(obj.fd);
- return err;
+ return libbpf_err(err);
}
static bool is_dwarf_sec_name(const char *name)
@@ -529,7 +535,9 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
return sec;
}
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj)
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+ const struct bpf_linker_file_opts *opts,
+ struct src_obj *obj)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
const int host_endianness = ELFDATA2LSB;
@@ -884,7 +892,8 @@ static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *se
size_t sym_idx = ELF64_R_SYM(relo->r_info);
size_t sym_type = ELF64_R_TYPE(relo->r_info);
- if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 &&
+ sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) {
pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
i, sec->sec_idx, sym_type, obj->filename);
return -EINVAL;
@@ -1780,7 +1789,7 @@ static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
/* libelf doesn't provide setters for ST_VISIBILITY,
* but it is stored in the lower 2 bits of st_other
*/
- sym->st_other &= 0x03;
+ sym->st_other &= ~0x03;
sym->st_other |= sym_vis;
}
@@ -2539,11 +2548,11 @@ int bpf_linker__finalize(struct bpf_linker *linker)
int err, i;
if (!linker->elf)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = finalize_btf(linker);
if (err)
- return err;
+ return libbpf_err(err);
/* Finalize strings */
strs_sz = strset__data_size(linker->strtab_strs);
@@ -2575,14 +2584,14 @@ int bpf_linker__finalize(struct bpf_linker *linker)
if (elf_update(linker->elf, ELF_C_NULL) < 0) {
err = -errno;
pr_warn_elf("failed to finalize ELF layout");
- return err;
+ return libbpf_err(err);
}
/* Write out final ELF contents */
if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
err = -errno;
pr_warn_elf("failed to write ELF contents");
- return err;
+ return libbpf_err(err);
}
elf_end(linker->elf);
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index d2cb28e9ef52..39f25e09b51e 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -4,7 +4,10 @@
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
+#include <arpa/inet.h>
#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>
#include <errno.h>
@@ -73,9 +76,20 @@ cleanup:
return ret;
}
-static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
- __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
- void *cookie)
+static void libbpf_netlink_close(int sock)
+{
+ close(sock);
+}
+
+enum {
+ NL_CONT,
+ NL_NEXT,
+ NL_DONE,
+};
+
+static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+ __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+ void *cookie)
{
bool multipart = true;
struct nlmsgerr *err;
@@ -84,6 +98,7 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
int len, ret;
while (multipart) {
+start:
multipart = false;
len = recv(sock, buf, sizeof(buf), 0);
if (len < 0) {
@@ -121,8 +136,16 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
}
if (_fn) {
ret = _fn(nh, fn, cookie);
- if (ret)
+ switch (ret) {
+ case NL_CONT:
+ break;
+ case NL_NEXT:
+ goto start;
+ case NL_DONE:
+ return 0;
+ default:
return ret;
+ }
}
}
}
@@ -131,95 +154,92 @@ done:
return ret;
}
-static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
- __u32 flags)
+static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
+ __dump_nlmsg_t parse_msg,
+ libbpf_dump_nlmsg_t parse_attr,
+ void *cookie)
{
- int sock, seq = 0, ret;
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
__u32 nl_pid = 0;
+ int sock, ret;
sock = libbpf_netlink_open(&nl_pid);
if (sock < 0)
return sock;
+ req->nh.nlmsg_pid = 0;
+ req->nh.nlmsg_seq = time(NULL);
+
+ if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
+ parse_msg, parse_attr, cookie);
+out:
+ libbpf_netlink_close(sock);
+ return ret;
+}
+
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+ __u32 flags)
+{
+ struct nlattr *nla;
+ int ret;
+ struct libbpf_nla_req req;
+
memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
-
- /* started nested attribute for XDP */
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | IFLA_XDP;
- nla->nla_len = NLA_HDRLEN;
-
- /* add XDP fd */
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FD;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len += nla_xdp->nla_len;
-
- /* if user passed in any flags, add those too */
+ req.ifinfo.ifi_index = ifindex;
+
+ nla = nlattr_begin_nested(&req, IFLA_XDP);
+ if (!nla)
+ return -EMSGSIZE;
+ ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
+ if (ret < 0)
+ return ret;
if (flags) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FLAGS;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
- nla->nla_len += nla_xdp->nla_len;
+ ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
+ if (ret < 0)
+ return ret;
}
-
if (flags & XDP_FLAGS_REPLACE) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
- nla->nla_len += nla_xdp->nla_len;
+ ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
+ sizeof(old_fd));
+ if (ret < 0)
+ return ret;
}
+ nlattr_end_nested(&req, nla);
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- ret = -errno;
- goto cleanup;
- }
- ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
-
-cleanup:
- close(sock);
- return ret;
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
const struct bpf_xdp_set_link_opts *opts)
{
- int old_fd = -1;
+ int old_fd = -1, ret;
if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (OPTS_HAS(opts, old_fd)) {
old_fd = OPTS_GET(opts, old_fd, -1);
flags |= XDP_FLAGS_REPLACE;
}
- return __bpf_set_link_xdp_fd_replace(ifindex, fd,
- old_fd,
- flags);
+ ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+ return libbpf_err(ret);
}
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
{
- return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+ int ret;
+
+ ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+ return libbpf_err(ret);
}
static int __dump_link_nlmsg(struct nlmsghdr *nlh,
@@ -231,6 +251,7 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh,
len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+
if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
return -LIBBPF_ERRNO__NLPARSE;
@@ -282,34 +303,33 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
-static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
-
int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags)
{
struct xdp_id_md xdp_id = {};
- int sock, ret;
- __u32 nl_pid = 0;
__u32 mask;
+ int ret;
+ struct libbpf_nla_req req = {
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nh.nlmsg_type = RTM_GETLINK,
+ .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifinfo.ifi_family = AF_PACKET,
+ };
if (flags & ~XDP_FLAGS_MASK || !info_size)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
/* Check whether the single {HW,DRV,SKB} mode is set */
flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
mask = flags - 1;
if (flags && flags & mask)
- return -EINVAL;
-
- sock = libbpf_netlink_open(&nl_pid);
- if (sock < 0)
- return sock;
+ return libbpf_err(-EINVAL);
xdp_id.ifindex = ifindex;
xdp_id.flags = flags;
- ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
+ ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+ get_xdp_info, &xdp_id);
if (!ret) {
size_t sz = min(info_size, sizeof(xdp_id.info));
@@ -317,8 +337,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
memset((void *) info + sz, 0, info_size - sz);
}
- close(sock);
- return ret;
+ return libbpf_err(ret);
}
static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
@@ -346,27 +365,394 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
if (!ret)
*prog_id = get_xdp_id(&info, flags);
- return ret;
+ return libbpf_err(ret);
}
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+
+static int clsact_config(struct libbpf_nla_req *req)
{
- struct {
- struct nlmsghdr nlh;
- struct ifinfomsg ifm;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
- .nlh.nlmsg_type = RTM_GETLINK,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .ifm.ifi_family = AF_PACKET,
- };
- int seq = time(NULL);
+ req->tc.tcm_parent = TC_H_CLSACT;
+ req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+
+ return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
+}
+
+static int attach_point_to_config(struct bpf_tc_hook *hook,
+ qdisc_config_t *config)
+{
+ switch (OPTS_GET(hook, attach_point, 0)) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ case BPF_TC_INGRESS | BPF_TC_EGRESS:
+ if (OPTS_GET(hook, parent, 0))
+ return -EINVAL;
+ *config = &clsact_config;
+ return 0;
+ case BPF_TC_CUSTOM:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point,
+ __u32 *parent)
+{
+ switch (attach_point) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ if (*parent)
+ return -EINVAL;
+ *parent = TC_H_MAKE(TC_H_CLSACT,
+ attach_point == BPF_TC_INGRESS ?
+ TC_H_MIN_INGRESS : TC_H_MIN_EGRESS);
+ break;
+ case BPF_TC_CUSTOM:
+ if (!*parent)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
+{
+ qdisc_config_t config;
+ int ret;
+ struct libbpf_nla_req req;
+
+ ret = attach_point_to_config(hook, &config);
+ if (ret < 0)
+ return ret;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
+ req.nh.nlmsg_type = cmd;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
+
+ ret = config(&req);
+ if (ret < 0)
+ return ret;
+
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
+}
+
+static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
+{
+ return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
+}
+
+static int tc_qdisc_delete(struct bpf_tc_hook *hook)
+{
+ return tc_qdisc_modify(hook, RTM_DELQDISC, 0);
+}
+
+int bpf_tc_hook_create(struct bpf_tc_hook *hook)
+{
+ int ret;
+
+ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+ OPTS_GET(hook, ifindex, 0) <= 0)
+ return libbpf_err(-EINVAL);
+
+ ret = tc_qdisc_create_excl(hook);
+ return libbpf_err(ret);
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts,
+ const bool flush);
+
+int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
+{
+ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+ OPTS_GET(hook, ifindex, 0) <= 0)
+ return libbpf_err(-EINVAL);
+
+ switch (OPTS_GET(hook, attach_point, 0)) {
+ case BPF_TC_INGRESS:
+ case BPF_TC_EGRESS:
+ return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+ case BPF_TC_INGRESS | BPF_TC_EGRESS:
+ return libbpf_err(tc_qdisc_delete(hook));
+ case BPF_TC_CUSTOM:
+ return libbpf_err(-EOPNOTSUPP);
+ default:
+ return libbpf_err(-EINVAL);
+ }
+}
+
+struct bpf_cb_ctx {
+ struct bpf_tc_opts *opts;
+ bool processed;
+};
+
+static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb,
+ bool unicast)
+{
+ struct nlattr *tbb[TCA_BPF_MAX + 1];
+ struct bpf_cb_ctx *info = cookie;
+
+ if (!info || !info->opts)
+ return -EINVAL;
+ if (unicast && info->processed)
+ return -EINVAL;
+ if (!tb[TCA_OPTIONS])
+ return NL_CONT;
+
+ libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL);
+ if (!tbb[TCA_BPF_ID])
+ return -EINVAL;
+
+ OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]));
+ OPTS_SET(info->opts, handle, tc->tcm_handle);
+ OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16);
+
+ info->processed = true;
+ return unicast ? NL_NEXT : NL_DONE;
+}
+
+static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ struct tcmsg *tc = NLMSG_DATA(nh);
+ struct nlattr *tb[TCA_MAX + 1];
+
+ libbpf_nla_parse(tb, TCA_MAX,
+ (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
+ NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
+ if (!tb[TCA_KIND])
+ return NL_CONT;
+ return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ char name[256];
+ int len, ret;
+
+ ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (ret < 0)
+ return ret;
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
+ if (ret < 0)
+ return ret;
+ len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
+ if (len < 0)
return -errno;
+ if (len >= sizeof(name))
+ return -ENAMETOOLONG;
+ return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
+}
+
+int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+ __u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct bpf_cb_ctx info = {};
+ struct libbpf_nla_req req;
+ struct nlattr *nla;
+
+ if (!hook || !opts ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return libbpf_err(-EINVAL);
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || !prog_fd || prog_id)
+ return libbpf_err(-EINVAL);
+ if (priority > UINT16_MAX)
+ return libbpf_err(-EINVAL);
+ if (flags & ~BPF_TC_F_REPLACE)
+ return libbpf_err(-EINVAL);
+
+ flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
+ protocol = ETH_P_ALL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE |
+ NLM_F_ECHO | flags;
+ req.nh.nlmsg_type = RTM_NEWTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ req.tc.tcm_handle = handle;
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return libbpf_err(ret);
+ req.tc.tcm_parent = parent;
+
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return libbpf_err(ret);
+ nla = nlattr_begin_nested(&req, TCA_OPTIONS);
+ if (!nla)
+ return libbpf_err(-EMSGSIZE);
+ ret = tc_add_fd_and_name(&req, prog_fd);
+ if (ret < 0)
+ return libbpf_err(ret);
+ bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
+ ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
+ if (ret < 0)
+ return libbpf_err(ret);
+ nlattr_end_nested(&req, nla);
+
+ info.opts = opts;
+
+ ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
+ if (ret < 0)
+ return libbpf_err(ret);
+ if (!info.processed)
+ return libbpf_err(-ENOENT);
+ return ret;
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts,
+ const bool flush)
+{
+ __u32 protocol = 0, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct libbpf_nla_req req;
+
+ if (!hook ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return -EINVAL;
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || flags || prog_fd || prog_id)
+ return -EINVAL;
+ if (priority > UINT16_MAX)
+ return -EINVAL;
+ if (!flush) {
+ if (!handle || !priority)
+ return -EINVAL;
+ protocol = ETH_P_ALL;
+ } else {
+ if (handle || priority)
+ return -EINVAL;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_DELTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ if (!flush) {
+ req.tc.tcm_handle = handle;
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+ }
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return ret;
+ req.tc.tcm_parent = parent;
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
- dump_link_nlmsg, cookie);
+ if (!flush) {
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return ret;
+ }
+
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
+}
+
+int bpf_tc_detach(const struct bpf_tc_hook *hook,
+ const struct bpf_tc_opts *opts)
+{
+ int ret;
+
+ if (!opts)
+ return libbpf_err(-EINVAL);
+
+ ret = __bpf_tc_detach(hook, opts, false);
+ return libbpf_err(ret);
+}
+
+int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+ __u32 protocol, handle, priority, parent, prog_id, flags;
+ int ret, ifindex, attach_point, prog_fd;
+ struct bpf_cb_ctx info = {};
+ struct libbpf_nla_req req;
+
+ if (!hook || !opts ||
+ !OPTS_VALID(hook, bpf_tc_hook) ||
+ !OPTS_VALID(opts, bpf_tc_opts))
+ return libbpf_err(-EINVAL);
+
+ ifindex = OPTS_GET(hook, ifindex, 0);
+ parent = OPTS_GET(hook, parent, 0);
+ attach_point = OPTS_GET(hook, attach_point, 0);
+
+ handle = OPTS_GET(opts, handle, 0);
+ priority = OPTS_GET(opts, priority, 0);
+ prog_fd = OPTS_GET(opts, prog_fd, 0);
+ prog_id = OPTS_GET(opts, prog_id, 0);
+ flags = OPTS_GET(opts, flags, 0);
+
+ if (ifindex <= 0 || flags || prog_fd || prog_id ||
+ !handle || !priority)
+ return libbpf_err(-EINVAL);
+ if (priority > UINT16_MAX)
+ return libbpf_err(-EINVAL);
+
+ protocol = ETH_P_ALL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_type = RTM_GETTFILTER;
+ req.tc.tcm_family = AF_UNSPEC;
+ req.tc.tcm_ifindex = ifindex;
+ req.tc.tcm_handle = handle;
+ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
+
+ ret = tc_get_tcm_parent(attach_point, &parent);
+ if (ret < 0)
+ return libbpf_err(ret);
+ req.tc.tcm_parent = parent;
+
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
+ if (ret < 0)
+ return libbpf_err(ret);
+
+ info.opts = opts;
+
+ ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
+ if (ret < 0)
+ return libbpf_err(ret);
+ if (!info.processed)
+ return libbpf_err(-ENOENT);
+ return ret;
}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index b607fa9852b1..f57e77a6e40f 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -27,7 +27,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
int totlen = NLA_ALIGN(nla->nla_len);
*remaining -= totlen;
- return (struct nlattr *) ((char *) nla + totlen);
+ return (struct nlattr *)((void *)nla + totlen);
}
static int nla_ok(const struct nlattr *nla, int remaining)
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 6cc3ac91690f..4d15ae2ff812 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -10,7 +10,11 @@
#define __LIBBPF_NLATTR_H
#include <stdint.h>
+#include <string.h>
+#include <errno.h>
#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
/* avoid multiple definition of netlink features */
#define __LINUX_NETLINK_H
@@ -49,6 +53,15 @@ struct libbpf_nla_policy {
uint16_t maxlen;
};
+struct libbpf_nla_req {
+ struct nlmsghdr nh;
+ union {
+ struct ifinfomsg ifinfo;
+ struct tcmsg tc;
+ };
+ char buf[128];
+};
+
/**
* @ingroup attr
* Iterate over a stream of attributes
@@ -68,7 +81,7 @@ struct libbpf_nla_policy {
*/
static inline void *libbpf_nla_data(const struct nlattr *nla)
{
- return (char *) nla + NLA_HDRLEN;
+ return (void *)nla + NLA_HDRLEN;
}
static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
@@ -103,4 +116,49 @@ int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
+static inline struct nlattr *nla_data(struct nlattr *nla)
+{
+ return (struct nlattr *)((void *)nla + NLA_HDRLEN);
+}
+
+static inline struct nlattr *req_tail(struct libbpf_nla_req *req)
+{
+ return (struct nlattr *)((void *)req + NLMSG_ALIGN(req->nh.nlmsg_len));
+}
+
+static inline int nlattr_add(struct libbpf_nla_req *req, int type,
+ const void *data, int len)
+{
+ struct nlattr *nla;
+
+ if (NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > sizeof(*req))
+ return -EMSGSIZE;
+ if (!!data != !!len)
+ return -EINVAL;
+
+ nla = req_tail(req);
+ nla->nla_type = type;
+ nla->nla_len = NLA_HDRLEN + len;
+ if (data)
+ memcpy(nla_data(nla), data, len);
+ req->nh.nlmsg_len = NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(nla->nla_len);
+ return 0;
+}
+
+static inline struct nlattr *nlattr_begin_nested(struct libbpf_nla_req *req, int type)
+{
+ struct nlattr *tail;
+
+ tail = req_tail(req);
+ if (nlattr_add(req, type | NLA_F_NESTED, NULL, 0))
+ return NULL;
+ return tail;
+}
+
+static inline void nlattr_end_nested(struct libbpf_nla_req *req,
+ struct nlattr *tail)
+{
+ tail->nla_len = (void *)req_tail(req) - (void *)tail;
+}
+
#endif /* __LIBBPF_NLATTR_H */
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 1d80ad4e0de8..8bc117bcc7bc 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -69,23 +69,23 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
err = -errno;
pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
if (info.type != BPF_MAP_TYPE_RINGBUF) {
pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
map_fd);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
if (!tmp)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
rb->rings = tmp;
tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
if (!tmp)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
rb->events = tmp;
r = &rb->rings[rb->ring_cnt];
@@ -103,7 +103,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
err = -errno;
pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
r->consumer_pos = tmp;
@@ -118,7 +118,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
r->producer_pos = tmp;
r->data = tmp + rb->page_size;
@@ -133,7 +133,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
map_fd, err);
- return err;
+ return libbpf_err(err);
}
rb->ring_cnt++;
@@ -165,11 +165,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
int err;
if (!OPTS_VALID(opts, ring_buffer_opts))
- return NULL;
+ return errno = EINVAL, NULL;
rb = calloc(1, sizeof(*rb));
if (!rb)
- return NULL;
+ return errno = ENOMEM, NULL;
rb->page_size = getpagesize();
@@ -188,7 +188,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
err_out:
ring_buffer__free(rb);
- return NULL;
+ return errno = -err, NULL;
}
static inline int roundup_len(__u32 len)
@@ -260,7 +260,7 @@ int ring_buffer__consume(struct ring_buffer *rb)
err = ringbuf_process_ring(ring);
if (err < 0)
- return err;
+ return libbpf_err(err);
res += err;
}
if (res > INT_MAX)
@@ -279,7 +279,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
if (cnt < 0)
- return -errno;
+ return libbpf_err(-errno);
for (i = 0; i < cnt; i++) {
__u32 ring_id = rb->events[i].data.fd;
@@ -287,7 +287,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
err = ringbuf_process_ring(ring);
if (err < 0)
- return err;
+ return libbpf_err(err);
res += err;
}
if (res > INT_MAX)
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
new file mode 100644
index 000000000000..b22b50c1b173
--- /dev/null
+++ b/tools/lib/bpf/skel_internal.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __SKEL_INTERNAL_H
+#define __SKEL_INTERNAL_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+
+/* This file is a base header for auto-generated *.lskel.h files.
+ * Its contents will change and may become part of auto-generation in the future.
+ *
+ * The layout of bpf_[map|prog]_desc and bpf_loader_ctx is feature dependent
+ * and will change from one version of libbpf to another and features
+ * requested during loader program generation.
+ */
+struct bpf_map_desc {
+ union {
+ /* input for the loader prog */
+ struct {
+ __aligned_u64 initial_value;
+ __u32 max_entries;
+ };
+ /* output of the loader prog */
+ struct {
+ int map_fd;
+ };
+ };
+};
+struct bpf_prog_desc {
+ int prog_fd;
+};
+
+struct bpf_loader_ctx {
+ size_t sz;
+ __u32 log_level;
+ __u32 log_size;
+ __u64 log_buf;
+};
+
+struct bpf_load_and_run_opts {
+ struct bpf_loader_ctx *ctx;
+ const void *data;
+ const void *insns;
+ __u32 data_sz;
+ __u32 insns_sz;
+ const char *errstr;
+};
+
+static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ return syscall(__NR_bpf, cmd, attr, size);
+}
+
+static inline int skel_closenz(int fd)
+{
+ if (fd > 0)
+ return close(fd);
+ return -EINVAL;
+}
+
+static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
+{
+ int map_fd = -1, prog_fd = -1, key = 0, err;
+ union bpf_attr attr;
+
+ map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4,
+ opts->data_sz, 1, 0);
+ if (map_fd < 0) {
+ opts->errstr = "failed to create loader map";
+ err = -errno;
+ goto out;
+ }
+
+ err = bpf_map_update_elem(map_fd, &key, opts->data, 0);
+ if (err < 0) {
+ opts->errstr = "failed to update loader map";
+ err = -errno;
+ goto out;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SYSCALL;
+ attr.insns = (long) opts->insns;
+ attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
+ attr.license = (long) "Dual BSD/GPL";
+ memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
+ attr.fd_array = (long) &map_fd;
+ attr.log_level = opts->ctx->log_level;
+ attr.log_size = opts->ctx->log_size;
+ attr.log_buf = opts->ctx->log_buf;
+ attr.prog_flags = BPF_F_SLEEPABLE;
+ prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (prog_fd < 0) {
+ opts->errstr = "failed to load loader prog";
+ err = -errno;
+ goto out;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.test.prog_fd = prog_fd;
+ attr.test.ctx_in = (long) opts->ctx;
+ attr.test.ctx_size_in = opts->ctx->sz;
+ err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr));
+ if (err < 0 || (int)attr.test.retval < 0) {
+ opts->errstr = "failed to execute loader prog";
+ if (err < 0)
+ err = -errno;
+ else
+ err = (int)attr.test.retval;
+ goto out;
+ }
+ err = 0;
+out:
+ if (map_fd >= 0)
+ close(map_fd);
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return err;
+}
+
+#endif
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 6061431ee04c..e9b619aa0cdf 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -1094,7 +1094,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
goto out_put_ctx;
}
if (xsk->fd == umem->fd)
- umem->rx_ring_setup_done = true;
+ umem->tx_ring_setup_done = true;
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
index 51ceeb9147eb..9ce7b4b68e3f 100644
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -366,7 +366,7 @@ union kvm_mmu_page_role {
unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
- unsigned nxe:1;
+ unsigned efer_nx:1;
unsigned cr0_wp:1;
unsigned smep_and_not_wp:1;
unsigned smap_and_not_wp:1;
@@ -403,7 +403,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
access_str[role.access],
role.invalid ? " invalid" : "",
role.cr4_pae ? "" : "!",
- role.nxe ? "" : "!",
+ role.efer_nx ? "" : "!",
role.cr0_wp ? "" : "!",
role.smep_and_not_wp ? " smep" : "",
role.smap_and_not_wp ? " smap" : "",
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 24295d39713b..bc821056aba9 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -684,7 +684,7 @@ static int elf_add_alternative(struct elf *elf,
sec = find_section_by_name(elf, ".altinstructions");
if (!sec) {
sec = elf_create_section(elf, ".altinstructions",
- SHF_WRITE, size, 0);
+ SHF_ALLOC, size, 0);
if (!sec) {
WARN_ELF("elf_create_section");
@@ -747,6 +747,10 @@ int arch_rewrite_retpolines(struct objtool_file *file)
list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC)
+ continue;
+
if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
continue;
diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h
index 14271cca0c74..f2918f789a0a 100644
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -9,6 +9,7 @@
#define JUMP_ENTRY_SIZE 16
#define JUMP_ORIG_OFFSET 0
#define JUMP_NEW_OFFSET 4
+#define JUMP_KEY_OFFSET 8
#define ALT_ENTRY_SIZE 12
#define ALT_ORIG_OFFSET 0
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9ed1a4cd00dc..e5947fbb9e7a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,15 +1225,41 @@ static int handle_jump_alt(struct objtool_file *file,
struct instruction *orig_insn,
struct instruction **new_insn)
{
- if (orig_insn->type == INSN_NOP)
- return 0;
+ if (orig_insn->type != INSN_JUMP_UNCONDITIONAL &&
+ orig_insn->type != INSN_NOP) {
- if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
WARN_FUNC("unsupported instruction at jump label",
orig_insn->sec, orig_insn->offset);
return -1;
}
+ if (special_alt->key_addend & 2) {
+ struct reloc *reloc = insn_reloc(file, orig_insn);
+
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+ }
+ elf_write_insn(file->elf, orig_insn->sec,
+ orig_insn->offset, orig_insn->len,
+ arch_nop_insn(orig_insn->len));
+ orig_insn->type = INSN_NOP;
+ }
+
+ if (orig_insn->type == INSN_NOP) {
+ if (orig_insn->len == 2)
+ file->jl_nop_short++;
+ else
+ file->jl_nop_long++;
+
+ return 0;
+ }
+
+ if (orig_insn->len == 2)
+ file->jl_short++;
+ else
+ file->jl_long++;
+
*new_insn = list_next_entry(orig_insn, list);
return 0;
}
@@ -1314,6 +1340,12 @@ static int add_special_section_alts(struct objtool_file *file)
free(special_alt);
}
+ if (stats) {
+ printf("jl\\\tNOP\tJMP\n");
+ printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
+ printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
+ }
+
out:
return ret;
}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 743c2e9d0f56..8676c7598728 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -9,6 +9,7 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -27,21 +28,27 @@ static inline u32 str_hash(const char *str)
return jhash(str, strlen(str), 0);
}
-static inline int elf_hash_bits(void)
-{
- return vmlinux ? ELF_HASH_BITS : 16;
-}
+#define __elf_table(name) (elf->name##_hash)
+#define __elf_bits(name) (elf->name##_bits)
-#define elf_hash_add(hashtable, node, key) \
- hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+#define elf_hash_add(name, node, key) \
+ hlist_add_head(node, &__elf_table(name)[hash_min(key, __elf_bits(name))])
-static void elf_hash_init(struct hlist_head *table)
-{
- __hash_init(table, 1U << elf_hash_bits());
-}
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &__elf_table(name)[hash_min(key, __elf_bits(name))], member)
-#define elf_hash_for_each_possible(name, obj, member, key) \
- hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+#define elf_alloc_hash(name, size) \
+({ \
+ __elf_bits(name) = max(10, ilog2(size)); \
+ __elf_table(name) = mmap(NULL, sizeof(struct hlist_head) << __elf_bits(name), \
+ PROT_READ|PROT_WRITE, \
+ MAP_PRIVATE|MAP_ANON, -1, 0); \
+ if (__elf_table(name) == (void *)-1L) { \
+ WARN("mmap fail " #name); \
+ __elf_table(name) = NULL; \
+ } \
+ __elf_table(name); \
+})
static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b)
{
@@ -80,9 +87,10 @@ struct section *find_section_by_name(const struct elf *elf, const char *name)
{
struct section *sec;
- elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+ elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) {
if (!strcmp(sec->name, name))
return sec;
+ }
return NULL;
}
@@ -92,9 +100,10 @@ static struct section *find_section_by_index(struct elf *elf,
{
struct section *sec;
- elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
+ elf_hash_for_each_possible(section, sec, hash, idx) {
if (sec->idx == idx)
return sec;
+ }
return NULL;
}
@@ -103,9 +112,10 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
{
struct symbol *sym;
- elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+ elf_hash_for_each_possible(symbol, sym, hash, idx) {
if (sym->idx == idx)
return sym;
+ }
return NULL;
}
@@ -170,9 +180,10 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
{
struct symbol *sym;
- elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+ elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
if (!strcmp(sym->name, name))
return sym;
+ }
return NULL;
}
@@ -189,8 +200,8 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se
sec = sec->reloc;
for_offset_range(o, offset, offset + len) {
- elf_hash_for_each_possible(elf->reloc_hash, reloc, hash,
- sec_offset_hash(sec, o)) {
+ elf_hash_for_each_possible(reloc, reloc, hash,
+ sec_offset_hash(sec, o)) {
if (reloc->sec != sec)
continue;
@@ -228,6 +239,10 @@ static int read_sections(struct elf *elf)
return -1;
}
+ if (!elf_alloc_hash(section, sections_nr) ||
+ !elf_alloc_hash(section_name, sections_nr))
+ return -1;
+
for (i = 0; i < sections_nr; i++) {
sec = malloc(sizeof(*sec));
if (!sec) {
@@ -273,13 +288,18 @@ static int read_sections(struct elf *elf)
}
sec->len = sec->sh.sh_size;
+ if (sec->sh.sh_flags & SHF_EXECINSTR)
+ elf->text_size += sec->len;
+
list_add_tail(&sec->list, &elf->sections);
- elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
- elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(section, &sec->hash, sec->idx);
+ elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
}
- if (stats)
+ if (stats) {
printf("nr_sections: %lu\n", (unsigned long)sections_nr);
+ printf("section_bits: %d\n", elf->section_bits);
+ }
/* sanity check, one more call to elf_nextscn() should return NULL */
if (elf_nextscn(elf->elf, s)) {
@@ -308,8 +328,8 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
- elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
- elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+ elf_hash_add(symbol, &sym->hash, sym->idx);
+ elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
/*
* Don't store empty STT_NOTYPE symbols in the rbtree. They
@@ -329,19 +349,25 @@ static int read_symbols(struct elf *elf)
Elf32_Word shndx;
symtab = find_section_by_name(elf, ".symtab");
- if (!symtab) {
+ if (symtab) {
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx)
+ shndx_data = symtab_shndx->data;
+
+ symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+ } else {
/*
* A missing symbol table is actually possible if it's an empty
- * .o file. This can happen for thunk_64.o.
+ * .o file. This can happen for thunk_64.o. Make sure to at
+ * least allocate the symbol hash tables so we can do symbol
+ * lookups without crashing.
*/
- return 0;
+ symbols_nr = 0;
}
- symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
- if (symtab_shndx)
- shndx_data = symtab_shndx->data;
-
- symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+ if (!elf_alloc_hash(symbol, symbols_nr) ||
+ !elf_alloc_hash(symbol_name, symbols_nr))
+ return -1;
for (i = 0; i < symbols_nr; i++) {
sym = malloc(sizeof(*sym));
@@ -389,8 +415,10 @@ static int read_symbols(struct elf *elf)
elf_add_symbol(elf, sym);
}
- if (stats)
+ if (stats) {
printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
+ printf("symbol_bits: %d\n", elf->symbol_bits);
+ }
/* Create parent/child links for any cold subfunctions */
list_for_each_entry(sec, &elf->sections, list) {
@@ -479,7 +507,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
reloc->addend = addend;
list_add_tail(&reloc->list, &sec->reloc->reloc_list);
- elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+ elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
sec->reloc->changed = true;
@@ -556,6 +584,9 @@ static int read_relocs(struct elf *elf)
unsigned int symndx;
unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
+ if (!elf_alloc_hash(reloc, elf->text_size / 16))
+ return -1;
+
list_for_each_entry(sec, &elf->sections, list) {
if ((sec->sh.sh_type != SHT_RELA) &&
(sec->sh.sh_type != SHT_REL))
@@ -600,7 +631,7 @@ static int read_relocs(struct elf *elf)
}
list_add_tail(&reloc->list, &sec->reloc_list);
- elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+ elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
nr_reloc++;
}
@@ -611,6 +642,7 @@ static int read_relocs(struct elf *elf)
if (stats) {
printf("max_reloc: %lu\n", max_reloc);
printf("tot_reloc: %lu\n", tot_reloc);
+ printf("reloc_bits: %d\n", elf->reloc_bits);
}
return 0;
@@ -632,12 +664,6 @@ struct elf *elf_open_read(const char *name, int flags)
INIT_LIST_HEAD(&elf->sections);
- elf_hash_init(elf->symbol_hash);
- elf_hash_init(elf->symbol_name_hash);
- elf_hash_init(elf->section_hash);
- elf_hash_init(elf->section_name_hash);
- elf_hash_init(elf->reloc_hash);
-
elf->fd = open(name, flags);
if (elf->fd == -1) {
fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -717,7 +743,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
{
- struct section *symtab;
+ struct section *symtab, *symtab_shndx;
struct symbol *sym;
Elf_Data *data;
Elf_Scn *s;
@@ -769,6 +795,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
symtab->len += data->d_size;
symtab->changed = true;
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx) {
+ s = elf_getscn(elf->elf, symtab_shndx->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return NULL;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return NULL;
+ }
+
+ data->d_buf = &sym->sym.st_size; /* conveniently 0 */
+ data->d_size = sizeof(Elf32_Word);
+ data->d_align = 4;
+ data->d_type = ELF_T_WORD;
+
+ symtab_shndx->len += 4;
+ symtab_shndx->changed = true;
+ }
+
sym->sec = find_section_by_index(elf, 0);
elf_add_symbol(elf, sym);
@@ -851,8 +900,8 @@ struct section *elf_create_section(struct elf *elf, const char *name,
return NULL;
list_add_tail(&sec->list, &elf->sections);
- elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
- elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(section, &sec->hash, sec->idx);
+ elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
elf->changed = true;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 45e5ede363b0..e34395047530 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -83,12 +83,20 @@ struct elf {
int fd;
bool changed;
char *name;
+ unsigned int text_size;
struct list_head sections;
- DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
- DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS);
+
+ int symbol_bits;
+ int symbol_name_bits;
+ int section_bits;
+ int section_name_bits;
+ int reloc_bits;
+
+ struct hlist_head *symbol_hash;
+ struct hlist_head *symbol_name_hash;
+ struct hlist_head *section_hash;
+ struct hlist_head *section_name_hash;
+ struct hlist_head *reloc_hash;
};
#define OFFSET_STRIDE_BITS 4
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index e4084afb2304..24fa83634de4 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -22,6 +22,9 @@ struct objtool_file {
struct list_head static_call_list;
struct list_head mcount_loc_list;
bool ignore_unreachables, c_file, hints, rodata;
+
+ unsigned long jl_short, jl_long;
+ unsigned long jl_nop_short, jl_nop_long;
};
struct objtool_file *objtool_open_read(const char *_objname);
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 8a09f4e9d480..dc4721e19002 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -27,6 +27,7 @@ struct special_alt {
unsigned long new_off;
unsigned int orig_len, new_len; /* group only */
+ u8 key_addend;
};
int special_get_alts(struct elf *elf, struct list_head *alts);
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 07b21cfabf5c..bc925cf19e2d 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -23,6 +23,7 @@ struct special_entry {
unsigned char size, orig, new;
unsigned char orig_len, new_len; /* group only */
unsigned char feature; /* ALTERNATIVE macro CPU feature */
+ unsigned char key; /* jump_label key */
};
struct special_entry entries[] = {
@@ -42,6 +43,7 @@ struct special_entry entries[] = {
.size = JUMP_ENTRY_SIZE,
.orig = JUMP_ORIG_OFFSET,
.new = JUMP_NEW_OFFSET,
+ .key = JUMP_KEY_OFFSET,
},
{
.sec = "__ex_table",
@@ -122,6 +124,18 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
alt->new_off -= 0x7ffffff0;
}
+ if (entry->key) {
+ struct reloc *key_reloc;
+
+ key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key);
+ if (!key_reloc) {
+ WARN_FUNC("can't find key reloc",
+ sec, offset + entry->key);
+ return -1;
+ }
+ alt->key_addend = key_reloc->addend;
+ }
+
return 0;
}
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 1dcec73c910c..bcf3eca5afbe 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -108,9 +108,9 @@ displayed as follows:
perf script --itrace=ibxwpe -F+flags
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
+The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end,
+in transaction, VM-entry, and VM-exit respectively.
perf script also supports higher level ways to dump instruction traces:
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5b8b61075039..48a5f5b26dd4 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -183,14 +183,15 @@ OPTIONS
At this point usage is displayed, and perf-script exits.
The flags field is synthesized and may have a value when Instruction
- Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+ Trace decoding. The flags are "bcrosyiABExgh" which stand for branch,
call, return, conditional, system, asynchronous, interrupt,
- transaction abort, trace begin, trace end, and in transaction,
+ transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit
respectively. Known combinations of flags are printed more nicely e.g.
"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
- "tr end" for "bE". However the "x" flag will be display separately in those
+ "tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch".
+ However the "x" flag will be displayed separately in those
cases e.g. "jcc (x)" for a condition branch within a transaction.
The callindent field is synthesized and may have a value when
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 406a9519145e..73df23dd664c 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -90,7 +90,6 @@ endif
ifeq ($(ARCH),mips)
NO_PERF_REGS := 0
CFLAGS += -I$(OUTPUT)arch/mips/include/generated
- CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi
LIBUNWIND_LIBS = -lunwind -lunwind-mips
endif
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 9974f5f8e49b..9cd1c34f31b5 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
440 n64 process_madvise sys_process_madvise
441 n64 epoll_pwait2 sys_epoll_pwait2
442 n64 mount_setattr sys_mount_setattr
-443 n64 quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 n64 landlock_create_ruleset sys_landlock_create_ruleset
445 n64 landlock_add_rule sys_landlock_add_rule
446 n64 landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 2e68fbb57cc6..8f052ff4058c 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 7e4a2aba366d..0690263df1dd 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d05..ce18119ea0d0 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 87f5b1a4a7fa..833405c27dae 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID))
with_hits = true;
+ if (zstd_init(&(session->zstd_data), 0) < 0)
+ pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
/*
* in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3337b5f93336..84803abeb942 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2714,6 +2714,12 @@ int cmd_record(int argc, const char **argv)
rec->no_buildid = true;
}
+ if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
+ pr_err("Kernel has no cgroup sampling support.\n");
+ err = -EINVAL;
+ goto out_opts;
+ }
+
if (rec->opts.kcore)
rec->data.is_dir = true;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5a830ae09418..f9f74a514315 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -572,7 +572,8 @@ static int enable_counters(void)
* - we have initial delay configured
*/
if (!target__none(&target) || stat_config.initial_delay) {
- evlist__enable(evsel_list);
+ if (!all_counters_use_bpf)
+ evlist__enable(evsel_list);
if (stat_config.initial_delay > 0)
pr_info(EVLIST_ENABLED_MSG);
}
@@ -581,13 +582,19 @@ static int enable_counters(void)
static void disable_counters(void)
{
+ struct evsel *counter;
+
/*
* If we don't have tracee (attaching to task or cpu), counters may
* still be running. To get accurate group ratios, we must stop groups
* from counting before reading their constituent counters.
*/
- if (!target__none(&target))
- evlist__disable(evsel_list);
+ if (!target__none(&target)) {
+ evlist__for_each_entry(evsel_list, counter)
+ bpf_counter__disable(counter);
+ if (!all_counters_use_bpf)
+ evlist__disable(evsel_list);
+ }
}
static volatile int workload_exec_errno;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index dd8ff287e930..c783558332b8 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -39,6 +39,7 @@ arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
arch/arm/include/uapi/asm/perf_regs.h
arch/arm64/include/uapi/asm/perf_regs.h
+arch/mips/include/uapi/asm/perf_regs.h
arch/powerpc/include/uapi/asm/perf_regs.h
arch/s390/include/uapi/asm/perf_regs.h
arch/x86/include/uapi/asm/perf_regs.h
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 20cb91ef06ff..2f6b67189b42 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -443,6 +443,8 @@ int main(int argc, const char **argv)
const char *cmd;
char sbuf[STRERR_BUFSIZE];
+ perf_debug_setup();
+
/* libsubcmd init */
exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
pager_init(PERF_PAGER_ENVIRONMENT);
@@ -531,8 +533,6 @@ int main(int argc, const char **argv)
*/
pthread__block_sigwinch();
- perf_debug_setup();
-
while (1) {
static int done_help;
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
index 616f29098c71..605be14f441c 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
@@ -1,46 +1,56 @@
[
{
- "EventCode": "1003C",
+ "EventCode": "0x1003C",
"EventName": "PM_EXEC_STALL_DMISS_L2L3",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
},
{
- "EventCode": "34056",
+ "EventCode": "0x1E054",
+ "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
+ },
+ {
+ "EventCode": "0x34054",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
+ },
+ {
+ "EventCode": "0x34056",
"EventName": "PM_EXEC_STALL_LOAD_FINISH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
},
{
- "EventCode": "3006C",
+ "EventCode": "0x3006C",
"EventName": "PM_RUN_CYC_SMT2_MODE",
"BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
},
{
- "EventCode": "300F4",
+ "EventCode": "0x300F4",
"EventName": "PM_RUN_INST_CMPL_CONC",
"BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
},
{
- "EventCode": "4C016",
+ "EventCode": "0x4C016",
"EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
},
{
- "EventCode": "4D014",
+ "EventCode": "0x4D014",
"EventName": "PM_EXEC_STALL_LOAD",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
},
{
- "EventCode": "4D016",
+ "EventCode": "0x4D016",
"EventName": "PM_EXEC_STALL_PTESYNC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
},
{
- "EventCode": "401EA",
+ "EventCode": "0x401EA",
"EventName": "PM_THRESH_EXC_128",
"BriefDescription": "Threshold counter exceeded a value of 128."
},
{
- "EventCode": "400F6",
+ "EventCode": "0x400F6",
"EventName": "PM_BR_MPRED_CMPL",
"BriefDescription": "A mispredicted branch completed. Includes direction and target."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
index 703cd431ae5b..54acb55e2c8c 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
@@ -1,6 +1,6 @@
[
{
- "EventCode": "4016E",
+ "EventCode": "0x4016E",
"EventName": "PM_THRESH_NOT_MET",
"BriefDescription": "Threshold counter did not meet threshold."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
index eac8609dcc90..558f9530f54e 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
@@ -1,216 +1,246 @@
[
{
- "EventCode": "10004",
+ "EventCode": "0x10004",
"EventName": "PM_EXEC_STALL_TRANSLATION",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
},
{
- "EventCode": "10010",
+ "EventCode": "0x10006",
+ "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
+ },
+ {
+ "EventCode": "0x10010",
"EventName": "PM_PMC4_OVERFLOW",
"BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
},
{
- "EventCode": "10020",
+ "EventCode": "0x10020",
"EventName": "PM_PMC4_REWIND",
"BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
},
{
- "EventCode": "10038",
+ "EventCode": "0x10038",
"EventName": "PM_DISP_STALL_TRANSLATION",
"BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
},
{
- "EventCode": "1003A",
+ "EventCode": "0x1003A",
"EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
},
{
- "EventCode": "1E050",
+ "EventCode": "0x1D05E",
+ "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
+ },
+ {
+ "EventCode": "0x1E050",
"EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
},
{
- "EventCode": "1F054",
+ "EventCode": "0x1F054",
"EventName": "PM_DTLB_HIT",
"BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT."
},
{
- "EventCode": "101E8",
+ "EventCode": "0x10064",
+ "EventName": "PM_DISP_STALL_IC_L2",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+ },
+ {
+ "EventCode": "0x101E8",
"EventName": "PM_THRESH_EXC_256",
"BriefDescription": "Threshold counter exceeded a count of 256."
},
{
- "EventCode": "101EC",
+ "EventCode": "0x101EC",
"EventName": "PM_THRESH_MET",
"BriefDescription": "Threshold exceeded."
},
{
- "EventCode": "100F2",
+ "EventCode": "0x100F2",
"EventName": "PM_1PLUS_PPC_CMPL",
"BriefDescription": "Cycles in which at least one instruction is completed by this thread."
},
{
- "EventCode": "100F6",
+ "EventCode": "0x100F6",
"EventName": "PM_IERAT_MISS",
"BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
},
{
- "EventCode": "100F8",
+ "EventCode": "0x100F8",
"EventName": "PM_DISP_STALL_CYC",
"BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
},
{
- "EventCode": "20114",
+ "EventCode": "0x20006",
+ "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
+ },
+ {
+ "EventCode": "0x20114",
"EventName": "PM_MRK_L2_RC_DISP",
"BriefDescription": "Marked instruction RC dispatched in L2."
},
{
- "EventCode": "2C010",
+ "EventCode": "0x2C010",
"EventName": "PM_EXEC_STALL_LSU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
},
{
- "EventCode": "2C016",
+ "EventCode": "0x2C016",
"EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
"BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
},
{
- "EventCode": "2C01E",
+ "EventCode": "0x2C01E",
"EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
},
{
- "EventCode": "2D01A",
+ "EventCode": "0x2D01A",
"EventName": "PM_DISP_STALL_IC_MISS",
"BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
},
{
- "EventCode": "2D01C",
- "EventName": "PM_CMPL_STALL_STCX",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
- },
- {
- "EventCode": "2E018",
+ "EventCode": "0x2E018",
"EventName": "PM_DISP_STALL_FETCH",
"BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
},
{
- "EventCode": "2E01A",
+ "EventCode": "0x2E01A",
"EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
},
{
- "EventCode": "2C142",
+ "EventCode": "0x2C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "24050",
+ "EventCode": "0x24050",
"EventName": "PM_IOPS_DISP",
"BriefDescription": "Internal Operations dispatched. PM_IOPS_DISP / PM_INST_DISP will show the average number of internal operations per PowerPC instruction."
},
{
- "EventCode": "2405E",
+ "EventCode": "0x2405E",
"EventName": "PM_ISSUE_CANCEL",
"BriefDescription": "An instruction issued and the issue was later cancelled. Only one cancel per PowerPC instruction."
},
{
- "EventCode": "200FA",
+ "EventCode": "0x200FA",
"EventName": "PM_BR_TAKEN_CMPL",
"BriefDescription": "Branch Taken instruction completed."
},
{
- "EventCode": "30012",
+ "EventCode": "0x30004",
+ "EventName": "PM_DISP_STALL_FLUSH",
+ "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
+ },
+ {
+ "EventCode": "0x3000A",
+ "EventName": "PM_DISP_STALL_ITLB_MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
+ },
+ {
+ "EventCode": "0x30012",
"EventName": "PM_FLUSH_COMPLETION",
"BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush."
},
{
- "EventCode": "30014",
+ "EventCode": "0x30014",
"EventName": "PM_EXEC_STALL_STORE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
},
{
- "EventCode": "30018",
+ "EventCode": "0x30018",
"EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
},
{
- "EventCode": "30026",
+ "EventCode": "0x30026",
"EventName": "PM_EXEC_STALL_STORE_MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
},
{
- "EventCode": "3012A",
+ "EventCode": "0x3012A",
"EventName": "PM_MRK_L2_RC_DONE",
"BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
},
{
- "EventCode": "3F046",
+ "EventCode": "0x3F046",
"EventName": "PM_ITLB_HIT_1G",
"BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "34058",
+ "EventCode": "0x34058",
"EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
"BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
},
{
- "EventCode": "3D05C",
+ "EventCode": "0x3D05C",
"EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
},
{
- "EventCode": "3E052",
+ "EventCode": "0x3E052",
"EventName": "PM_DISP_STALL_IC_L3",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
},
{
- "EventCode": "3E054",
+ "EventCode": "0x3E054",
"EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{
- "EventCode": "301EA",
+ "EventCode": "0x301EA",
"EventName": "PM_THRESH_EXC_1024",
"BriefDescription": "Threshold counter exceeded a value of 1024."
},
{
- "EventCode": "300FA",
+ "EventCode": "0x300FA",
"EventName": "PM_INST_FROM_L3MISS",
"BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
},
{
- "EventCode": "40006",
+ "EventCode": "0x40006",
"EventName": "PM_ISSUE_KILL",
"BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
},
{
- "EventCode": "40116",
+ "EventCode": "0x40116",
"EventName": "PM_MRK_LARX_FIN",
"BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "4C010",
+ "EventCode": "0x4C010",
"EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
},
{
- "EventCode": "4D01E",
+ "EventCode": "0x4D01E",
"EventName": "PM_DISP_STALL_BR_MPRED",
"BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
},
{
- "EventCode": "4E010",
+ "EventCode": "0x4E010",
"EventName": "PM_DISP_STALL_IC_L3MISS",
"BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
},
{
- "EventCode": "4E01A",
+ "EventCode": "0x4E01A",
"EventName": "PM_DISP_STALL_HELD_CYC",
"BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
},
{
- "EventCode": "44056",
+ "EventCode": "0x4003C",
+ "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+ },
+ {
+ "EventCode": "0x44056",
"EventName": "PM_VECTOR_ST_CMPL",
"BriefDescription": "Vector store instructions completed."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/locks.json b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
index 016d8de0e14a..b5a0d6521963 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/locks.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
@@ -1,11 +1,11 @@
[
{
- "EventCode": "1E058",
+ "EventCode": "0x1E058",
"EventName": "PM_STCX_FAIL_FIN",
"BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "4E050",
+ "EventCode": "0x4E050",
"EventName": "PM_STCX_PASS_FIN",
"BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
index 93a5a5910648..58b5dfe3a273 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
@@ -1,146 +1,141 @@
[
{
- "EventCode": "1002C",
+ "EventCode": "0x1002C",
"EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
"BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
},
{
- "EventCode": "10132",
+ "EventCode": "0x10132",
"EventName": "PM_MRK_INST_ISSUED",
"BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction."
},
{
- "EventCode": "101E0",
+ "EventCode": "0x101E0",
"EventName": "PM_MRK_INST_DISP",
"BriefDescription": "The thread has dispatched a randomly sampled marked instruction."
},
{
- "EventCode": "101E2",
+ "EventCode": "0x101E2",
"EventName": "PM_MRK_BR_TAKEN_CMPL",
"BriefDescription": "Marked Branch Taken instruction completed."
},
{
- "EventCode": "20112",
+ "EventCode": "0x20112",
"EventName": "PM_MRK_NTF_FIN",
"BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
},
{
- "EventCode": "2C01C",
+ "EventCode": "0x2C01C",
"EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
},
{
- "EventCode": "20138",
+ "EventCode": "0x20138",
"EventName": "PM_MRK_ST_NEST",
"BriefDescription": "A store has been sampled/marked and is at the point of execution where it has completed in the core and can no longer be flushed. At this point the store is sent to the L2."
},
{
- "EventCode": "2013A",
+ "EventCode": "0x2013A",
"EventName": "PM_MRK_BRU_FIN",
"BriefDescription": "Marked Branch instruction finished."
},
{
- "EventCode": "2C144",
+ "EventCode": "0x2C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]."
},
{
- "EventCode": "24156",
+ "EventCode": "0x24156",
"EventName": "PM_MRK_STCX_FIN",
"BriefDescription": "Marked conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "24158",
+ "EventCode": "0x24158",
"EventName": "PM_MRK_INST",
"BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens."
},
{
- "EventCode": "2415C",
+ "EventCode": "0x2415C",
"EventName": "PM_MRK_BR_CMPL",
"BriefDescription": "A marked branch completed. All branches are included."
},
{
- "EventCode": "200FD",
+ "EventCode": "0x200FD",
"EventName": "PM_L1_ICACHE_MISS",
"BriefDescription": "Demand iCache Miss."
},
{
- "EventCode": "30130",
+ "EventCode": "0x30130",
"EventName": "PM_MRK_INST_FIN",
"BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
},
{
- "EventCode": "34146",
+ "EventCode": "0x34146",
"EventName": "PM_MRK_LD_CMPL",
"BriefDescription": "Marked loads completed."
},
{
- "EventCode": "3E158",
+ "EventCode": "0x3E158",
"EventName": "PM_MRK_STCX_FAIL",
"BriefDescription": "Marked conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "3E15A",
+ "EventCode": "0x3E15A",
"EventName": "PM_MRK_ST_FIN",
"BriefDescription": "The marked instruction was a store of any kind."
},
{
- "EventCode": "30068",
+ "EventCode": "0x30068",
"EventName": "PM_L1_ICACHE_RELOADED_PREF",
"BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
},
{
- "EventCode": "301E4",
+ "EventCode": "0x301E4",
"EventName": "PM_MRK_BR_MPRED_CMPL",
"BriefDescription": "Marked Branch Mispredicted. Includes direction and target."
},
{
- "EventCode": "300F6",
+ "EventCode": "0x300F6",
"EventName": "PM_LD_DEMAND_MISS_L1",
"BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
},
{
- "EventCode": "300FE",
+ "EventCode": "0x300FE",
"EventName": "PM_DATA_FROM_L3MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
},
{
- "EventCode": "40012",
+ "EventCode": "0x40012",
"EventName": "PM_L1_ICACHE_RELOADED_ALL",
"BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
},
{
- "EventCode": "40134",
+ "EventCode": "0x40134",
"EventName": "PM_MRK_INST_TIMEO",
"BriefDescription": "Marked instruction finish timeout (instruction was lost)."
},
{
- "EventCode": "4003C",
- "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
- },
- {
- "EventCode": "4505A",
+ "EventCode": "0x4505A",
"EventName": "PM_SP_FLOP_CMPL",
"BriefDescription": "Single Precision floating point instructions completed."
},
{
- "EventCode": "4D058",
+ "EventCode": "0x4D058",
"EventName": "PM_VECTOR_FLOP_CMPL",
"BriefDescription": "Vector floating point instructions completed."
},
{
- "EventCode": "4D05A",
+ "EventCode": "0x4D05A",
"EventName": "PM_NON_MATH_FLOP_CMPL",
"BriefDescription": "Non Math instructions completed."
},
{
- "EventCode": "401E0",
+ "EventCode": "0x401E0",
"EventName": "PM_MRK_INST_CMPL",
"BriefDescription": "marked instruction completed."
},
{
- "EventCode": "400FE",
+ "EventCode": "0x400FE",
"EventName": "PM_DATA_FROM_MEMORY",
"BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
index b01141eeebee..843b51f531e9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
@@ -1,191 +1,186 @@
[
{
- "EventCode": "1000A",
+ "EventCode": "0x1000A",
"EventName": "PM_PMC3_REWIND",
"BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
},
{
- "EventCode": "1C040",
+ "EventCode": "0x1C040",
"EventName": "PM_XFER_FROM_SRC_PMC1",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "1C142",
+ "EventCode": "0x1C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "1C144",
+ "EventCode": "0x1C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
},
{
- "EventCode": "1C056",
+ "EventCode": "0x1C056",
"EventName": "PM_DERAT_MISS_4K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "1C058",
+ "EventCode": "0x1C058",
"EventName": "PM_DTLB_MISS_16G",
"BriefDescription": "Data TLB reload (after a miss) page size 16G. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "1C05C",
+ "EventCode": "0x1C05C",
"EventName": "PM_DTLB_MISS_2M",
"BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "1E056",
+ "EventCode": "0x1E056",
"EventName": "PM_EXEC_STALL_STORE_PIPE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
},
{
- "EventCode": "1F150",
+ "EventCode": "0x1F150",
"EventName": "PM_MRK_ST_L2_CYC",
"BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
},
{
- "EventCode": "10062",
+ "EventCode": "0x10062",
"EventName": "PM_LD_L3MISS_PEND_CYC",
"BriefDescription": "Cycles L3 miss was pending for this thread."
},
{
- "EventCode": "20010",
+ "EventCode": "0x20010",
"EventName": "PM_PMC1_OVERFLOW",
"BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
},
{
- "EventCode": "2001A",
+ "EventCode": "0x2001A",
"EventName": "PM_ITLB_HIT",
"BriefDescription": "The PTE required to translate the instruction address was resident in the TLB (instruction TLB access/IERAT reload). Applies to both HPT and RPT. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "2003E",
+ "EventCode": "0x2003E",
"EventName": "PM_PTESYNC_FIN",
"BriefDescription": "Ptesync instruction finished in the store unit. Only one ptesync can finish at a time."
},
{
- "EventCode": "2C040",
+ "EventCode": "0x2C040",
"EventName": "PM_XFER_FROM_SRC_PMC2",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "2C054",
+ "EventCode": "0x2C054",
"EventName": "PM_DERAT_MISS_64K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "2C056",
+ "EventCode": "0x2C056",
"EventName": "PM_DTLB_MISS_4K",
"BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "2D154",
+ "EventCode": "0x2D154",
"EventName": "PM_MRK_DERAT_MISS_64K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "200F6",
+ "EventCode": "0x200F6",
"EventName": "PM_DERAT_MISS",
"BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "3000A",
- "EventName": "PM_DISP_STALL_ITLB_MISS",
- "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
- },
- {
- "EventCode": "30016",
+ "EventCode": "0x30016",
"EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
},
{
- "EventCode": "3C040",
+ "EventCode": "0x3C040",
"EventName": "PM_XFER_FROM_SRC_PMC3",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "3C142",
+ "EventCode": "0x3C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "3C144",
+ "EventCode": "0x3C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
},
{
- "EventCode": "3C054",
+ "EventCode": "0x3C054",
"EventName": "PM_DERAT_MISS_16M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "3C056",
+ "EventCode": "0x3C056",
"EventName": "PM_DTLB_MISS_64K",
"BriefDescription": "Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "3C058",
+ "EventCode": "0x3C058",
"EventName": "PM_LARX_FIN",
"BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "301E2",
+ "EventCode": "0x301E2",
"EventName": "PM_MRK_ST_CMPL",
"BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
},
{
- "EventCode": "300FC",
+ "EventCode": "0x300FC",
"EventName": "PM_DTLB_MISS",
"BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
},
{
- "EventCode": "4D02C",
+ "EventCode": "0x4D02C",
"EventName": "PM_PMC1_REWIND",
"BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
},
{
- "EventCode": "4003E",
+ "EventCode": "0x4003E",
"EventName": "PM_LD_CMPL",
"BriefDescription": "Loads completed."
},
{
- "EventCode": "4C040",
+ "EventCode": "0x4C040",
"EventName": "PM_XFER_FROM_SRC_PMC4",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "4C142",
+ "EventCode": "0x4C142",
"EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
"BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "4C144",
+ "EventCode": "0x4C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
},
{
- "EventCode": "4C056",
+ "EventCode": "0x4C056",
"EventName": "PM_DTLB_MISS_16M",
"BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "4C05A",
+ "EventCode": "0x4C05A",
"EventName": "PM_DTLB_MISS_1G",
"BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "4C15E",
+ "EventCode": "0x4C15E",
"EventName": "PM_MRK_DTLB_MISS_64K",
"BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "4D056",
+ "EventCode": "0x4D056",
"EventName": "PM_NON_FMA_FLOP_CMPL",
"BriefDescription": "Non FMA instruction completed."
},
{
- "EventCode": "40164",
+ "EventCode": "0x40164",
"EventName": "PM_MRK_DERAT_MISS_2M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
index a119e56cbf1c..7d0de1a2860b 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
@@ -1,296 +1,271 @@
[
{
- "EventCode": "10016",
+ "EventCode": "0x10016",
"EventName": "PM_VSU0_ISSUE",
"BriefDescription": "VSU instructions issued to VSU pipe 0."
},
{
- "EventCode": "1001C",
+ "EventCode": "0x1001C",
"EventName": "PM_ULTRAVISOR_INST_CMPL",
"BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
},
{
- "EventCode": "100F0",
+ "EventCode": "0x100F0",
"EventName": "PM_CYC",
"BriefDescription": "Processor cycles."
},
{
- "EventCode": "10134",
+ "EventCode": "0x10134",
"EventName": "PM_MRK_ST_DONE_L2",
"BriefDescription": "Marked stores completed in L2 (RC machine done)."
},
{
- "EventCode": "1505E",
+ "EventCode": "0x1505E",
"EventName": "PM_LD_HIT_L1",
"BriefDescription": "Loads that finished without experiencing an L1 miss."
},
{
- "EventCode": "1D05E",
- "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
- },
- {
- "EventCode": "1E054",
- "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
- },
- {
- "EventCode": "1E05A",
- "EventName": "PM_CMPL_STALL_LWSYNC",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
- },
- {
- "EventCode": "1F056",
+ "EventCode": "0x1F056",
"EventName": "PM_DISP_SS0_2_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
},
{
- "EventCode": "1F15C",
+ "EventCode": "0x1F15C",
"EventName": "PM_MRK_STCX_L2_CYC",
"BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
},
{
- "EventCode": "10066",
+ "EventCode": "0x10066",
"EventName": "PM_ADJUNCT_CYC",
"BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
},
{
- "EventCode": "101E4",
+ "EventCode": "0x101E4",
"EventName": "PM_MRK_L1_ICACHE_MISS",
"BriefDescription": "Marked Instruction suffered an icache Miss."
},
{
- "EventCode": "101EA",
+ "EventCode": "0x101EA",
"EventName": "PM_MRK_L1_RELOAD_VALID",
"BriefDescription": "Marked demand reload."
},
{
- "EventCode": "100F4",
+ "EventCode": "0x100F4",
"EventName": "PM_FLOP_CMPL",
"BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
},
{
- "EventCode": "100FA",
+ "EventCode": "0x100FA",
"EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
"BriefDescription": "Cycles when at least one thread has the run latch set."
},
{
- "EventCode": "100FC",
+ "EventCode": "0x100FC",
"EventName": "PM_LD_REF_L1",
"BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
},
{
- "EventCode": "20006",
- "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
- },
- {
- "EventCode": "2000C",
+ "EventCode": "0x2000C",
"EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
"BriefDescription": "Cycles when the run latch is set for all threads."
},
{
- "EventCode": "2E010",
+ "EventCode": "0x2E010",
"EventName": "PM_ADJUNCT_INST_CMPL",
"BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
},
{
- "EventCode": "2E014",
+ "EventCode": "0x2E014",
"EventName": "PM_STCX_FIN",
"BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "20130",
+ "EventCode": "0x20130",
"EventName": "PM_MRK_INST_DECODED",
"BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
},
{
- "EventCode": "20132",
+ "EventCode": "0x20132",
"EventName": "PM_MRK_DFU_ISSUE",
"BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
},
{
- "EventCode": "20134",
+ "EventCode": "0x20134",
"EventName": "PM_MRK_FXU_ISSUE",
"BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
},
{
- "EventCode": "2505C",
+ "EventCode": "0x2505C",
"EventName": "PM_VSU_ISSUE",
"BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
},
{
- "EventCode": "2F054",
+ "EventCode": "0x2F054",
"EventName": "PM_DISP_SS1_2_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
},
{
- "EventCode": "2F056",
+ "EventCode": "0x2F056",
"EventName": "PM_DISP_SS1_4_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
},
{
- "EventCode": "2006C",
+ "EventCode": "0x2006C",
"EventName": "PM_RUN_CYC_SMT4_MODE",
"BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
},
{
- "EventCode": "201E0",
+ "EventCode": "0x201E0",
"EventName": "PM_MRK_DATA_FROM_MEMORY",
"BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
},
{
- "EventCode": "201E4",
+ "EventCode": "0x201E4",
"EventName": "PM_MRK_DATA_FROM_L3MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
},
{
- "EventCode": "201E8",
+ "EventCode": "0x201E8",
"EventName": "PM_THRESH_EXC_512",
"BriefDescription": "Threshold counter exceeded a value of 512."
},
{
- "EventCode": "200F2",
+ "EventCode": "0x200F2",
"EventName": "PM_INST_DISP",
"BriefDescription": "PowerPC instructions dispatched."
},
{
- "EventCode": "30132",
+ "EventCode": "0x30132",
"EventName": "PM_MRK_VSU_FIN",
"BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
},
{
- "EventCode": "30038",
+ "EventCode": "0x30038",
"EventName": "PM_EXEC_STALL_DMISS_LMEM",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
},
{
- "EventCode": "3F04A",
+ "EventCode": "0x3F04A",
"EventName": "PM_LSU_ST5_FIN",
"BriefDescription": "LSU Finished an internal operation in ST2 port."
},
{
- "EventCode": "34054",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
- },
- {
- "EventCode": "3405A",
+ "EventCode": "0x3405A",
"EventName": "PM_PRIVILEGED_INST_CMPL",
"BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
},
{
- "EventCode": "3F150",
+ "EventCode": "0x3F150",
"EventName": "PM_MRK_ST_DRAIN_CYC",
"BriefDescription": "cycles to drain st from core to L2."
},
{
- "EventCode": "3F054",
+ "EventCode": "0x3F054",
"EventName": "PM_DISP_SS0_4_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions."
},
{
- "EventCode": "3F056",
+ "EventCode": "0x3F056",
"EventName": "PM_DISP_SS0_8_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
},
{
- "EventCode": "30162",
+ "EventCode": "0x30162",
"EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
"BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
},
{
- "EventCode": "40114",
+ "EventCode": "0x40114",
"EventName": "PM_MRK_START_PROBE_NOP_DISP",
"BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
},
{
- "EventCode": "4001C",
+ "EventCode": "0x4001C",
"EventName": "PM_VSU_FIN",
"BriefDescription": "VSU instructions finished."
},
{
- "EventCode": "4C01A",
+ "EventCode": "0x4C01A",
"EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
},
{
- "EventCode": "4D012",
+ "EventCode": "0x4D012",
"EventName": "PM_PMC3_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
},
{
- "EventCode": "4D022",
+ "EventCode": "0x4D022",
"EventName": "PM_HYPERVISOR_INST_CMPL",
"BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
},
{
- "EventCode": "4D026",
+ "EventCode": "0x4D026",
"EventName": "PM_ULTRAVISOR_CYC",
"BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
},
{
- "EventCode": "4D028",
+ "EventCode": "0x4D028",
"EventName": "PM_PRIVILEGED_CYC",
"BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
},
{
- "EventCode": "40030",
+ "EventCode": "0x40030",
"EventName": "PM_INST_FIN",
"BriefDescription": "Instructions finished."
},
{
- "EventCode": "44146",
+ "EventCode": "0x44146",
"EventName": "PM_MRK_STCX_CORE_CYC",
"BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
},
{
- "EventCode": "44054",
+ "EventCode": "0x44054",
"EventName": "PM_VECTOR_LD_CMPL",
"BriefDescription": "Vector load instructions completed."
},
{
- "EventCode": "45054",
+ "EventCode": "0x45054",
"EventName": "PM_FMA_CMPL",
"BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
},
{
- "EventCode": "45056",
+ "EventCode": "0x45056",
"EventName": "PM_SCALAR_FLOP_CMPL",
"BriefDescription": "Scalar floating point instructions completed."
},
{
- "EventCode": "4505C",
+ "EventCode": "0x4505C",
"EventName": "PM_MATH_FLOP_CMPL",
"BriefDescription": "Math floating point instructions completed."
},
{
- "EventCode": "4D05E",
+ "EventCode": "0x4D05E",
"EventName": "PM_BR_CMPL",
"BriefDescription": "A branch completed. All branches are included."
},
{
- "EventCode": "4E15E",
+ "EventCode": "0x4E15E",
"EventName": "PM_MRK_INST_FLUSHED",
"BriefDescription": "The marked instruction was flushed."
},
{
- "EventCode": "401E6",
+ "EventCode": "0x401E6",
"EventName": "PM_MRK_INST_FROM_L3MISS",
"BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
},
{
- "EventCode": "401E8",
+ "EventCode": "0x401E8",
"EventName": "PM_MRK_DATA_FROM_L2MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
},
{
- "EventCode": "400F0",
+ "EventCode": "0x400F0",
"EventName": "PM_LD_DEMAND_MISS_L1_FIN",
"BriefDescription": "Load Missed L1, counted at finish time."
},
{
- "EventCode": "400FA",
+ "EventCode": "0x400FA",
"EventName": "PM_RUN_INST_CMPL",
"BriefDescription": "Completed PowerPC instructions gated by the run latch."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
index b61b5cc157ee..b8aded6045fa 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
@@ -1,296 +1,291 @@
[
{
- "EventCode": "100FE",
+ "EventCode": "0x100FE",
"EventName": "PM_INST_CMPL",
"BriefDescription": "PowerPC instructions completed."
},
{
- "EventCode": "10006",
- "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
- },
- {
- "EventCode": "1000C",
+ "EventCode": "0x1000C",
"EventName": "PM_LSU_LD0_FIN",
"BriefDescription": "LSU Finished an internal operation in LD0 port."
},
{
- "EventCode": "1000E",
+ "EventCode": "0x1000E",
"EventName": "PM_MMA_ISSUED",
"BriefDescription": "MMA instructions issued."
},
{
- "EventCode": "10012",
+ "EventCode": "0x10012",
"EventName": "PM_LSU_ST0_FIN",
"BriefDescription": "LSU Finished an internal operation in ST0 port."
},
{
- "EventCode": "10014",
+ "EventCode": "0x10014",
"EventName": "PM_LSU_ST4_FIN",
"BriefDescription": "LSU Finished an internal operation in ST4 port."
},
{
- "EventCode": "10018",
+ "EventCode": "0x10018",
"EventName": "PM_IC_DEMAND_CYC",
"BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss."
},
{
- "EventCode": "10022",
+ "EventCode": "0x10022",
"EventName": "PM_PMC2_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
},
{
- "EventCode": "10024",
+ "EventCode": "0x10024",
"EventName": "PM_PMC5_OVERFLOW",
"BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
},
{
- "EventCode": "10058",
+ "EventCode": "0x10058",
"EventName": "PM_EXEC_STALL_FIN_AT_DISP",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline finished at dispatch and did not require execution in the LSU, BRU or VSU."
},
{
- "EventCode": "1005A",
+ "EventCode": "0x1005A",
"EventName": "PM_FLUSH_MPRED",
"BriefDescription": "A flush occurred due to a mispredicted branch. Includes target and direction."
},
{
- "EventCode": "1C05A",
+ "EventCode": "0x1C05A",
"EventName": "PM_DERAT_MISS_2M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "10064",
- "EventName": "PM_DISP_STALL_IC_L2",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+ "EventCode": "0x1E05A",
+ "EventName": "PM_CMPL_STALL_LWSYNC",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
},
{
- "EventCode": "10068",
+ "EventCode": "0x10068",
"EventName": "PM_BR_FIN",
"BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional."
},
{
- "EventCode": "1006A",
+ "EventCode": "0x1006A",
"EventName": "PM_FX_LSU_FIN",
"BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time."
},
{
- "EventCode": "1006C",
+ "EventCode": "0x1006C",
"EventName": "PM_RUN_CYC_ST_MODE",
"BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
},
{
- "EventCode": "20004",
+ "EventCode": "0x20004",
"EventName": "PM_ISSUE_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet."
},
{
- "EventCode": "2000A",
+ "EventCode": "0x2000A",
"EventName": "PM_HYPERVISOR_CYC",
"BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
},
{
- "EventCode": "2000E",
+ "EventCode": "0x2000E",
"EventName": "PM_LSU_LD1_FIN",
"BriefDescription": "LSU Finished an internal operation in LD1 port."
},
{
- "EventCode": "2C014",
+ "EventCode": "0x2C014",
"EventName": "PM_CMPL_STALL_SPECIAL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing."
},
{
- "EventCode": "2C018",
+ "EventCode": "0x2C018",
"EventName": "PM_EXEC_STALL_DMISS_L3MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3."
},
{
- "EventCode": "2D010",
+ "EventCode": "0x2D010",
"EventName": "PM_LSU_ST1_FIN",
"BriefDescription": "LSU Finished an internal operation in ST1 port."
},
{
- "EventCode": "2D012",
+ "EventCode": "0x2D012",
"EventName": "PM_VSU1_ISSUE",
"BriefDescription": "VSU instructions issued to VSU pipe 1."
},
{
- "EventCode": "2D018",
+ "EventCode": "0x2D018",
"EventName": "PM_EXEC_STALL_VSU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)."
},
{
- "EventCode": "2E01E",
+ "EventCode": "0x2D01C",
+ "EventName": "PM_CMPL_STALL_STCX",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
+ },
+ {
+ "EventCode": "0x2E01E",
"EventName": "PM_EXEC_STALL_NTC_FLUSH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
},
{
- "EventCode": "2013C",
+ "EventCode": "0x2013C",
"EventName": "PM_MRK_FX_LSU_FIN",
"BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
},
{
- "EventCode": "2405A",
+ "EventCode": "0x2405A",
"EventName": "PM_NTC_FIN",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
},
{
- "EventCode": "201E2",
+ "EventCode": "0x201E2",
"EventName": "PM_MRK_LD_MISS_L1",
"BriefDescription": "Marked DL1 Demand Miss counted at finish time."
},
{
- "EventCode": "200F4",
+ "EventCode": "0x200F4",
"EventName": "PM_RUN_CYC",
"BriefDescription": "Processor cycles gated by the run latch."
},
{
- "EventCode": "30004",
- "EventName": "PM_DISP_STALL_FLUSH",
- "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
- },
- {
- "EventCode": "30008",
+ "EventCode": "0x30008",
"EventName": "PM_EXEC_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category."
},
{
- "EventCode": "3001A",
+ "EventCode": "0x3001A",
"EventName": "PM_LSU_ST2_FIN",
"BriefDescription": "LSU Finished an internal operation in ST2 port."
},
{
- "EventCode": "30020",
+ "EventCode": "0x30020",
"EventName": "PM_PMC2_REWIND",
"BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
},
{
- "EventCode": "30022",
+ "EventCode": "0x30022",
"EventName": "PM_PMC4_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
},
{
- "EventCode": "30024",
+ "EventCode": "0x30024",
"EventName": "PM_PMC6_OVERFLOW",
"BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
},
{
- "EventCode": "30028",
+ "EventCode": "0x30028",
"EventName": "PM_CMPL_STALL_MEM_ECC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
},
{
- "EventCode": "30036",
+ "EventCode": "0x30036",
"EventName": "PM_EXEC_STALL_SIMPLE_FX",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit."
},
{
- "EventCode": "3003A",
+ "EventCode": "0x3003A",
"EventName": "PM_CMPL_STALL_EXCEPTION",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete."
},
{
- "EventCode": "3F044",
+ "EventCode": "0x3F044",
"EventName": "PM_VSU2_ISSUE",
"BriefDescription": "VSU instructions issued to VSU pipe 2."
},
{
- "EventCode": "30058",
+ "EventCode": "0x30058",
"EventName": "PM_TLBIE_FIN",
"BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
},
{
- "EventCode": "3D058",
+ "EventCode": "0x3D058",
"EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
"BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
},
{
- "EventCode": "30066",
+ "EventCode": "0x30066",
"EventName": "PM_LSU_FIN",
"BriefDescription": "LSU Finished an internal operation (up to 4 per cycle)."
},
{
- "EventCode": "40004",
+ "EventCode": "0x40004",
"EventName": "PM_FXU_ISSUE",
"BriefDescription": "A fixed point instruction was issued to the VSU."
},
{
- "EventCode": "40008",
+ "EventCode": "0x40008",
"EventName": "PM_NTC_ALL_FIN",
"BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline."
},
{
- "EventCode": "40010",
+ "EventCode": "0x40010",
"EventName": "PM_PMC3_OVERFLOW",
"BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
},
{
- "EventCode": "4C012",
+ "EventCode": "0x4C012",
"EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve."
},
{
- "EventCode": "4C018",
+ "EventCode": "0x4C018",
"EventName": "PM_CMPL_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason."
},
{
- "EventCode": "4C01E",
+ "EventCode": "0x4C01E",
"EventName": "PM_LSU_ST3_FIN",
"BriefDescription": "LSU Finished an internal operation in ST3 port."
},
{
- "EventCode": "4D018",
+ "EventCode": "0x4D018",
"EventName": "PM_EXEC_STALL_BRU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit."
},
{
- "EventCode": "4D01A",
+ "EventCode": "0x4D01A",
"EventName": "PM_CMPL_STALL_HWSYNC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a hwsync waiting for response from L2 before completing."
},
{
- "EventCode": "4D01C",
+ "EventCode": "0x4D01C",
"EventName": "PM_EXEC_STALL_TLBIEL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest."
},
{
- "EventCode": "4E012",
+ "EventCode": "0x4E012",
"EventName": "PM_EXEC_STALL_UNKNOWN",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
},
{
- "EventCode": "4D020",
+ "EventCode": "0x4D020",
"EventName": "PM_VSU3_ISSUE",
"BriefDescription": "VSU instruction was issued to VSU pipe 3."
},
{
- "EventCode": "40132",
+ "EventCode": "0x40132",
"EventName": "PM_MRK_LSU_FIN",
"BriefDescription": "LSU marked instruction finish."
},
{
- "EventCode": "45058",
+ "EventCode": "0x45058",
"EventName": "PM_IC_MISS_CMPL",
"BriefDescription": "Non-speculative icache miss, counted at completion."
},
{
- "EventCode": "4D050",
+ "EventCode": "0x4D050",
"EventName": "PM_VSU_NON_FLOP_CMPL",
"BriefDescription": "Non-floating point VSU instructions completed."
},
{
- "EventCode": "4D052",
+ "EventCode": "0x4D052",
"EventName": "PM_2FLOP_CMPL",
"BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
},
{
- "EventCode": "400F2",
+ "EventCode": "0x400F2",
"EventName": "PM_1PLUS_PPC_DISP",
"BriefDescription": "Cycles at least one Instr Dispatched."
},
{
- "EventCode": "400F8",
+ "EventCode": "0x400F8",
"EventName": "PM_FLUSH",
"BriefDescription": "Flush (any type)."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
index ea122a91ceb0..b5d1bd39cfb2 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
@@ -1,21 +1,21 @@
[
{
- "EventCode": "301E8",
+ "EventCode": "0x301E8",
"EventName": "PM_THRESH_EXC_64",
"BriefDescription": "Threshold counter exceeded a value of 64."
},
{
- "EventCode": "45050",
+ "EventCode": "0x45050",
"EventName": "PM_1FLOP_CMPL",
"BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
},
{
- "EventCode": "45052",
+ "EventCode": "0x45052",
"EventName": "PM_4FLOP_CMPL",
"BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
},
{
- "EventCode": "4D054",
+ "EventCode": "0x4D054",
"EventName": "PM_8FLOP_CMPL",
"BriefDescription": "Four Double Precision vector instructions completed."
}
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
index 5a714e3dd71a..db3766dca07c 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
@@ -1,56 +1,56 @@
[
{
- "EventCode": "1F15E",
+ "EventCode": "0x1F15E",
"EventName": "PM_MRK_START_PROBE_NOP_CMPL",
"BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
},
{
- "EventCode": "20016",
+ "EventCode": "0x20016",
"EventName": "PM_ST_FIN",
"BriefDescription": "Store finish count. Includes speculative activity."
},
{
- "EventCode": "20018",
+ "EventCode": "0x20018",
"EventName": "PM_ST_FWD",
"BriefDescription": "Store forwards that finished."
},
{
- "EventCode": "2011C",
+ "EventCode": "0x2011C",
"EventName": "PM_MRK_NTF_CYC",
"BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
},
{
- "EventCode": "2E01C",
+ "EventCode": "0x2E01C",
"EventName": "PM_EXEC_STALL_TLBIE",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
},
{
- "EventCode": "201E6",
+ "EventCode": "0x201E6",
"EventName": "PM_THRESH_EXC_32",
"BriefDescription": "Threshold counter exceeded a value of 32."
},
{
- "EventCode": "200F0",
+ "EventCode": "0x200F0",
"EventName": "PM_ST_CMPL",
"BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)."
},
{
- "EventCode": "200FE",
+ "EventCode": "0x200FE",
"EventName": "PM_DATA_FROM_L2MISS",
"BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
},
{
- "EventCode": "30010",
+ "EventCode": "0x30010",
"EventName": "PM_PMC2_OVERFLOW",
"BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
},
{
- "EventCode": "4D010",
+ "EventCode": "0x4D010",
"EventName": "PM_PMC1_SAVED",
"BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
},
{
- "EventCode": "4D05C",
+ "EventCode": "0x4D05C",
"EventName": "PM_DPP_FLOP_CMPL",
"BriefDescription": "Double-Precision or Quad-Precision instructions completed."
}
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 7422b0ea8790..9604446f8360 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -960,7 +960,7 @@ static int get_maxfds(void)
struct rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim) == 0)
- return min((int)rlim.rlim_max / 2, 512);
+ return min(rlim.rlim_max / 2, (rlim_t)512);
return 512;
}
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 7daa8bb70a5a..711d4f9f5645 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -91,6 +91,11 @@
from __future__ import print_function
import sys
+# Only change warnings if the python -W option was not used
+if not sys.warnoptions:
+ import warnings
+ # PySide2 causes deprecation warnings, ignore them.
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
import argparse
import weakref
import threading
@@ -125,8 +130,9 @@ if pyside_version_1:
from PySide.QtGui import *
from PySide.QtSql import *
-from decimal import *
-from ctypes import *
+from decimal import Decimal, ROUND_HALF_UP
+from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \
+ c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong
from multiprocessing import Process, Array, Value, Event
# xrange is range in Python3
@@ -3868,7 +3874,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False):
if with_hdr:
model = indexes[0].model()
for col in range(min_col, max_col + 1):
- val = model.headerData(col, Qt.Horizontal)
+ val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
if as_csv:
text += sep + ToCSValue(val)
sep = ","
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 4a7b8deef3fd..8c10955eff93 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -16,7 +16,7 @@ pinned=0
exclusive=0
exclude_user=0
exclude_kernel=0|1
-exclude_hv=0
+exclude_hv=0|1
exclude_idle=0
mmap=1
comm=1
diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
index 76a53126efdf..d4b0ef74defc 100644
--- a/tools/perf/tests/pfm.c
+++ b/tools/perf/tests/pfm.c
@@ -131,8 +131,8 @@ static int test__pfm_group(void)
},
{
.events = "{},{instructions}",
- .nr_events = 0,
- .nr_groups = 0,
+ .nr_events = 1,
+ .nr_groups = 1,
},
{
.events = "{instructions},{instructions}",
diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index 22eb31e48ca7..2f9948b3d943 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -11,9 +11,9 @@ compare_number()
second_num=$2
# upper bound is first_num * 110%
- upper=$(( $first_num + $first_num / 10 ))
+ upper=$(expr $first_num + $first_num / 10 )
# lower bound is first_num * 90%
- lower=$(( $first_num - $first_num / 10 ))
+ lower=$(expr $first_num - $first_num / 10 )
if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
echo "The difference between $first_num and $second_num are greater than 10%."
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index b8fc5c53ba6f..0d8e3dcb7f88 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
int __user *usockvec);
extern int __sys_shutdown_sock(struct socket *sock, int how);
extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
#endif /* _LINUX_SOCKET_H */
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index ddb52f748c8e..5ed674a2f55e 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -451,10 +451,10 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
goto out;
}
- err = -1;
link = bpf_program__attach(skel->progs.on_switch);
- if (!link) {
+ if (IS_ERR(link)) {
pr_err("Failed to attach leader program\n");
+ err = PTR_ERR(link);
goto out;
}
@@ -521,9 +521,10 @@ static int bperf__load(struct evsel *evsel, struct target *target)
evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
if (evsel->bperf_leader_link_fd < 0 &&
- bperf_reload_leader_program(evsel, attr_map_fd, &entry))
+ bperf_reload_leader_program(evsel, attr_map_fd, &entry)) {
+ err = -1;
goto out;
-
+ }
/*
* The bpf_link holds reference to the leader program, and the
* leader program holds reference to the maps. Therefore, if
@@ -550,6 +551,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
/* Step 2: load the follower skeleton */
evsel->follower_skel = bperf_follower_bpf__open();
if (!evsel->follower_skel) {
+ err = -1;
pr_err("Failed to open follower skeleton\n");
goto out;
}
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index b2f4920e19a6..7d2ba8419b0c 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -975,9 +975,13 @@ static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
if ((tag == DW_TAG_formal_parameter ||
tag == DW_TAG_variable) &&
die_compare_name(die_mem, fvp->name) &&
- /* Does the DIE have location information or external instance? */
+ /*
+ * Does the DIE have location information or const value
+ * or external instance?
+ */
(dwarf_attr(die_mem, DW_AT_external, &attr) ||
- dwarf_attr(die_mem, DW_AT_location, &attr)))
+ dwarf_attr(die_mem, DW_AT_location, &attr) ||
+ dwarf_attr(die_mem, DW_AT_const_value, &attr)))
return DIE_FIND_CB_END;
if (dwarf_haspc(die_mem, fvp->addr))
return DIE_FIND_CB_CONTINUE;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 9130f6fad8d5..bc5e4f294e9e 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -144,6 +144,7 @@ static void perf_env__purge_bpf(struct perf_env *env)
node = rb_entry(next, struct bpf_prog_info_node, rb_node);
next = rb_next(&node->rb_node);
rb_erase(&node->rb_node, root);
+ free(node->info_linear);
free(node);
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8a62fb39e365..19ad64f2bd83 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -100,7 +100,7 @@ enum {
PERF_IP_FLAG_VMEXIT = 1ULL << 12,
};
-#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6e5c41528c7d..6ea3e677dc1e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
if (affinity__setup(&affinity) < 0)
return;
- evlist__for_each_entry(evlist, pos)
- bpf_counter__disable(pos);
-
/* Disable 'immediate' events last */
for (imm = 0; imm <= 1; imm++) {
evlist__for_each_cpu(evlist, i, cpu) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4a3cd1b5bb33..a8d8463f8ee5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -428,6 +428,7 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->auto_merge_stats = orig->auto_merge_stats;
evsel->collect_stat = orig->collect_stat;
evsel->weak_group = orig->weak_group;
+ evsel->use_config_name = orig->use_config_name;
if (evsel__copy_config_terms(evsel, orig) < 0)
goto out_err;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 75cf5dbfe208..bdad52a06438 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -83,8 +83,10 @@ struct evsel {
bool collect_stat;
bool weak_group;
bool bpf_counter;
+ bool use_config_name;
int bpf_fd;
struct bpf_object *bpf_obj;
+ struct list_head config_terms;
};
/*
@@ -116,10 +118,8 @@ struct evsel {
bool merged_stat;
bool reset_group;
bool errored;
- bool use_config_name;
struct hashmap *per_pkg_mask;
struct evsel *leader;
- struct list_head config_terms;
int err;
int cpu_iter;
struct {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8c59677bee13..20ad663978cc 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
decoder->state.type = INTEL_PT_TRANSACTION;
+ if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+ decoder->state.type |= INTEL_PT_BRANCH;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.flags = decoder->fup_tx_flags;
@@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
return 0;
if (err == -EAGAIN ||
intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- if (intel_pt_fup_event(decoder))
+ if (intel_pt_fup_event(decoder) && no_tip)
return 0;
return -EAGAIN;
}
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 8658d42ce57a..0dfec8761b9a 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
*ip += intel_pt_insn->length;
- if (to_ip && *ip == to_ip)
+ if (to_ip && *ip == to_ip) {
+ intel_pt_insn->length = 0;
goto out_no_cache;
+ }
if (*ip >= al.map->end)
break;
@@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
+ ptq->insn_len = 0;
if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
} else if (ptq->state->flags & INTEL_PT_ASYNC) {
@@ -1211,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_INTERRUPT;
- ptq->insn_len = 0;
} else {
if (ptq->state->from_ip)
ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3ff4936a15a4..da19be7da284 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -776,10 +776,10 @@ static int machine__process_ksymbol_register(struct machine *machine,
if (dso) {
dso->kernel = DSO_SPACE__KERNEL;
map = map__new2(0, dso);
+ dso__put(dso);
}
if (!dso || !map) {
- dso__put(dso);
return -ENOMEM;
}
@@ -792,6 +792,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
+ map__put(map);
dso__set_loaded(dso);
if (is_bpf_image(event->ksymbol.name)) {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 8336dd8e8098..d3cf2dee36c8 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -162,10 +162,10 @@ static bool contains_event(struct evsel **metric_events, int num_events,
return false;
}
-static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2)
+static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2)
{
if (!ev1->pmu_name || !ev2->pmu_name)
- return false;
+ return true;
return !strcmp(ev1->pmu_name, ev2->pmu_name);
}
@@ -288,7 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
*/
if (!has_constraint &&
ev->leader != metric_events[i]->leader &&
- evsel_same_pmu(ev->leader, metric_events[i]->leader))
+ evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader))
break;
if (!strcmp(metric_events[i]->name, ev->name)) {
set_bit(ev->idx, evlist_used);
@@ -1073,16 +1073,18 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe,
ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids);
if (ret)
- return ret;
+ goto out;
ret = resolve_metric(d->metric_no_group,
d->metric_list, NULL, d->ids);
if (ret)
- return ret;
+ goto out;
*(d->has_match) = true;
- return *d->ret;
+out:
+ *(d->ret) = ret;
+ return ret;
}
static int metricgroup__add_metric(const char *metric, bool metric_no_group,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4dad14265b81..84108c17f48d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
.symbol = "bpf-output",
.alias = "",
},
+ [PERF_COUNT_SW_CGROUP_SWITCHES] = {
+ .symbol = "cgroup-switches",
+ .alias = "",
+ },
};
#define __PERF_EVENT_FIELD(config, name) \
@@ -2928,9 +2932,14 @@ restart:
}
for (i = 0; i < max; i++, syms++) {
+ /*
+ * New attr.config still not supported here, the latest
+ * example was PERF_COUNT_SW_CGROUP_SWITCHES
+ */
+ if (syms->symbol == NULL)
+ continue;
- if (event_glob != NULL && syms->symbol != NULL &&
- !(strglobmatch(syms->symbol, event_glob) ||
+ if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
(syms->alias && strglobmatch(syms->alias, event_glob))))
continue;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index fb8646cc3e83..923849024b15 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
/*
* We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 829af17a0867..020411682a3c 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -103,6 +103,11 @@ static void perf_probe_build_id(struct evsel *evsel)
evsel->core.attr.build_id = 1;
}
+static void perf_probe_cgroup(struct evsel *evsel)
+{
+ evsel->core.attr.cgroup = 1;
+}
+
bool perf_can_sample_identifier(void)
{
return perf_probe_api(perf_probe_sample_identifier);
@@ -182,3 +187,8 @@ bool perf_can_record_build_id(void)
{
return perf_probe_api(perf_probe_build_id);
}
+
+bool perf_can_record_cgroup(void)
+{
+ return perf_probe_api(perf_probe_cgroup);
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index f12ca55f509a..b104168efb15 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -12,5 +12,6 @@ bool perf_can_record_switch_events(void);
bool perf_can_record_text_poke_events(void);
bool perf_can_sample_identifier(void);
bool perf_can_record_build_id(void);
+bool perf_can_record_cgroup(void);
#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index d735acb6c29c..6eef6dfeaa57 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
}
/* no event */
- if (*q == '\0')
+ if (*q == '\0') {
+ if (*sep == '}') {
+ if (grp_evt < 0) {
+ ui__error("cannot close a non-existing event group\n");
+ goto error;
+ }
+ grp_evt--;
+ }
continue;
+ }
memset(&attr, 0, sizeof(attr));
event_attr_init(&attr);
@@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
grp_evt = -1;
}
}
+ free(p_orig);
return 0;
error:
free(p_orig);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 866f2d514d72..b029c29ce227 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -190,6 +190,9 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
immediate_value_is_supported()) {
Dwarf_Sword snum;
+ if (!tvar)
+ return 0;
+
dwarf_formsdata(&attr, &snum);
ret = asprintf(&tvar->value, "\\%ld", (long)snum);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 106b3d60881a..e59242c361ce 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1723,6 +1723,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
if (event->header.size < hdr_sz || event->header.size > buf_sz)
return -1;
+ buf += hdr_sz;
rest = event->header.size - hdr_sz;
if (readn(fd, buf, rest) != (ssize_t)rest)
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a76fff5e7d83..ca326f98c7a2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -541,7 +541,7 @@ static void uniquify_event_name(struct evsel *counter)
char *config;
int ret = 0;
- if (counter->uniquified_name ||
+ if (counter->uniquified_name || counter->use_config_name ||
!counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
strlen(counter->pmu_name)))
return;
@@ -555,10 +555,8 @@ static void uniquify_event_name(struct evsel *counter)
}
} else {
if (perf_pmu__has_hybrid()) {
- if (!counter->use_config_name) {
- ret = asprintf(&new_name, "%s/%s/",
- counter->pmu_name, counter->name);
- }
+ ret = asprintf(&new_name, "%s/%s/",
+ counter->pmu_name, counter->name);
} else {
ret = asprintf(&new_name, "%s [%s]",
counter->name, counter->pmu_name);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4c56aa837434..a73345730ba9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -2412,6 +2412,7 @@ int cleanup_sdt_note_list(struct list_head *sdt_notes)
list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) {
list_del_init(&pos->note_list);
+ zfree(&pos->args);
zfree(&pos->name);
zfree(&pos->provider);
free(pos);
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index ab940c508ef0..bf9fd3549a1d 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.9";
+static const char *version_str = "v1.10";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -106,6 +106,22 @@ int is_skx_based_platform(void)
return 0;
}
+int is_spr_platform(void)
+{
+ if (cpu_model == 0x8F)
+ return 1;
+
+ return 0;
+}
+
+int is_icx_platform(void)
+{
+ if (cpu_model == 0x6A || cpu_model == 0x6C)
+ return 1;
+
+ return 0;
+}
+
static int update_cpu_model(void)
{
unsigned int ebx, ecx, edx;
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 6a26d5769984..4431c8a0d40a 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -201,6 +201,7 @@ void isst_get_uncore_mem_freq(int cpu, int config_index,
{
unsigned int resp;
int ret;
+
ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_GET_MEM_FREQ,
0, config_index, &resp);
if (ret) {
@@ -209,6 +210,20 @@ void isst_get_uncore_mem_freq(int cpu, int config_index,
}
ctdp_level->mem_freq = resp & GENMASK(7, 0);
+ if (is_spr_platform()) {
+ ctdp_level->mem_freq *= 200;
+ } else if (is_icx_platform()) {
+ if (ctdp_level->mem_freq < 7) {
+ ctdp_level->mem_freq = (12 - ctdp_level->mem_freq) * 133.33 * 2 * 10;
+ ctdp_level->mem_freq /= 10;
+ if (ctdp_level->mem_freq % 10 > 5)
+ ctdp_level->mem_freq++;
+ } else {
+ ctdp_level->mem_freq = 0;
+ }
+ } else {
+ ctdp_level->mem_freq = 0;
+ }
debug_printf(
"cpu:%d ctdp:%d CONFIG_TDP_GET_MEM_FREQ resp:%x uncore mem_freq:%d\n",
cpu, config_index, resp, ctdp_level->mem_freq);
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 3bf1820c0da1..f97d8859ada7 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -446,7 +446,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
if (ctdp_level->mem_freq) {
snprintf(header, sizeof(header), "mem-frequency(MHz)");
snprintf(value, sizeof(value), "%d",
- ctdp_level->mem_freq * DISP_FREQ_MULTIPLIER);
+ ctdp_level->mem_freq);
format_and_print(outf, level + 2, header, value);
}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 0cac6c54be87..1aa15d5ea57c 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -257,5 +257,7 @@ extern int get_cpufreq_base_freq(int cpu);
extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
extern int is_skx_based_platform(void);
+extern int is_spr_platform(void);
+extern int is_icx_platform(void);
extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
#endif
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index f9271f3ea912..071312f5eb92 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -131,29 +131,29 @@ QUIET_SUBDIR1 =
ifneq ($(silent),1)
ifneq ($(V),1)
- QUIET_CC = @echo ' CC '$@;
- QUIET_CC_FPIC = @echo ' CC FPIC '$@;
- QUIET_CLANG = @echo ' CLANG '$@;
- QUIET_AR = @echo ' AR '$@;
- QUIET_LINK = @echo ' LINK '$@;
- QUIET_MKDIR = @echo ' MKDIR '$@;
- QUIET_GEN = @echo ' GEN '$@;
+ QUIET_CC = @echo ' CC '$@;
+ QUIET_CC_FPIC = @echo ' CC FPIC '$@;
+ QUIET_CLANG = @echo ' CLANG '$@;
+ QUIET_AR = @echo ' AR '$@;
+ QUIET_LINK = @echo ' LINK '$@;
+ QUIET_MKDIR = @echo ' MKDIR '$@;
+ QUIET_GEN = @echo ' GEN '$@;
QUIET_SUBDIR0 = +@subdir=
QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
- echo ' SUBDIR '$$subdir; \
+ echo ' SUBDIR '$$subdir; \
$(MAKE) $(PRINT_DIR) -C $$subdir
- QUIET_FLEX = @echo ' FLEX '$@;
- QUIET_BISON = @echo ' BISON '$@;
- QUIET_GENSKEL = @echo ' GEN-SKEL '$@;
+ QUIET_FLEX = @echo ' FLEX '$@;
+ QUIET_BISON = @echo ' BISON '$@;
+ QUIET_GENSKEL = @echo ' GENSKEL '$@;
descend = \
- +@echo ' DESCEND '$(1); \
+ +@echo ' DESCEND '$(1); \
mkdir -p $(OUTPUT)$(1) && \
$(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2)
- QUIET_CLEAN = @printf ' CLEAN %s\n' $1;
- QUIET_INSTALL = @printf ' INSTALL %s\n' $1;
- QUIET_UNINST = @printf ' UNINST %s\n' $1;
+ QUIET_CLEAN = @printf ' CLEAN %s\n' $1;
+ QUIET_INSTALL = @printf ' INSTALL %s\n' $1;
+ QUIET_UNINST = @printf ' UNINST %s\n' $1;
endif
endif
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bc3299a20338..fb010a35d61a 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -49,6 +49,7 @@ TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += openat2
+TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
TARGETS += seccomp
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index b29cbc642c57..76e138525d55 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -25,7 +25,7 @@ int main(int argc, char **argv)
ksft_set_plan(2);
if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available");
+ ksft_exit_skip("SVE not available\n");
/*
* Enumerate up to SVE_VQ_MAX vector lengths
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 4866f6a21901..addcfd8b615e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf
fixdep
test_dev_cgroup
/test_progs*
+!test_progs.h
test_verifier_log
feature
test_sock
@@ -30,10 +31,13 @@ test_sysctl
xdping
test_cpp
*.skel.h
+*.lskel.h
/no_alu32
/bpf_gcc
/tools
/runqslower
/bench
*.ko
+*.tmp
xdpxceiver
+xdp_redirect_multi
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 511259c2c6c5..f405b20c1e6c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
+ test_xdp_redirect_multi.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
test_offload.py \
@@ -84,7 +85,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
- xdpxceiver
+ xdpxceiver xdp_redirect_multi
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
@@ -312,6 +313,10 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
linked_vars.skel.h linked_maps.skel.h
+LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
+ test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c
+SKEL_BLACKLIST += $$(LSKELS)
+
test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
@@ -339,6 +344,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
@@ -380,6 +386,14 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+$(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+ $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+
$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
@@ -409,6 +423,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
+ $(TRUNNER_BPF_LSKELS) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
@@ -516,6 +531,6 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
- $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko)
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index ccf260021e83..eb6a4fea8c79 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst
ifndef RST2MAN_DEP
$$(error "rst2man not found, but required to generate man pages")
endif
- $$(QUIET_GEN)rst2man $$< > $$@
+ $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+ $$(QUIET_GEN)mv $$@.tmp $$@
docs-clean-$1:
$$(call QUIET_CLEAN, eBPF_$1-manpage)
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3353778c30f8..8deec1ca9150 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use
Clang that contains the fix.
__ https://reviews.llvm.org/D100362
+
+Clang relocation changes
+========================
+
+Clang 13 patch `clang reloc patch`_ made some changes on relocations such
+that existing relocation types are broken into more types and
+each new type corresponds to only one way to resolve relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 to compile old libbpf which has static linker support,
+there will be a compilation failure::
+
+ libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o
+
+Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, user newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 332ed2f7b402..6ea15b93a2f8 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -43,6 +43,7 @@ void setup_libbpf()
{
int err;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
err = bump_memlock_rlimit();
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index a967674098ad..c7ec114eca56 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index bde6c9d4cbd4..d167bffac679 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
@@ -271,7 +271,7 @@ static void ringbuf_custom_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
@@ -430,7 +430,7 @@ static void perfbuf_libbpf_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 2a0b6c9885a4..f41a491a8cc0 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 12ee40284da0..2060bc122c53 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
-static int settimeo(int fd, int timeout_ms)
+int settimeo(int fd, int timeout_ms)
{
struct timeval timeout = { .tv_sec = 3 };
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 7205f8afdba1..5e0d51c07b63 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,6 +33,7 @@ struct ipv6_packet {
} __packed;
extern struct ipv6_packet pkt_v6;
+int settimeo(int fd, int timeout_ms);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
int connect_to_fd(int server_fd, int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 21efe7bbf10d..ba0e1efe5a45 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -2,19 +2,19 @@
#include <test_progs.h>
-#include "atomics.skel.h"
+#include "atomics.lskel.h"
static void test_add(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.add);
- if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__add__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(add)"))
return;
- prog_fd = bpf_program__fd(skel->progs.add);
+ prog_fd = skel->progs.add.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -33,20 +33,20 @@ static void test_add(struct atomics *skel)
ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_sub(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.sub);
- if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__sub__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
return;
- prog_fd = bpf_program__fd(skel->progs.sub);
+ prog_fd = skel->progs.sub.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run sub",
@@ -66,20 +66,20 @@ static void test_sub(struct atomics *skel)
ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_and(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.and);
- if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__and__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(and)"))
return;
- prog_fd = bpf_program__fd(skel->progs.and);
+ prog_fd = skel->progs.and.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run and",
@@ -94,20 +94,20 @@ static void test_and(struct atomics *skel)
ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_or(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.or);
- if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__or__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(or)"))
return;
- prog_fd = bpf_program__fd(skel->progs.or);
+ prog_fd = skel->progs.or.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run or",
@@ -123,20 +123,20 @@ static void test_or(struct atomics *skel)
ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_xor(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.xor);
- if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__xor__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
return;
- prog_fd = bpf_program__fd(skel->progs.xor);
+ prog_fd = skel->progs.xor.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run xor",
@@ -151,20 +151,20 @@ static void test_xor(struct atomics *skel)
ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_cmpxchg(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.cmpxchg);
- if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__cmpxchg__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
return;
- prog_fd = bpf_program__fd(skel->progs.cmpxchg);
+ prog_fd = skel->progs.cmpxchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -180,20 +180,20 @@ static void test_cmpxchg(struct atomics *skel)
ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_xchg(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.xchg);
- if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__xchg__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
return;
- prog_fd = bpf_program__fd(skel->progs.xchg);
+ prog_fd = skel->progs.xchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -207,7 +207,7 @@ static void test_xchg(struct atomics *skel)
ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
void test_atomics(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 9dc4e3dfbcf3..ec11e20d2b92 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -85,16 +85,14 @@ void test_attach_probe(void)
kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
false /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link)))
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
skel->links.handle_kprobe = kprobe_link;
kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
true /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- "err %ld\n", PTR_ERR(kretprobe_link)))
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
@@ -103,8 +101,7 @@ void test_attach_probe(void)
0 /* self pid */,
"/proc/self/exe",
uprobe_offset);
- if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- "err %ld\n", PTR_ERR(uprobe_link)))
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
@@ -113,8 +110,7 @@ void test_attach_probe(void)
-1 /* any pid */,
"/proc/self/exe",
uprobe_offset);
- if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- "err %ld\n", PTR_ERR(uretprobe_link)))
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 2d3590cfb5e1..1f1aade56504 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -47,7 +47,7 @@ static void do_dummy_read(struct bpf_program *prog)
int iter_fd, len;
link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -201,7 +201,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
int ret = 0;
link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return ret;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -396,7 +396,7 @@ static void test_file_iter(void)
return;
link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
/* unlink this path if it exists. */
@@ -502,7 +502,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
skel->bss->map2_id = map_info.id;
link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto free_map2;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -607,14 +607,12 @@ static void test_bpf_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap2 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap3 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
/* hashmap1 should be good, update map values here */
@@ -636,7 +634,7 @@ static void test_bpf_hash_map(void)
linfo.map.map_fd = map_fd;
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -727,7 +725,7 @@ static void test_bpf_percpu_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -798,7 +796,7 @@ static void test_bpf_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -894,7 +892,7 @@ static void test_bpf_percpu_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -957,7 +955,7 @@ static void test_bpf_sk_storage_delete(void)
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
&opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1075,7 +1073,7 @@ static void test_bpf_sk_storage_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1128,7 +1126,7 @@ static void test_rdonly_buf_out_of_bound(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
bpf_link__destroy(link);
bpf_iter_test_kern5__destroy(skel);
@@ -1186,8 +1184,7 @@ static void test_task_vma(void)
skel->links.proc_maps = bpf_program__attach_iter(
skel->progs.proc_maps, NULL);
- if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
- "attach iterator failed\n")) {
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
skel->links.proc_maps = NULL;
goto out;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e25917f04602..efe1e979affb 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -82,7 +82,7 @@ static void *server(void *arg)
bytes, total_bytes, nr_sent, errno);
done:
- if (fd != -1)
+ if (fd >= 0)
close(fd);
if (err) {
WRITE_ONCE(stop, 1);
@@ -191,8 +191,7 @@ static void test_cubic(void)
return;
link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_cubic__destroy(cubic_skel);
return;
}
@@ -213,8 +212,7 @@ static void test_dctcp(void)
return;
link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_dctcp__destroy(dctcp_skel);
return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 0457ae32b270..857e3f26086f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num)
always_log);
free(raw_btf);
- err = ((btf_fd == -1) != test->btf_load_err);
+ err = ((btf_fd < 0) != test->btf_load_err);
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num)
goto done;
}
- if (err || btf_fd == -1)
+ if (err || btf_fd < 0)
goto done;
create_attr.name = test->map_name;
@@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num)
map_fd = bpf_create_map_xattr(&create_attr);
- err = ((map_fd == -1) != test->map_create_err);
+ err = ((map_fd < 0) != test->map_create_err);
CHECK(err, "map_fd:%d test->map_create_err:%u",
map_fd, test->map_create_err);
done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
}
@@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -3987,7 +3987,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
return err;
@@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num)
btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num)
}
btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
- if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num)
create_attr.btf_value_type_id = 2;
map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num)
/* Test BTF ID is removed from the kernel */
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num)
close(map_fd);
map_fd = -1;
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+ if (CHECK(btf_fd[0] >= 0, "BTF lingers")) {
err = -1;
goto done;
}
@@ -4117,11 +4117,11 @@ done:
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
for (i = 0; i < 2; i++) {
free(user_btf[i]);
- if (btf_fd[i] != -1)
+ if (btf_fd[i] >= 0)
close(btf_fd[i]);
}
@@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd <= 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4212,7 +4212,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
}
@@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num)
return;
btf = btf__parse_elf(test->file, &btf_ext);
- if (IS_ERR(btf)) {
- if (PTR_ERR(btf) == -ENOENT) {
+ err = libbpf_get_error(btf);
+ if (err) {
+ if (err == -ENOENT) {
printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
test__skip();
return;
@@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num)
btf_ext__free(btf_ext);
obj = bpf_object__open(test->file);
- if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "obj: %d", err))
return;
prog = bpf_program__next(NULL, obj);
@@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num)
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num)
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4886,7 +4888,7 @@ static void do_test_pprint(int test_num)
always_log);
free(raw_btf);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4901,7 +4903,7 @@ static void do_test_pprint(int test_num)
create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4982,7 +4984,7 @@ static void do_test_pprint(int test_num)
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
}
@@ -4998,7 +5000,7 @@ static void do_test_pprint(int test_num)
cpu, cmapv);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
cmapv = cmapv + rounded_value_size;
@@ -5036,9 +5038,9 @@ done:
fprintf(stderr, "OK");
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
if (pin_file)
fclose(pin_file);
@@ -5950,7 +5952,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
/* get necessary lens */
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
return -1;
}
@@ -5980,7 +5982,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
info.func_info_rec_size = rec_size;
info.func_info = ptr_to_u64(func_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -6044,7 +6046,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+ if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
err = -1;
goto done;
}
@@ -6123,7 +6125,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
* Only recheck the info.*line_info* fields.
* Other fields are not the concern of this test.
*/
- if (CHECK(err == -1 ||
+ if (CHECK(err < 0 ||
info.nr_line_info != cnt ||
(jited_cnt && !info.jited_line_info) ||
info.nr_jited_line_info != jited_cnt ||
@@ -6260,7 +6262,7 @@ static void do_test_info_raw(unsigned int test_num)
always_log);
free(raw_btf);
- if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
err = -1;
goto done;
}
@@ -6273,7 +6275,8 @@ static void do_test_info_raw(unsigned int test_num)
patched_linfo = patch_name_tbd(test->line_info,
test->str_sec, linfo_str_off,
test->str_sec_size, &linfo_size);
- if (IS_ERR(patched_linfo)) {
+ err = libbpf_get_error(patched_linfo);
+ if (err) {
fprintf(stderr, "error in creating raw bpf_line_info");
err = -1;
goto done;
@@ -6297,7 +6300,7 @@ static void do_test_info_raw(unsigned int test_num)
}
prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
- err = ((prog_fd == -1) != test->expected_prog_load_failure);
+ err = ((prog_fd < 0) != test->expected_prog_load_failure);
if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
prog_fd, test->expected_prog_load_failure, errno) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6306,7 +6309,7 @@ static void do_test_info_raw(unsigned int test_num)
goto done;
}
- if (prog_fd == -1)
+ if (prog_fd < 0)
goto done;
err = test_get_finfo(test, prog_fd);
@@ -6323,12 +6326,12 @@ done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (prog_fd != -1)
+ if (prog_fd >= 0)
close(prog_fd);
- if (!IS_ERR(patched_linfo))
+ if (!libbpf_get_error(patched_linfo))
free(patched_linfo);
}
@@ -6839,9 +6842,9 @@ static void do_test_dedup(unsigned int test_num)
return;
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(test_btf);
free(raw_btf);
- if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
- PTR_ERR(test_btf))) {
+ if (CHECK(err, "invalid test_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6853,9 +6856,9 @@ static void do_test_dedup(unsigned int test_num)
if (!raw_btf)
return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(expect_btf);
free(raw_btf);
- if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
- PTR_ERR(expect_btf))) {
+ if (CHECK(err, "invalid expect_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6966,10 +6969,8 @@ static void do_test_dedup(unsigned int test_num)
}
done:
- if (!IS_ERR(test_btf))
- btf__free(test_btf);
- if (!IS_ERR(expect_btf))
- btf__free(expect_btf);
+ btf__free(test_btf);
+ btf__free(expect_btf);
}
void test_btf(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 5e129dc2073c..1b90e684ff13 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf,
int err = 0, id;
d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
+ err = libbpf_get_error(d);
+ if (err)
+ return err;
for (id = 1; id <= type_cnt; id++) {
err = btf_dump__dump_type(d, id);
@@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
snprintf(test_file, sizeof(test_file), "%s.o", t->file);
btf = btf__parse_elf(test_file, NULL);
- if (CHECK(IS_ERR(btf), "btf_parse_elf",
- "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+ if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
err = -PTR_ERR(btf);
btf = NULL;
goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index f36da15b134f..022c7d89d6f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -4,8 +4,6 @@
#include <bpf/btf.h>
#include "btf_helpers.h"
-static int duration = 0;
-
void test_btf_write() {
const struct btf_var_secinfo *vi;
const struct btf_type *t;
@@ -16,7 +14,7 @@ void test_btf_write() {
int id, err, str_off;
btf = btf__new_empty();
- if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
return;
str_off = btf__find_str(btf, "int");
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 643dfa35419c..876be0ecb654 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_link = bpf_program__attach_cgroup(obj->progs.egress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
- "err %ld", PTR_ERR(parent_link)))
+ if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_link = bpf_program__attach_cgroup(obj->progs.egress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_link), "child-cg-attach",
- "err %ld", PTR_ERR(child_link)))
+ if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_link))
- bpf_link__destroy(parent_link);
- if (!IS_ERR(child_link))
- bpf_link__destroy(child_link);
+ bpf_link__destroy(parent_link);
+ bpf_link__destroy(child_link);
cg_storage_multi_egress_only__destroy(obj);
}
@@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_isolated__destroy(obj);
}
@@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_shared__destroy(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 0a1fc9816cef..20bb8831dda6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
prog_cnt = 2;
CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, &attach_flags,
- prog_ids, &prog_cnt) != -1);
+ prog_ids, &prog_cnt) >= 0);
CHECK_FAIL(errno != ENOSPC);
CHECK_FAIL(prog_cnt != 4);
/* check that prog_ids are returned even when buffer is too small */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 736796e56ed1..9091524131d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -65,8 +65,7 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
links[i] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[i].fd);
- if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
- i, PTR_ERR(links[i])))
+ if (!ASSERT_OK_PTR(links[i], "cg_attach"))
goto cleanup;
}
@@ -121,8 +120,7 @@ void test_cgroup_link(void)
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr + 1, 0);
@@ -147,7 +145,7 @@ void test_cgroup_link(void)
/* attempt to mix in with multi-attach bpf_link */
tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
bpf_link__destroy(tmp_link);
goto cleanup;
}
@@ -165,8 +163,7 @@ void test_cgroup_link(void)
/* attach back link-based one */
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr, 0);
@@ -249,8 +246,7 @@ cleanup:
BPF_CGROUP_INET_EGRESS);
for (i = 0; i < cg_nr; i++) {
- if (!IS_ERR(links[i]))
- bpf_link__destroy(links[i]);
+ bpf_link__destroy(links[i]);
}
test_cgroup_link__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 464edc1c1708..b9dc4ec655b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
goto cleanup;
link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
- if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "cgroup_attach"))
goto cleanup;
run_lookup_test(&skel->bss->g_serv_port, out_sk);
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index b62a39315336..012068f33a0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void)
prog = skel->progs.xdp_use_helper_basic;
link = bpf_program__attach_xdp(prog, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto out;
skel->links.xdp_use_helper_basic = link;
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 607710826dca..d02e064c535f 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
const char *name;
int i;
- if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
- CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+ if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
btf__free(local_btf);
btf__free(targ_btf);
return -EINVAL;
@@ -848,8 +847,7 @@ void test_core_reloc(void)
}
obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
- test_case->bpf_obj_file, PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
continue;
probe_name = "raw_tracepoint/sys_enter";
@@ -899,8 +897,7 @@ void test_core_reloc(void)
data->my_pid_tgid = my_pid_tgid;
link = bpf_program__attach_raw_tracepoint(prog, tp_name);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
/* trigger test run */
@@ -941,10 +938,8 @@ cleanup:
CHECK_FAIL(munmap(mmap_data, mmap_sz));
mmap_data = NULL;
}
- if (!IS_ERR_OR_NULL(link)) {
- bpf_link__destroy(link);
- link = NULL;
- }
+ bpf_link__destroy(link);
+ link = NULL;
bpf_object__close(obj);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 109d0345a2be..91154c2ba256 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
-#include "fexit_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fexit_test.lskel.h"
void test_fentry_fexit(void)
{
@@ -26,7 +26,7 @@ void test_fentry_fexit(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 7cb111b11995..174c89e7456e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
+#include "fentry_test.lskel.h"
static int fentry_test(struct fentry_test *fentry_skel)
{
int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
__u64 *result;
err = fentry_test__attach(fentry_skel);
@@ -15,11 +15,11 @@ static int fentry_test(struct fentry_test *fentry_skel)
return err;
/* Check that already linked program can't be attached again. */
- link = bpf_program__attach(fentry_skel->progs.test1);
- if (!ASSERT_ERR_PTR(link, "fentry_attach_link"))
+ link_fd = fentry_test__test1__attach(fentry_skel);
+ if (!ASSERT_LT(link_fd, 0, "fentry_attach_link"))
return -1;
- prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+ prog_fd = fentry_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 63990842d20f..73b4c76e6b86 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
close_prog:
for (i = 0; i < prog_cnt; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
bpf_object__close(tgt_obj);
free(link);
free(prog);
@@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj)
return err;
link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
- if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+ if (!ASSERT_OK_PTR(link, "second_link"))
goto out;
err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
@@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void)
opts.attach_prog_fd = pkt_fd;
freplace_obj = bpf_object__open_file(freplace_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
- "failed to open %s: %ld\n", freplace_name,
- PTR_ERR(freplace_obj)))
+ if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
goto out;
err = bpf_object__load(freplace_obj);
@@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void)
prog = bpf_program__next(NULL, freplace_obj);
freplace_link = bpf_program__attach_trace(prog);
- if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
goto out;
opts.attach_prog_fd = bpf_program__fd(prog);
fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
- "failed to open %s: %ld\n", fmod_ret_name,
- PTR_ERR(fmod_obj)))
+ if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
goto out;
err = bpf_object__load(fmod_obj);
@@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file,
);
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
/* It should fail to load the program */
@@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file,
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_object__close(obj);
bpf_object__close(pkt_obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index ccc7e8a34ab6..4e7f4b42ea29 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -6,7 +6,7 @@
#include <time.h>
#include <sys/mman.h>
#include <sys/syscall.h>
-#include "fexit_sleep.skel.h"
+#include "fexit_sleep.lskel.h"
static int do_sleep(void *skel)
{
@@ -58,8 +58,8 @@ void test_fexit_sleep(void)
* waiting for percpu_ref_kill to confirm). The other one
* will be freed quickly.
*/
- close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry));
- close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit));
+ close(fexit_skel->progs.nanosleep_fentry.prog_fd);
+ close(fexit_skel->progs.nanosleep_fexit.prog_fd);
fexit_sleep__detach(fexit_skel);
/* kill the thread to unwind sys_nanosleep stack through the trampoline */
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 6792e41f7f69..af3dba726701 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fexit_test.skel.h"
+#include "fexit_test.lskel.h"
static int fexit_test(struct fexit_test *fexit_skel)
{
int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
__u64 *result;
err = fexit_test__attach(fexit_skel);
@@ -15,11 +15,11 @@ static int fexit_test(struct fexit_test *fexit_skel)
return err;
/* Check that already linked program can't be attached again. */
- link = bpf_program__attach(fexit_skel->progs.test1);
- if (!ASSERT_ERR_PTR(link, "fexit_attach_link"))
+ link_fd = fexit_test__test1__attach(fexit_skel);
+ if (!ASSERT_LT(link_fd, 0, "fexit_attach_link"))
return -1;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index cd6dc80edf18..225714f71ac6 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
return;
link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
- if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_netns"))
goto out_close;
run_tests_skb_less(tap_fd, skel->maps.last_dissection);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 172c586b6996..3931ede5c534 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when another link exists */
errno = 0;
link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
perror("bpf_prog_attach(prog2) expected E2BIG");
- if (link2 != -1)
+ if (link2 >= 0)
close(link2);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when prog attached */
errno = 0;
link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ if (CHECK_FAIL(link >= 0 || errno != EEXIST))
perror("bpf_link_create(prog2) expected EEXIST");
- if (link != -1)
+ if (link >= 0)
close(link);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -623,7 +623,7 @@ static void run_tests(int netns)
}
out_close:
for (i = 0; i < ARRAY_SIZE(progs); i++) {
- if (progs[i] != -1)
+ if (progs[i] >= 0)
CHECK_FAIL(close(progs[i]));
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 925722217edf..522237aa4470 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
pb_opts.sample_cb = get_stack_print_output;
pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
/* trigger some syscall action */
@@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void)
}
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ bpf_link__destroy(link);
+ perf_buffer__free(pb);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index d884b2ed5bc5..8d5a6023a1bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
close(pmu_fd);
/* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
@@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
- "err: %ld\n", PTR_ERR(skel->links.oncpu));
+ ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
close(pmu_fd);
/* add exclude_callchain_kernel, attach should fail */
@@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
close(pmu_fd);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index 428d488830c6..4747ab18f97f 100644
--- a/tools/testing/selftests/bpf/prog_tests/hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -48,8 +48,7 @@ static void test_hashmap_generic(void)
struct hashmap *map;
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
for (i = 0; i < ELEM_CNT; i++) {
@@ -267,8 +266,7 @@ static void test_hashmap_multimap(void)
/* force collisions */
map = hashmap__new(collision_hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
/* set up multimap:
@@ -339,8 +337,7 @@ static void test_hashmap_empty()
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
goto cleanup;
if (CHECK(hashmap__size(map) != 0, "hashmap__size",
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index d65107919998..ddfb6bf97152 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -97,15 +97,13 @@ void test_kfree_skb(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
link_fentry = bpf_program__attach_trace(fentry);
- if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
- PTR_ERR(link_fentry)))
+ if (!ASSERT_OK_PTR(link_fentry, "attach fentry"))
goto close_prog;
link_fexit = bpf_program__attach_trace(fexit);
- if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
- PTR_ERR(link_fexit)))
+ if (!ASSERT_OK_PTR(link_fexit, "attach fexit"))
goto close_prog;
perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
@@ -116,7 +114,7 @@ void test_kfree_skb(void)
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &passed;
pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
memcpy(skb.cb, &cb, sizeof(cb));
@@ -144,12 +142,9 @@ void test_kfree_skb(void)
CHECK_FAIL(!test_ok[0] || !test_ok[1]);
close_prog:
perf_buffer__free(pb);
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(link_fentry))
- bpf_link__destroy(link_fentry);
- if (!IS_ERR_OR_NULL(link_fexit))
- bpf_link__destroy(link_fexit);
+ bpf_link__destroy(link);
+ bpf_link__destroy(link_fentry);
+ bpf_link__destroy(link_fexit);
bpf_object__close(obj);
bpf_object__close(obj2);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 7fc0951ee75f..30a7b9b837bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
-#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
#include "kfunc_call_test_subprog.skel.h"
static void test_main(void)
@@ -14,13 +14,13 @@ static void test_main(void)
if (!ASSERT_OK_PTR(skel, "skel"))
return;
- prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ prog_fd = skel->progs.kfunc_call_test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, (__u32 *)&retval, NULL);
ASSERT_OK(err, "bpf_prog_test_run(test1)");
ASSERT_EQ(retval, 12, "test1-retval");
- prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+ prog_fd = skel->progs.kfunc_call_test2.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, (__u32 *)&retval, NULL);
ASSERT_OK(err, "bpf_prog_test_run(test2)");
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index b58b775d19f3..67bebd324147 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -87,8 +87,7 @@ void test_ksyms_btf(void)
struct btf *btf;
btf = libbpf_find_kernel_btf();
- if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
- PTR_ERR(btf)))
+ if (!ASSERT_OK_PTR(btf, "btf_exists"))
return;
percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index 4c232b456479..2cd5cded543f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -4,7 +4,7 @@
#include <test_progs.h>
#include <bpf/libbpf.h>
#include <bpf/btf.h>
-#include "test_ksyms_module.skel.h"
+#include "test_ksyms_module.lskel.h"
static int duration;
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
index a743288cf384..6fc97c45f71e 100644
--- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
int err, i;
link = bpf_program__attach(prog);
- if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
/* re-open link from BPFFS */
link = bpf_link__open(link_pin_path);
- if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_open"))
goto cleanup;
CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
cleanup:
- if (!IS_ERR(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
}
void test_link_pinning(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..beebfa9730e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+ __u64 key, value = START_VALUE;
+ int err;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+ __u64 key, value[nr_cpus];
+ int i, err;
+
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = START_VALUE;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+ int *map_fd)
+{
+ struct test_lookup_and_delete *skel;
+ int err;
+
+ skel = test_lookup_and_delete__open();
+ if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hash_map, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = test_lookup_and_delete__load(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+ goto cleanup;
+
+ *map_fd = bpf_map__fd(skel->maps.hash_map);
+ if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+ return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+ __u64 value)
+{
+ int err;
+
+ skel->bss->set_pid = getpid();
+ skel->bss->set_key = key;
+ skel->bss->set_value = value;
+
+ err = test_lookup_and_delete__attach(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+ return -1;
+
+ syscall(__NR_getpgid);
+
+ test_lookup_and_delete__detach(skel);
+
+ return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(val != START_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ }
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Value should match the new value. */
+ if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i, cpucnt = 0;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element 1. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Clean value. */
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = 0;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) {
+ goto cleanup;
+ }
+
+ /* Check if only one CPU has set the value. */
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+ if (val) {
+ if (CHECK(val != NEW_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ cpucnt++;
+ }
+ }
+ if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ cpucnt))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+
+ if (test__start_subtest("lookup_and_delete"))
+ test_lookup_and_delete_hash();
+ if (test__start_subtest("lookup_and_delete_percpu"))
+ test_lookup_and_delete_percpu_hash();
+ if (test__start_subtest("lookup_and_delete_lru"))
+ test_lookup_and_delete_lru_hash();
+ if (test__start_subtest("lookup_and_delete_lru_percpu"))
+ test_lookup_and_delete_lru_percpu_hash();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644
index 000000000000..59adb4715394
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. call listen() for 4 server sockets.
+ * 2. call connect() for 25 client sockets.
+ * 3. call listen() for 1 server socket. (migration target)
+ * 4. update a map to migrate all child sockets
+ * to the last server socket (migrate_map[cookie] = 4)
+ * 5. call shutdown() for first 4 server sockets
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 6. call listen() for the second server socket.
+ * 7. call shutdown() for the last server
+ * and migrate the requests in the accept queue
+ * to the second server socket.
+ * 8. call listen() for the last server.
+ * 9. call shutdown() for the second server
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+ const char *name;
+ __s64 servers[NR_SERVERS];
+ __s64 clients[NR_CLIENTS];
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int family;
+ int state;
+ bool drop_ack;
+ bool expire_synack_timer;
+ bool fastopen;
+ struct bpf_link *link;
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ }
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (fds[i] != -1) {
+ close(fds[i]);
+ fds[i] = -1;
+ }
+ }
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+ int err = 0, fd, len;
+
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (!ASSERT_NEQ(fd, -1, "open"))
+ return -1;
+
+ if (restore) {
+ len = write(fd, buf, *saved_len);
+ if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+ err = -1;
+ } else {
+ *saved_len = read(fd, buf, size);
+ if (!ASSERT_GE(*saved_len, 1, "read")) {
+ err = -1;
+ goto close;
+ }
+
+ err = lseek(fd, 0, SEEK_SET);
+ if (!ASSERT_OK(err, "lseek"))
+ goto close;
+
+ /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+ * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
+ */
+ len = write(fd, "519", 3);
+ if (!ASSERT_EQ(len, 3, "write - setup"))
+ err = -1;
+ }
+
+close:
+ close(fd);
+
+ return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ if (test_case->family == AF_INET)
+ skel->bss->server_port = ((struct sockaddr_in *)
+ &test_case->addr)->sin_port;
+ else
+ skel->bss->server_port = ((struct sockaddr_in6 *)
+ &test_case->addr)->sin6_port;
+
+ test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+ IFINDEX_LO);
+ if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+ return -1;
+
+ return 0;
+}
+
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+ int err;
+
+ err = bpf_link__detach(test_case->link);
+ if (!ASSERT_OK(err, "bpf_link__detach"))
+ return -1;
+
+ test_case->link = NULL;
+
+ return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+ prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+ make_sockaddr(test_case->family,
+ test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+ &test_case->addr, &test_case->addrlen);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+ return -1;
+
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_REUSEPORT, &reuseport, sizeof(reuseport));
+ if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+ return -1;
+
+ err = bind(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ if (i == 0) {
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (!ASSERT_OK(err,
+ "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+ return -1;
+
+ err = getsockname(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ &test_case->addrlen);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+ }
+
+ if (test_case->fastopen) {
+ err = setsockopt(test_case->servers[i],
+ SOL_TCP, TCP_FASTOPEN,
+ &qlen, sizeof(qlen));
+ if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+ return -1;
+ }
+
+ /* All requests will be tied to the first four listeners */
+ if (i != MIGRATED_TO) {
+ err = listen(test_case->servers[i], qlen);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+ char buf[MSGLEN] = MSG;
+ int i, err;
+
+ for (i = 0; i < NR_CLIENTS; i++) {
+ test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+ return -1;
+
+ /* The attached XDP program drops only the final ACK, so
+ * clients will transition to TCP_ESTABLISHED immediately.
+ */
+ err = settimeo(test_case->clients[i], 100);
+ if (!ASSERT_OK(err, "settimeo"))
+ return -1;
+
+ if (test_case->fastopen) {
+ int fastopen = 1;
+
+ err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+ TCP_FASTOPEN_CONNECT, &fastopen,
+ sizeof(fastopen));
+ if (!ASSERT_OK(err,
+ "setsockopt - TCP_FASTOPEN_CONNECT"))
+ return -1;
+ }
+
+ err = connect(test_case->clients[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "connect"))
+ return -1;
+
+ err = write(test_case->clients[i], buf, MSGLEN);
+ if (!ASSERT_EQ(err, MSGLEN, "write"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, migrated_to = MIGRATED_TO;
+ int reuseport_map_fd, migrate_map_fd;
+ __u64 value;
+
+ reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+ migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ value = (__u64)test_case->servers[i];
+ err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+ int i, err;
+
+ /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+ * to the last listener based on eBPF.
+ */
+ for (i = 0; i < MIGRATED_TO; i++) {
+ err = shutdown(test_case->servers[i], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+ }
+
+ /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+ if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+ return 0;
+
+ /* Note that we use the second listener instead of the
+ * first one here.
+ *
+ * The fist listener is bind()ed with port 0 and,
+ * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
+ * calling listen() again will bind() the first listener
+ * on a new ephemeral port and detach it from the existing
+ * reuseport group. (See: __inet_bind(), tcp_set_state())
+ *
+ * OTOH, the second one is bind()ed with a specific port,
+ * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+ * resurrect the listener on the existing reuseport group.
+ */
+ err = listen(test_case->servers[1], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate from the last listener to the second one.
+ *
+ * All listeners were detached out of the reuseport_map,
+ * so migration will be done by kernel random pick from here.
+ */
+ err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ /* Back to the existing reuseport group */
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate back to the last one from the second one */
+ err = shutdown(test_case->servers[1], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int err, cnt = 0, client;
+ char buf[MSGLEN];
+
+ err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+ if (!ASSERT_OK(err, "settimeo"))
+ goto out;
+
+ for (; cnt < NR_CLIENTS; cnt++) {
+ client = accept(test_case->servers[MIGRATED_TO],
+ (struct sockaddr *)&addr, &len);
+ if (!ASSERT_NEQ(client, -1, "accept"))
+ goto out;
+
+ memset(buf, 0, MSGLEN);
+ read(client, &buf, MSGLEN);
+ close(client);
+
+ if (!ASSERT_STREQ(buf, MSG, "read"))
+ goto out;
+ }
+
+out:
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+ switch (test_case->state) {
+ case BPF_TCP_ESTABLISHED:
+ cnt = skel->bss->migrated_at_close;
+ break;
+ case BPF_TCP_SYN_RECV:
+ cnt = skel->bss->migrated_at_close_fastopen;
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (test_case->expire_synack_timer)
+ cnt = skel->bss->migrated_at_send_synack;
+ else
+ cnt = skel->bss->migrated_at_recv_ack;
+ break;
+ default:
+ cnt = 0;
+ }
+
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int err, saved_len;
+ char buf[16];
+
+ skel->bss->migrated_at_close = 0;
+ skel->bss->migrated_at_close_fastopen = 0;
+ skel->bss->migrated_at_send_synack = 0;
+ skel->bss->migrated_at_recv_ack = 0;
+
+ init_fds(test_case->servers, NR_SERVERS);
+ init_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->fastopen) {
+ memset(buf, 0, sizeof(buf));
+
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+ if (!ASSERT_OK(err, "setup_fastopen - setup"))
+ return;
+ }
+
+ err = start_servers(test_case, skel);
+ if (!ASSERT_OK(err, "start_servers"))
+ goto close_servers;
+
+ if (test_case->drop_ack) {
+ /* Drop the final ACK of the 3-way handshake and stick the
+ * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+ */
+ err = drop_ack(test_case, skel);
+ if (!ASSERT_OK(err, "drop_ack"))
+ goto close_servers;
+ }
+
+ /* Tie requests to the first four listners */
+ err = start_clients(test_case);
+ if (!ASSERT_OK(err, "start_clients"))
+ goto close_clients;
+
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ goto close_clients;
+
+ err = update_maps(test_case, skel);
+ if (!ASSERT_OK(err, "fill_maps"))
+ goto close_clients;
+
+ /* Migrate the requests in the accept queue only.
+ * TCP_NEW_SYN_RECV requests are not migrated at this point.
+ */
+ err = migrate_dance(test_case);
+ if (!ASSERT_OK(err, "migrate_dance"))
+ goto close_clients;
+
+ if (test_case->expire_synack_timer) {
+ /* Wait for SYN+ACK timers to expire so that
+ * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+ */
+ sleep(1);
+ }
+
+ if (test_case->link) {
+ /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+ err = pass_ack(test_case);
+ if (!ASSERT_OK(err, "pass_ack"))
+ goto close_clients;
+ }
+
+ count_requests(test_case, skel);
+
+close_clients:
+ close_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->link) {
+ err = pass_ack(test_case);
+ ASSERT_OK(err, "pass_ack - clean up");
+ }
+
+close_servers:
+ close_fds(test_case->servers, NR_SERVERS);
+
+ if (test_case->fastopen) {
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+ ASSERT_OK(err, "setup_fastopen - restore");
+ }
+}
+
+void test_migrate_reuseport(void)
+{
+ struct test_migrate_reuseport *skel;
+ int i;
+
+ skel = test_migrate_reuseport__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ test__start_subtest(test_cases[i].name);
+ run_test(&test_cases[i], skel);
+ }
+
+ test_migrate_reuseport__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c
index e178416bddad..6194b776a28b 100644
--- a/tools/testing/selftests/bpf/prog_tests/obj_name.c
+++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c
@@ -38,13 +38,13 @@ void test_obj_name(void)
fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-prog-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
/* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
memcpy(attr.map_name, tests[i].name, ncopy);
fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-map-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index e35c444902a7..12c4f45cee1a 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd,
/* attach perf_event */
link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
- if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
goto out_destroy_skel;
/* generate some branches on cpu 0 */
@@ -119,7 +119,7 @@ static void test_perf_branches_hw(void)
* Some setups don't support branch records (virtual machines, !x86),
* so skip test in this case.
*/
- if (pfd == -1) {
+ if (pfd < 0) {
if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index ca9f0895ec84..6490e9673002 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -80,7 +80,7 @@ void test_perf_buffer(void)
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &cpu_seen;
pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out_close;
CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
index 72c3690844fb..33144c9432ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -97,8 +97,7 @@ void test_perf_event_stackmap(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 7aecfd9e87d1..95bd12097358 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -15,7 +15,7 @@ void test_probe_user(void)
static const int zero = 0;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
@@ -33,11 +33,8 @@ void test_probe_user(void)
goto cleanup;
kprobe_link = bpf_program__attach(kprobe_prog);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link))) {
- kprobe_link = NULL;
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
- }
memset(&curr, 0, sizeof(curr));
in->sin_family = AF_INET;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 131d7f7eeb42..89fc98faf19e 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -46,7 +46,7 @@ void test_prog_run_xattr(void)
tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
+ CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
"err %d errno %d retval %d\n", err, errno, tattr.retval);
CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
@@ -78,6 +78,6 @@ void test_prog_run_xattr(void)
cleanup:
if (skel)
test_pkt_access__destroy(skel);
- if (stats_fd != -1)
+ if (stats_fd >= 0)
close(stats_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
index c5fb191874ac..41720a62c4fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -77,7 +77,7 @@ void test_raw_tp_test_run(void)
/* invalid cpu ID should fail with ENXIO */
opts.cpu = 0xffffffff;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != ENXIO,
+ CHECK(err >= 0 || errno != ENXIO,
"test_run_opts_fail",
"should failed with ENXIO\n");
@@ -85,7 +85,7 @@ void test_raw_tp_test_run(void)
opts.cpu = 1;
opts.flags = 0;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"test_run_opts_fail",
"should failed with EINVAL\n");
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index 563e12120e77..5f9eaa3ab584 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -30,7 +30,7 @@ void test_rdonly_maps(void)
struct bss bss;
obj = bpf_object__open_file(file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
return;
err = bpf_object__load(obj);
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
goto cleanup;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
- t->prog_name, PTR_ERR(link))) {
- link = NULL;
+ if (!ASSERT_OK_PTR(link, "attach_prog"))
goto cleanup;
- }
/* trigger probe */
usleep(1);
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index ac1ee10cffd8..de2688166696 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -15,7 +15,7 @@ void test_reference_tracking(void)
int err = 0;
obj = bpf_object__open_file(file, &open_opts);
- if (CHECK_FAIL(IS_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index d3c2de2c24d1..f62361306f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id)
}
for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
- if (test_symbols[i].id != -1)
+ if (test_symbols[i].id >= 0)
continue;
if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index de78617f6550..4706cee84360 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,7 +12,7 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
-#include "test_ringbuf.skel.h"
+#include "test_ringbuf.lskel.h"
#define EDONE 7777
@@ -86,25 +86,70 @@ void test_ringbuf(void)
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
pthread_t thread;
long bg_ret = -1;
- int err, cnt;
+ int err, cnt, rb_fd;
int page_size = getpagesize();
+ void *mmap_ptr, *tmp_ptr;
skel = test_ringbuf__open();
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
- err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
- if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
- goto cleanup;
+ skel->maps.ringbuf.max_entries = page_size;
err = test_ringbuf__load(skel);
if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
goto cleanup;
+ rb_fd = skel->maps.ringbuf.map_fd;
+ /* good read/write cons_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
+ tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
+ if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
+ goto cleanup;
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ /* bad writeable prod_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
+ ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
+
+ /* bad writeable data pages */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
+ ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
+ mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
+
+ /* good read-only pages */
+ mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
+
+ /* good read-only pages with initial offset */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
- ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd,
process_sample, NULL, NULL);
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index cef63e703924..167cd8a2edfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -63,7 +63,7 @@ void test_ringbuf_multi(void)
goto cleanup;
proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
- if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+ if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
goto cleanup;
err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 821b4146b7b6..4efd337d6a3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type)
attr.max_entries = REUSEPORT_ARRAY_SIZE;
reuseport_array = bpf_create_map_xattr(&attr);
- RET_ERR(reuseport_array == -1, "creating reuseport_array",
+ RET_ERR(reuseport_array < 0, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
@@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type)
attr.max_entries = 1;
attr.inner_map_fd = reuseport_array;
outer_map = bpf_create_map_xattr(&attr);
- RET_ERR(outer_map == -1, "creating outer_map",
+ RET_ERR(outer_map < 0, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
return 0;
@@ -102,8 +102,9 @@ static int prepare_bpf_obj(void)
int err;
obj = bpf_object__open("test_select_reuseport_kern.o");
- RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
- "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+ err = libbpf_get_error(obj);
+ RET_ERR(err, "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%d\n", obj, err);
map = bpf_object__find_map_by_name(obj, "outer_map");
RET_ERR(!map, "find outer_map", "!map\n");
@@ -116,31 +117,31 @@ static int prepare_bpf_obj(void)
prog = bpf_program__next(NULL, obj);
RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
- RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+ RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
- RET_ERR(result_map == -1, "get result_map fd",
+ RET_ERR(result_map < 0, "get result_map fd",
"result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
- RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
- RET_ERR(linum_map == -1, "get linum_map fd",
+ RET_ERR(linum_map < 0, "get linum_map fd",
"linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
- RET_ERR(data_check_map == -1, "get data_check_map fd",
+ RET_ERR(data_check_map < 0, "get data_check_map fd",
"data_check_map:%d\n", data_check_map);
return 0;
@@ -237,7 +238,7 @@ static long get_linum(void)
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
- RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
err, errno);
return linum;
@@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
- RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
- RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
err, errno);
if (type == SOCK_STREAM) {
@@ -347,7 +348,7 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
}
@@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
"err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
- RET_IF(err == -1 || tmp_index != -1,
+ RET_IF(err < 0 || tmp_index >= 0,
"lookup_elem(tmp_index_ovr_map)",
"err:%d errno:%d tmp_index:%d\n",
err, errno, tmp_index);
@@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
@@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
@@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
- RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
"err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
- RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
- RET_IF(err == -1, "listen()",
+ RET_IF(err < 0, "listen()",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
- RET_IF(err == -1, "update_elem(reuseport_array)",
+ RET_IF(err < 0, "update_elem(reuseport_array)",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
@@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
"err:%d errno:%d\n", err, errno);
/* Install reuseport_array to outer_map? */
@@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
BPF_ANY);
- RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+ RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
"err:%d errno:%d\n", err, errno);
}
@@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map)
return;
err = bpf_map_delete_elem(outer_map, &index_zero);
- RET_IF(err == -1, "delete_elem(outer_map)",
+ RET_IF(err < 0, "delete_elem(outer_map)",
"err:%d errno:%d\n", err, errno);
}
static void cleanup(void)
{
- if (outer_map != -1) {
+ if (outer_map >= 0) {
close(outer_map);
outer_map = -1;
}
- if (reuseport_array != -1) {
+ if (reuseport_array >= 0) {
close(reuseport_array);
reuseport_array = -1;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 7043e6ded0e6..023cc532992d 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -2,7 +2,7 @@
#include <test_progs.h>
#include "test_send_signal_kern.skel.h"
-static volatile int sigusr1_received = 0;
+int sigusr1_received = 0;
static void sigusr1_handler(int signum)
{
@@ -91,8 +91,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->links.send_signal_perf =
bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
- if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+ if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
goto disable_pmu;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 45c82db3c58c..aee41547e7f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
}
link = bpf_program__attach_netns(prog, net_fd);
- if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
errno = -PTR_ERR(link);
log_err("failed to attach program '%s' to netns",
bpf_program__name(prog));
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fe87b77af459..f6f130c99b8c 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -82,10 +82,8 @@ void test_skeleton(void)
CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2);
CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
- CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
- bss->handler_out5.a, 5);
- CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
- bss->handler_out5.b, 6);
+ CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5);
+ CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6);
CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14);
CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index af87118e748e..577d619fb07e 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -97,12 +97,12 @@ static void check_result(void)
err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
&egress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
&ingress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
@@ -355,14 +355,12 @@ void test_sock_fields(void)
egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
child_cg_fd);
- if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
- PTR_ERR(egress_link)))
+ if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
goto done;
ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
child_cg_fd);
- if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
- PTR_ERR(ingress_link)))
+ if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
goto done;
linum_map_fd = bpf_map__fd(skel->maps.linum_map);
@@ -375,8 +373,8 @@ done:
bpf_link__destroy(egress_link);
bpf_link__destroy(ingress_link);
test_sock_fields__destroy(skel);
- if (child_cg_fd != -1)
+ if (child_cg_fd >= 0)
close(child_cg_fd);
- if (parent_cg_fd != -1)
+ if (parent_cg_fd >= 0)
close(parent_cg_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index ab77596b64e3..1352ec104149 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
int s, map, err;
s = connected_socket_v4();
- if (CHECK_FAIL(s == -1))
+ if (CHECK_FAIL(s < 0))
return;
map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
+ if (CHECK_FAIL(map < 0)) {
perror("bpf_create_map");
goto out;
}
@@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.copy, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
}
err = bpf_prog_attach(verdict, map, second, 0);
- assert(err == -1 && errno == EBUSY);
+ ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
err = bpf_prog_detach2(verdict, map, first);
if (CHECK_FAIL(err)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 06b86addc181..7a0d64fdc192 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
int map;
map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
+ if (CHECK_FAIL(map < 0)) {
perror("bpf_map_create");
return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 648d9ae898d2..515229f24a93 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -139,7 +139,7 @@
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -147,7 +147,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -155,7 +155,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -164,7 +164,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -172,7 +172,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -1610,6 +1610,7 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
struct sockaddr_storage addr;
int c0, c1, p0, p1;
unsigned int pass;
+ int retries = 100;
socklen_t len;
int err, n;
u64 value;
@@ -1686,9 +1687,13 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
- if (n < 0)
+ if (n < 0) {
+ if (errno == EAGAIN && retries--)
+ goto again;
FAIL_ERRNO("%s: read", log_prefix);
+ }
if (n == 0)
FAIL("%s: incomplete read", log_prefix);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 11a769e18f5d..0a91d8d9954b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -62,8 +62,7 @@ retry:
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 37269d23df93..04b476bd62b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -21,7 +21,7 @@ void test_stacktrace_map(void)
goto close_prog;
link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_tp"))
goto close_prog;
/* find map fds */
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 404a5498e1a3..4fd30bb651ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
/* find map fds */
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
index 46556976dccc..5c4e3014e063 100644
--- a/tools/testing/selftests/bpf/prog_tests/static_linked.c
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -14,12 +14,7 @@ void test_static_linked(void)
return;
skel->rodata->rovar1 = 1;
- skel->bss->static_var1 = 2;
- skel->bss->static_var11 = 3;
-
skel->rodata->rovar2 = 4;
- skel->bss->static_var2 = 5;
- skel->bss->static_var22 = 6;
err = test_static_linked__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -32,8 +27,8 @@ void test_static_linked(void)
/* trigger */
usleep(1);
- ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1");
- ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2");
+ ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1");
+ ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2");
cleanup:
test_static_linked__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
new file mode 100644
index 000000000000..81e997a69f7a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "syscall.skel.h"
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+void test_syscall(void)
+{
+ static char verifier_log[8192];
+ struct args ctx = {
+ .max_entries = 1024,
+ .log_buf = (uintptr_t) verifier_log,
+ .log_size = sizeof(verifier_log),
+ };
+ struct bpf_prog_test_run_attr tattr = {
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ };
+ struct syscall *skel = NULL;
+ __u64 key = 12, value = 0;
+ int err;
+
+ skel = syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog);
+ err = bpf_prog_test_run_xattr(&tattr);
+ ASSERT_EQ(err, 0, "err");
+ ASSERT_EQ(tattr.retval, 1, "retval");
+ ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
+ ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd");
+ ASSERT_OK(memcmp(verifier_log, "processed", sizeof("processed") - 1),
+ "verifier_log");
+
+ err = bpf_map_lookup_elem(ctx.map_fd, &key, &value);
+ ASSERT_EQ(err, 0, "map_lookup");
+ ASSERT_EQ(value, 34, "map lookup value");
+cleanup:
+ syscall__destroy(skel);
+ if (ctx.prog_fd > 0)
+ close(ctx.prog_fd);
+ if (ctx.map_fd > 0)
+ close(ctx.map_fd);
+ if (ctx.btf_fd > 0)
+ close(ctx.btf_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
new file mode 100644
index 000000000000..4a505a5adf4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_bpf.skel.h"
+
+#define LO_IFINDEX 1
+
+#define TEST_DECLARE_OPTS(__fd) \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1, \
+ .flags = BPF_TC_F_REPLACE); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd, \
+ .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1);
+
+static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int ret;
+
+ ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd"))
+ return ret;
+
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ return ret;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+ opts.prog_id = 0;
+ opts.flags = BPF_TC_F_REPLACE;
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach replace mode"))
+ goto end;
+
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_query(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_query"))
+ goto end;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+end:
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_detach(hook, &opts);
+ ASSERT_OK(ret, "bpf_tc_detach");
+ return ret;
+}
+
+static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1);
+ int ret;
+
+ ret = bpf_tc_hook_create(NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL"))
+ return -EINVAL;
+
+ /* hook ifindex = 0 */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ /* hook ifindex < 0 */
+ inv_hook.ifindex = -1;
+
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ inv_hook.ifindex = LO_IFINDEX;
+
+ /* hook.attach_point invalid */
+ inv_hook.attach_point = 0xabcd;
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* hook.attach_point valid, but parent invalid */
+ inv_hook.parent = TC_H_MAKE(1UL << 16, 10);
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_CUSTOM;
+ inv_hook.parent = 0;
+ /* These return EOPNOTSUPP instead of EINVAL as parent is checked after
+ * attach_point of the hook.
+ */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* detach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_detach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+ }
+
+ /* query */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_query(NULL, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ /* when chain is not present, kernel returns -EINVAL */
+ ret = bpf_tc_query(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set"))
+ return -EINVAL;
+ }
+
+ /* attach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_attach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL"))
+ return -EINVAL;
+
+ opts_hp.flags = 42;
+ ret = bpf_tc_attach(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_pf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset"))
+ return -EINVAL;
+ opts_pf.prog_fd = opts_pf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_hf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset"))
+ return -EINVAL;
+ opts_hf.prog_fd = opts_hf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_f);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset"))
+ return -EINVAL;
+ opts_f.prog_fd = opts_f.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach");
+ }
+
+ return 0;
+}
+
+void test_tc_bpf(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_INGRESS);
+ struct test_tc_bpf *skel = NULL;
+ bool hook_created = false;
+ int cls_fd, ret;
+
+ skel = test_tc_bpf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+ return;
+
+ cls_fd = bpf_program__fd(skel->progs.cls);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (ret == 0)
+ hook_created = true;
+
+ ret = ret == -EEXIST ? 0 : ret;
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+ goto end;
+
+ hook.attach_point = BPF_TC_CUSTOM;
+ hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point"))
+ goto end;
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ ret = bpf_tc_hook_destroy(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ goto end;
+
+ hook.attach_point = BPF_TC_INGRESS;
+ hook.parent = 0;
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal egress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_api(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_bpf_api"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+end:
+ if (hook_created) {
+ hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&hook);
+ }
+ test_tc_bpf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
new file mode 100644
index 000000000000..5703c918812b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -0,0 +1,785 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+ * between src and dst. The netns fwd has veth links to each src and dst. The
+ * client is in src and server in dst. The test installs a TC BPF program to each
+ * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
+ * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
+ * switch from ingress side; it also installs a checker prog on the egress side
+ * to drop unexpected traffic.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_neigh_fib.skel.h"
+#include "test_tc_neigh.skel.h"
+#include "test_tc_peer.skel.h"
+
+#define NS_SRC "ns_src"
+#define NS_FWD "ns_fwd"
+#define NS_DST "ns_dst"
+
+#define IP4_SRC "172.16.1.100"
+#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
+#define IP4_PORT 9004
+
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
+#define IP6_PORT 9006
+
+#define IP4_SLL "169.254.0.1"
+#define IP4_DLL "169.254.0.2"
+#define IP4_NET "169.254.0.0"
+
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
+#define IFADDR_STR_LEN 18
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
+
+#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
+#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
+#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
+
+#define TIMEOUT_MILLIS 10000
+
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
+
+static int write_file(const char *path, const char *newval)
+{
+ FILE *f;
+
+ f = fopen(path, "r+");
+ if (!f)
+ return -1;
+ if (fwrite(newval, strlen(newval), 1, f) != 1) {
+ log_err("writing to %s failed", path);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+struct nstoken {
+ int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
+{
+ int err;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+
+ if (!ASSERT_OK(err, "setns"))
+ return err;
+
+ /* Switch /sys to the new namespace so that e.g. /sys/class/net
+ * reflects the devices in the new namespace.
+ */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "unshare"))
+ return err;
+
+ err = umount2("/sys", MNT_DETACH);
+ if (!ASSERT_OK(err, "umount2 /sys"))
+ return err;
+
+ err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys"))
+ return err;
+
+ err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+ return err;
+
+ return 0;
+}
+
+/**
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
+ */
+static struct nstoken *open_netns(const char *name)
+{
+ int nsfd;
+ char nspath[PATH_MAX];
+ int err;
+ struct nstoken *token;
+
+ token = malloc(sizeof(struct nstoken));
+ if (!ASSERT_OK_PTR(token, "malloc token"))
+ return NULL;
+
+ token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ goto fail;
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ goto fail;
+
+ err = setns_by_fd(nsfd);
+ if (!ASSERT_OK(err, "setns_by_fd"))
+ goto fail;
+
+ return token;
+fail:
+ free(token);
+ return NULL;
+}
+
+static void close_netns(struct nstoken *token)
+{
+ ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+ free(token);
+}
+
+static int netns_setup_namespaces(const char *verb)
+{
+ const char * const *ns = namespaces;
+ char cmd[128];
+
+ while (*ns) {
+ snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
+ if (!ASSERT_OK(system(cmd), cmd))
+ return -1;
+ ns++;
+ }
+ return 0;
+}
+
+struct netns_setup_result {
+ int ifindex_veth_src_fwd;
+ int ifindex_veth_dst_fwd;
+};
+
+static int get_ifaddr(const char *name, char *ifaddr)
+{
+ char path[PATH_MAX];
+ FILE *f;
+ int ret;
+
+ snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
+ f = fopen(path, "r");
+ if (!ASSERT_OK_PTR(f, path))
+ return -1;
+
+ ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
+ if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+static int get_ifindex(const char *name)
+{
+ char path[PATH_MAX];
+ char buf[32];
+ FILE *f;
+ int ret;
+
+ snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
+ f = fopen(path, "r");
+ if (!ASSERT_OK_PTR(f, path))
+ return -1;
+
+ ret = fread(buf, 1, sizeof(buf), f);
+ if (!ASSERT_GT(ret, 0, "fread ifindex")) {
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return atoi(buf);
+}
+
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto fail; \
+ })
+
+static int netns_setup_links_and_routes(struct netns_setup_result *result)
+{
+ struct nstoken *nstoken = NULL;
+ char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
+
+ SYS("ip link add veth_src type veth peer name veth_src_fwd");
+ SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+ SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS("ip link set veth_dst address " MAC_DST);
+
+ if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+ goto fail;
+
+ result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
+ if (result->ifindex_veth_src_fwd < 0)
+ goto fail;
+ result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
+ if (result->ifindex_veth_dst_fwd < 0)
+ goto fail;
+
+ SYS("ip link set veth_src netns " NS_SRC);
+ SYS("ip link set veth_src_fwd netns " NS_FWD);
+ SYS("ip link set veth_dst_fwd netns " NS_FWD);
+ SYS("ip link set veth_dst netns " NS_DST);
+
+ /** setup in 'src' namespace */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto fail;
+
+ SYS("ip addr add " IP4_SRC "/32 dev veth_src");
+ SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
+ SYS("ip link set dev veth_src up");
+
+ SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
+ SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
+
+ SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
+ veth_src_fwd_addr);
+ SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
+ veth_src_fwd_addr);
+
+ close_netns(nstoken);
+
+ /** setup in 'fwd' namespace */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto fail;
+
+ /* The fwd netns automatically gets a v6 LL address / routes, but also
+ * needs v4 one in order to start ARP probing. IP4_NET route is added
+ * to the endpoints so that the ARP processing will reply.
+ */
+ SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
+ SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
+ SYS("ip link set dev veth_src_fwd up");
+ SYS("ip link set dev veth_dst_fwd up");
+
+ SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
+ SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
+ SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
+ SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+
+ close_netns(nstoken);
+
+ /** setup in 'dst' namespace */
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ goto fail;
+
+ SYS("ip addr add " IP4_DST "/32 dev veth_dst");
+ SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
+ SYS("ip link set dev veth_dst up");
+
+ SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
+ SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
+ SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
+
+ SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ close_netns(nstoken);
+
+ return 0;
+fail:
+ if (nstoken)
+ close_netns(nstoken);
+ return -1;
+}
+
+static int netns_load_bpf(void)
+{
+ SYS("tc qdisc add dev veth_src_fwd clsact");
+ SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ SYS("tc qdisc add dev veth_dst_fwd clsact");
+ SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+ DST_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void test_tcp(int family, const char *addr, __u16 port)
+{
+ int listen_fd = -1, accept_fd = -1, client_fd = -1;
+ char buf[] = "testing testing";
+ int n;
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ return;
+
+ listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
+ if (!ASSERT_GE(listen_fd, 0, "listen"))
+ goto done;
+
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+
+ client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto done;
+
+ if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
+ goto done;
+
+ n = write(client_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+
+ n = read(accept_fd, buf, sizeof(buf));
+ ASSERT_EQ(n, sizeof(buf), "recv from server");
+
+done:
+ if (nstoken)
+ close_netns(nstoken);
+ if (listen_fd >= 0)
+ close(listen_fd);
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+}
+
+static int test_ping(int family, const char *addr)
+{
+ const char *ping = family == AF_INET6 ? "ping6" : "ping";
+
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+ return 0;
+fail:
+ return -1;
+}
+
+static void test_connectivity(void)
+{
+ test_tcp(AF_INET, IP4_DST, IP4_PORT);
+ test_ping(AF_INET, IP4_DST);
+ test_tcp(AF_INET6, IP6_DST, IP6_PORT);
+ test_ping(AF_INET6, IP6_DST);
+}
+
+static int set_forwarding(bool enable)
+{
+ int err;
+
+ err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+ return err;
+
+ err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+ return err;
+
+ return 0;
+}
+
+static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh_fib__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
+ goto done;
+
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto done;
+
+ if (netns_load_bpf())
+ goto done;
+
+ /* bpf_fib_lookup() checks if forwarding is enabled */
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
+}
+
+static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_neigh__load(skel);
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto done;
+
+ if (netns_load_bpf())
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
+}
+
+static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken;
+ struct test_tc_peer *skel;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto done;
+
+ err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto done;
+
+ if (netns_load_bpf())
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
+}
+
+static int tun_open(char *name)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS("ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ log_err("select failed");
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ log_err("read failed");
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ log_err("write failed");
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ struct test_tc_peer *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int err;
+ int tunnel_pid = -1;
+ int src_fd, target_fd;
+ int ifindex;
+
+ /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+ * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have skb in suitable state for this test. Alternative to TUN/TAP
+ * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+ * but that requires much more complicated setup.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
+ return;
+
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+ goto fail;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ goto fail;
+
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+ goto fail;
+
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
+
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto fail;
+
+ ifindex = get_ifindex("tun_fwd");
+ if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+ goto fail;
+
+ skel->rodata->IFINDEX_SRC = ifindex;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto fail;
+
+ /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
+ * towards dst, and "tc_dst" to redirect packets
+ * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+ */
+ SYS("tc qdisc add dev tun_fwd clsact");
+ SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+
+ SYS("tc qdisc add dev veth_dst_fwd clsact");
+ SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+ DST_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ /* Setup route and neigh tables */
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+ SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto fail;
+
+ test_connectivity();
+
+fail:
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ if (skel)
+ test_tc_peer__destroy(skel);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+#define RUN_TEST(name) \
+ ({ \
+ struct netns_setup_result setup_result; \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+ RUN_TEST(tc_redirect_peer);
+ RUN_TEST(tc_redirect_peer_l3);
+ RUN_TEST(tc_redirect_neigh);
+ RUN_TEST(tc_redirect_neigh_fib);
+ return NULL;
+}
+
+void test_tc_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 08d19cafd5e8..1fa772079967 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -353,8 +353,7 @@ static void fastopen_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, true)) {
@@ -398,8 +397,7 @@ static void syncookie_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -431,8 +429,7 @@ static void fin(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -471,8 +468,7 @@ static void __simple_estab(bool exprm)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -509,8 +505,7 @@ static void misc(void)
return;
link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 9966685866fd..123c68c1917d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -73,7 +73,7 @@ void test_test_overhead(void)
return;
obj = bpf_object__open_file("./test_overhead.o", NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
@@ -108,7 +108,7 @@ void test_test_overhead(void)
/* attach kprobe */
link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kprobe"))
goto cleanup;
test_run("kprobe");
bpf_link__destroy(link);
@@ -116,28 +116,28 @@ void test_test_overhead(void)
/* attach kretprobe */
link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
goto cleanup;
test_run("kretprobe");
bpf_link__destroy(link);
/* attach raw_tp */
link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
test_run("raw_tp");
bpf_link__destroy(link);
/* attach fentry */
link = bpf_program__attach_trace(fentry_prog);
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
test_run("fentry");
bpf_link__destroy(link);
/* attach fexit */
link = bpf_program__attach_trace(fexit_prog);
- if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
goto cleanup;
test_run("fexit");
bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 39b0decb1bb2..d39bc00feb45 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -3,7 +3,7 @@
#include <test_progs.h>
-#include "trace_printk.skel.h"
+#include "trace_printk.lskel.h"
#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "testing,testing"
@@ -21,6 +21,9 @@ void test_trace_printk(void)
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
+ ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string");
+ skel->rodata->fmt[0] = 't';
+
err = trace_printk__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index f3022d934e2d..d7f5a931d7f3 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -55,7 +55,7 @@ void test_trampoline_count(void)
/* attach 'allowed' trampoline programs */
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
obj = NULL;
goto cleanup;
}
@@ -68,14 +68,14 @@ void test_trampoline_count(void)
if (rand() % 2) {
link = load(inst[i].obj, fentry_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "attach_prog")) {
link = NULL;
goto cleanup;
}
inst[i].link_fentry = link;
} else {
link = load(inst[i].obj, fexit_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "attach_prog")) {
link = NULL;
goto cleanup;
}
@@ -85,7 +85,7 @@ void test_trampoline_count(void)
/* and try 1 extra.. */
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
obj = NULL;
goto cleanup;
}
@@ -96,13 +96,15 @@ void test_trampoline_count(void)
/* ..that needs to fail */
link = load(obj, fentry_name);
- if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+ err = libbpf_get_error(link);
+ if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
bpf_link__destroy(link);
goto cleanup_extra;
}
/* with E2BIG error */
- CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+ ASSERT_EQ(err, -E2BIG, "proper error check");
+ ASSERT_EQ(link, NULL, "ptr_is_null");
/* and finaly execute the probe */
if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
index 2aba09d4d01b..56c9d6bd38a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -22,11 +22,10 @@ void test_udp_limit(void)
goto close_cgroup_fd;
skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+ goto close_skeleton;
skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
- if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
- "cg-attach", "sock %ld sock_release %ld",
- PTR_ERR(skel->links.sock),
- PTR_ERR(skel->links.sock_release)))
+ if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
goto close_skeleton;
/* BPF program enforces a single UDP socket per cgroup,
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 2c6c570b21f8..3bd5904b4db5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void)
pb_opts.ctx = &passed;
pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
/* Run test program */
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 6f814999b395..46eed0a33c23 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -51,7 +51,7 @@ void test_xdp_link(void)
/* BPF link is not allowed to replace prog attachment */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
/* best-effort detach prog */
opts.old_fd = prog_fd1;
@@ -67,7 +67,7 @@ void test_xdp_link(void)
/* now BPF link should attach successfully */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel1->links.xdp_handler = link;
@@ -95,7 +95,7 @@ void test_xdp_link(void)
/* BPF link is not allowed to replace another BPF link */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
goto cleanup;
}
@@ -105,7 +105,7 @@ void test_xdp_link(void)
/* new link attach should succeed */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel2->links.xdp_handler = link;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 6dfce3fd68bc..0aa3cd34cbe3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b83b5d2e17dc..6c39e86b666f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index d58d9f1642b5..784a610ce039 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 95989f4c99b5..a28e51e2dcee 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index b7f32c160f4e..c86b93f33b32 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
index a1ddc36f13ec..bca8b889cb10 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020, Oracle and/or its affiliates. */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <errno.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index b2f7c7c5f952..6e7b400888fe 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 43c36f5f7649..f2b8167b72a8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
index 11d1aa37cf11..4ea6a37d1345 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 54380c5e1069..2e4775c35414 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index b4fbddfa4e10..943f7bba180e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index ee49493dc125..400fdf8d6233 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -9,8 +9,8 @@ __u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
-static volatile const __u32 print_len;
-static volatile const __u32 ret1;
+volatile const __u32 print_len;
+volatile const __u32 ret1;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index f258583afbbd..cf0c485b1ed7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 65f93bb03f0f..5031e21c433f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index a46a264ce24e..55e283050cab 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
return 0;
}
-static volatile struct {
+struct {
bool fentry_test_ok;
bool fexit_test_ok;
-} result;
+} result = {};
SEC("fentry/eth_type_trans")
int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
index 52291515cc72..00bf1ca95986 100644
--- a/tools/testing/selftests/bpf/progs/linked_maps1.c
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -75,7 +75,7 @@ int BPF_PROG(handler_exit1)
val = bpf_map_lookup_elem(&map_weak, &key);
if (val)
output_weak1 = *val;
-
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
new file mode 100644
index 000000000000..e550f728962d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <../../../tools/include/linux/filter.h>
+#include <linux/btf.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+
+static int btf_load(void)
+{
+ struct btf_blob {
+ struct btf_header btf_hdr;
+ __u32 types[8];
+ __u32 str;
+ } raw_btf = {
+ .btf_hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(__u32) * 8,
+ .str_off = sizeof(__u32) * 8,
+ .str_len = sizeof(__u32),
+ },
+ .types = {
+ /* long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */
+ /* unsigned long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ },
+ };
+ static union bpf_attr btf_load_attr = {
+ .btf_size = sizeof(raw_btf),
+ };
+
+ btf_load_attr.btf = (long)&raw_btf;
+ return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr));
+}
+
+SEC("syscall")
+int bpf_prog(struct args *ctx)
+{
+ static char license[] = "GPL";
+ static struct bpf_insn insns[] = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ static union bpf_attr map_create_attr = {
+ .map_type = BPF_MAP_TYPE_HASH,
+ .key_size = 8,
+ .value_size = 8,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 2,
+ };
+ static union bpf_attr map_update_attr = { .map_fd = 1, };
+ static __u64 key = 12;
+ static __u64 value = 34;
+ static union bpf_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .insn_cnt = sizeof(insns) / sizeof(insns[0]),
+ };
+ int ret;
+
+ ret = btf_load();
+ if (ret <= 0)
+ return ret;
+
+ ctx->btf_fd = ret;
+ map_create_attr.max_entries = ctx->max_entries;
+ map_create_attr.btf_fd = ret;
+
+ prog_load_attr.license = (long) license;
+ prog_load_attr.insns = (long) insns;
+ prog_load_attr.log_buf = ctx->log_buf;
+ prog_load_attr.log_size = ctx->log_size;
+ prog_load_attr.log_level = 1;
+
+ ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->map_fd = ret;
+ insns[3].imm = ret;
+
+ map_update_attr.map_fd = ret;
+ map_update_attr.key = (long) &key;
+ map_update_attr.value = (long) &value;
+ ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
+ if (ret < 0)
+ return ret;
+
+ ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->prog_fd = ret;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index 739dc2a51e74..910858fe078a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index f82075b47d7d..bd4be135c39d 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
SEC("classifier/" #x) \
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index ce5450744fd4..adf30a33064e 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
SEC("classifier/" #x) \
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index 7b1c04183824..3cc4c12817b5 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -20,7 +20,7 @@ int subprog_tail(struct __sk_buff *skb)
return 1;
}
-static volatile int count;
+int count = 0;
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index 9a1b166b7fbe..77df6d4db895 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -9,7 +9,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
__noinline
int subprog_tail_2(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index c4a9bae96e75..71184af57749 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -11,8 +11,8 @@
char _license[] SEC("license") = "GPL";
/* Userspace will update with MTU it can see on device */
-static volatile const int GLOBAL_USER_MTU;
-static volatile const __u32 GLOBAL_USER_IFINDEX;
+volatile const int GLOBAL_USER_MTU;
+volatile const __u32 GLOBAL_USER_IFINDEX;
/* BPF-prog will update these with MTU values it can see */
__u32 global_bpf_mtu_xdp = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 3c1e042962e6..e2a5acc4785c 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -39,8 +39,8 @@ char _license[] SEC("license") = "Dual BSD/GPL";
/**
* Destination port and IP used for UDP encapsulation.
*/
-static volatile const __be16 ENCAPSULATION_PORT;
-static volatile const __be32 ENCAPSULATION_IP;
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
typedef struct {
uint64_t processed_packets_total;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c
index cae309538a9e..e712bf77daae 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func_args.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c
@@ -8,7 +8,7 @@ struct S {
int v;
};
-static volatile struct S global_variable;
+struct S global_variable = {};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..3a193f42c7e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+ if (set_pid == bpf_get_current_pid_tgid() >> 32)
+ bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644
index 000000000000..27df571abf5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. If reuse_md->migrating_sk is NULL (SYN packet),
+ * return SK_PASS without selecting a listener.
+ * 2. If reuse_md->migrating_sk is not NULL (socket migration),
+ * select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 256);
+ __type(key, int);
+ __type(value, __u64);
+} reuseport_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 256);
+ __type(key, __u64);
+ __type(value, int);
+} migrate_map SEC(".maps");
+
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port;
+
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ struct tcphdr *tcp = NULL;
+
+ if (eth + 1 > data_end)
+ goto pass;
+
+ switch (bpf_ntohs(eth->h_proto)) {
+ case ETH_P_IP: {
+ struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+ if (ip + 1 > data_end)
+ goto pass;
+
+ if (ip->protocol != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+ break;
+ }
+ case ETH_P_IPV6: {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+ if (ipv6 + 1 > data_end)
+ goto pass;
+
+ if (ipv6->nexthdr != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)(ipv6 + 1);
+ break;
+ }
+ default:
+ goto pass;
+ }
+
+ if (tcp + 1 > data_end)
+ goto pass;
+
+ if (tcp->dest != server_port)
+ goto pass;
+
+ if (!tcp->syn && tcp->ack)
+ return XDP_DROP;
+
+pass:
+ return XDP_PASS;
+}
+
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+ int *key, flags = 0, state, err;
+ __u64 cookie;
+
+ if (!reuse_md->migrating_sk)
+ return SK_PASS;
+
+ state = reuse_md->migrating_sk->state;
+ cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+ key = bpf_map_lookup_elem(&migrate_map, &cookie);
+ if (!key)
+ return SK_DROP;
+
+ err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+ if (err)
+ return SK_PASS;
+
+ switch (state) {
+ case BPF_TCP_ESTABLISHED:
+ __sync_fetch_and_add(&migrated_at_close, 1);
+ break;
+ case BPF_TCP_SYN_RECV:
+ __sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (!reuse_md->len)
+ __sync_fetch_and_add(&migrated_at_send_synack, 1);
+ else
+ __sync_fetch_and_add(&migrated_at_recv_ack, 1);
+ break;
+ }
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
index ecbeea2df259..fc8e8a34a3db 100644
--- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
+++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-static volatile const struct {
+const struct {
unsigned a[4];
/*
* if the struct's size is multiple of 16, compiler will put it into
@@ -15,11 +15,11 @@ static volatile const struct {
char _y;
} rdonly_values = { .a = {2, 3, 4, 5} };
-static volatile struct {
+struct {
unsigned did_run;
unsigned iters;
unsigned sum;
-} res;
+} res = {};
SEC("raw_tracepoint/sys_enter:skip_loop")
int skip_loop(struct pt_regs *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 6b3f288b7c63..eaa7d9dba0be 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -35,7 +35,7 @@ long prod_pos = 0;
/* inner state */
long seq = 0;
-SEC("tp/syscalls/sys_enter_getpgid")
+SEC("fentry/__x64_sys_getpgid")
int test_ringbuf(void *ctx)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
@@ -48,7 +48,7 @@ int test_ringbuf(void *ctx)
sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
if (!sample) {
__sync_fetch_and_add(&dropped, 1);
- return 1;
+ return 0;
}
sample->pid = pid;
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 374ccef704e1..441fa1c552c8 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -38,11 +38,11 @@ extern int LINUX_KERNEL_VERSION __kconfig;
bool bpf_syscall = 0;
int kern_ver = 0;
+struct s out5 = {};
+
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
- static volatile struct s out5;
-
out1 = in1;
out2 = in2;
out3 = in3;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index e35129bea0a0..e2ad26150f9b 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -3,7 +3,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
__u32 pid = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
index 402adaf344f9..3095837334d3 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf_single.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -5,7 +5,7 @@
#include <bpf/bpf_helpers.h>
/* The format string is filled from the userspace such that loading fails */
-static const char fmt[10];
+const char fmt[10];
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a39eba9f5201..a1cc58b10c7c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -28,8 +28,8 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
-static volatile bool test_sockmap; /* toggled by user-space */
-static volatile bool test_ingress; /* toggled by user-space */
+bool test_sockmap = false; /* toggled by user-space */
+bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
index ea1a6c4c7172..4f0b612e1661 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked1.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -4,10 +4,10 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-/* 8-byte aligned .bss */
-static volatile long static_var1;
-static volatile int static_var11;
-int var1 = 0;
+/* 8-byte aligned .data */
+static volatile long static_var1 = 2;
+static volatile int static_var2 = 3;
+int var1 = -1;
/* 4-byte aligned .rodata */
const volatile int rovar1;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
SEC("raw_tp/sys_enter")
int handler1(const void *ctx)
{
- var1 = subprog(rovar1) + static_var1 + static_var11;
+ var1 = subprog(rovar1) + static_var1 + static_var2;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
index 54d8d1ab577c..766ebd502a60 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked2.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -4,10 +4,10 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-/* 4-byte aligned .bss */
-static volatile int static_var2;
-static volatile int static_var22;
-int var2 = 0;
+/* 4-byte aligned .data */
+static volatile int static_var1 = 5;
+static volatile int static_var2 = 6;
+int var2 = -1;
/* 8-byte aligned .rodata */
const volatile long rovar2;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
SEC("raw_tp/sys_enter")
int handler2(const void *ctx)
{
- var2 = subprog(rovar2) + static_var2 + static_var22;
+ var2 = subprog(rovar2) + static_var1 + static_var2;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index d3c5673c0218..b7c37ca09544 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -4,8 +4,18 @@
const char LICENSE[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
__noinline int sub1(int x)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return x + 1;
}
@@ -23,6 +33,9 @@ static __noinline int sub3(int z)
static __noinline int sub4(int w)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return w + sub3(5) + sub1(6);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
new file mode 100644
index 000000000000..18a3a7ed924a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* Dummy prog to test TC-BPF API */
+
+SEC("classifier")
+int cls(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index b985ac4e7a81..0c93d326a663 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -33,17 +33,8 @@
a.s6_addr32[3] == b.s6_addr32[3])
#endif
-enum {
- dev_src,
- dev_dst,
-};
-
-struct bpf_map_def SEC("maps") ifindex_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
__be32 addr)
@@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
return v6_equal(ip6h->daddr, addr);
}
-static __always_inline int get_dev_ifindex(int which)
-{
- int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
-
- return ifindex ? *ifindex : 0;
-}
-
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
{
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
@@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
}
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
{
__u8 zero[ETH_ALEN * 2];
bool redirect = false;
@@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
+ return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
{
__u8 zero[ETH_ALEN * 2];
bool redirect = false;
@@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
+ return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0);
}
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
index d82ed3457030..f7ab69cf018e 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
return 0;
}
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
{
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
@@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb)
/* these are identical, but keep them separate for compatibility with the
* section names expected by test_tc_redirect.sh
*/
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
{
return tc_redir(skb);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
{
return tc_redir(skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index fc84a7685aa2..fe818cd5f010 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,41 +5,59 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
-enum {
- dev_src,
- dev_dst,
-};
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
-struct bpf_map_def SEC("maps") ifindex_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
-static __always_inline int get_dev_ifindex(int which)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
{
- int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+ return TC_ACT_SHOT;
+}
- return ifindex ? *ifindex : 0;
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(IFINDEX_SRC, 0);
}
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
{
- return TC_ACT_SHOT;
+ return bpf_redirect_peer(IFINDEX_DST, 0);
}
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
{
- return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+ return bpf_redirect(IFINDEX_SRC, 0);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
{
- return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
}
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
index 8ca7f399b670..119582aa105a 100644
--- a/tools/testing/selftests/bpf/progs/trace_printk.c
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -10,11 +10,11 @@ char _license[] SEC("license") = "GPL";
int trace_printk_ret = 0;
int trace_printk_ran = 0;
-SEC("tp/raw_syscalls/sys_enter")
+const char fmt[] = "Testing,testing %d\n";
+
+SEC("fentry/__x64_sys_nanosleep")
int sys_enter(void *ctx)
{
- static const char fmt[] = "testing,testing %d\n";
-
trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
++trace_printk_ran);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644
index 000000000000..880debcbcd65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map use devmap, another one use devmap_hash for testing */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* map to store egress interfaces mac addresses */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __be64);
+ __uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+SEC("xdp_redirect_map_multi")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ int if_index = ctx->ingress_ifindex;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+ __u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return bpf_redirect_map(&map_all, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+ /* Using IPv6 for none flag testing */
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+ return bpf_redirect_map(&map_all, if_index, 0);
+ /* All others for BPF_F_BROADCAST testing */
+ else
+ return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp_redirect_map_ingress")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&map_egress, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = ctx->egress_ifindex;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+ __be64 *mac;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ mac = bpf_map_lookup_elem(&mac_map, &key);
+ if (mac)
+ __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
index 7eb940a7b2eb..ed12111cd2f0 100755
--- a/tools/testing/selftests/bpf/test_doc_build.sh
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -1,5 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
# Assume script is located under tools/testing/selftests/bpf/. We want to start
# build attempts from the top of kernel repository.
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6a5349f9eb14..7e9049fa3edf 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
errno == ENOENT);
+ /* lookup elem key=1 and delete it, then check it doesn't exist */
+ key = 1;
+ assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+ assert(value[0] == 1234);
+
+ /* remove the same element from the expected map */
+ assert(!bpf_map_delete_elem(expected_map_fd, &key));
+
assert(map_equal(lru_map_fd, expected_map_fd));
close(expected_map_fd);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..30cbf5d98f7d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -53,23 +53,30 @@ static void test_hashmap(unsigned int task, void *data)
value = 0;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
key = 2;
+ value = 1234;
+ /* Insert key=2 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ /* Check that key=2 matches the value and delete it */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -80,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Update existing element, though the map is full. */
@@ -89,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data)
key = 2;
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
key = 3;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
key = 0;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -104,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
(next_key == 1 || next_key == 2) &&
(next_key != first_key));
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete both elements. */
@@ -112,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -166,15 +173,25 @@ static void test_hashmap_percpu(unsigned int task, void *data)
/* Insert key=1 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+ /* Lookup and delete elem key=1 and check value. */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+ bpf_percpu(value,0) == 100);
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value,i) = i + 100;
+
+ /* Insert key=1 element which should not exist. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
expected_key_mask |= key;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. Value could be 0 if the lookup
@@ -186,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
key = 2;
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -202,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -237,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -360,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
value = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -374,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data)
* due to max_entries limit.
*/
key = 2;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -387,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -418,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
bpf_percpu(values, 0) = 0;
- assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -433,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
/* Check that key=2 cannot be inserted due to max_entries limit. */
key = 2;
- assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -446,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -555,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -571,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -613,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -629,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -835,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
- if (IS_ERR(bpf_map_rx)) {
+ if (!bpf_map_rx) {
printf("Failed to load map rx from verdict prog\n");
goto out_sockmap;
}
@@ -847,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
- if (IS_ERR(bpf_map_tx)) {
+ if (!bpf_map_tx) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -859,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
- if (IS_ERR(bpf_map_msg)) {
+ if (!bpf_map_msg) {
printf("Failed to load map msg from msg_verdict prog\n");
goto out_sockmap;
}
@@ -871,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
- if (IS_ERR(bpf_map_break)) {
+ if (!bpf_map_break) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -1153,7 +1170,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1164,7 +1181,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1177,7 +1194,7 @@ static void test_map_in_map(void)
bpf_object__load(obj);
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1194,7 +1211,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1246,7 +1263,7 @@ static void test_map_large(void)
}
key.c = -1;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Iterate through all elements. */
@@ -1254,12 +1271,12 @@ static void test_map_large(void)
key.c = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
key.c = 0;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
key.a = 1;
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1391,7 +1408,7 @@ static void test_map_parallel(void)
run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that all elements were inserted. */
@@ -1399,7 +1416,7 @@ static void test_map_parallel(void)
key = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
/* Another check for all elements */
for (i = 0; i < MAP_SIZE; i++) {
@@ -1415,8 +1432,8 @@ static void test_map_parallel(void)
/* Nothing should be left. */
key = -1;
- assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
}
static void test_map_rdonly(void)
@@ -1434,12 +1451,12 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
/* Try to insert key=1 element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
errno == EPERM);
/* Check that key=1 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1462,8 +1479,8 @@ static void test_map_wronly_hash(void)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
/* Check that reading elements and keys from the map is not allowed. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
close(fd);
}
@@ -1490,10 +1507,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
/* Peek element should fail */
- assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
/* Pop element should fail */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
errno == EPERM);
close(fd);
@@ -1547,7 +1564,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
value = &fd32;
}
err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update unbound sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1576,7 +1593,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
*/
err = bpf_map_update_elem(map_fd, &index0, value,
BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update non-listening sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1606,31 +1623,31 @@ static void test_reuseport_array(void)
map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
sizeof(__u32), sizeof(__u64), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
/* Test lookup/update/delete with invalid index */
err = bpf_map_delete_elem(map_fd, &bad_index);
- CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != E2BIG,
+ CHECK(err >= 0 || errno != E2BIG,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
/* Test lookup/delete non existence elem */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup not-exist elem",
"err:%d errno:%d\n", err, errno);
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array del not-exist elem",
"err:%d errno:%d\n", err, errno);
@@ -1644,7 +1661,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update empty elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1653,7 +1670,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST success case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1662,7 +1679,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST success case. */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1670,7 +1687,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err != -1 || errno != EEXIST,
+ CHECK(err >= 0 || errno != EEXIST,
"reuseport array update non-empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1679,7 +1696,7 @@ static void test_reuseport_array(void)
/* BPF_ANY case (always succeed) */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_ANY);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same sk with BPF_ANY",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
@@ -1688,32 +1705,32 @@ static void test_reuseport_array(void)
/* The same sk cannot be added to reuseport_array twice */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with same index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with different index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err == -1, "reuseport array delete sk",
+ CHECK(err < 0, "reuseport array delete sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Add it back with BPF_NOEXIST */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array re-add with BPF_NOEXIST after del",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
/* Test cookie */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err == -1 || sk_cookie != map_cookie,
+ CHECK(err < 0 || sk_cookie != map_cookie,
"reuseport array lookup re-added sk",
"sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
type, err, errno, sk_cookie, map_cookie);
@@ -1722,7 +1739,7 @@ static void test_reuseport_array(void)
for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
close(grpa_fds64[f]);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup after close()",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1733,7 +1750,7 @@ static void test_reuseport_array(void)
CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
"err:%d errno:%d\n", err, errno);
close(fd64);
@@ -1743,16 +1760,16 @@ static void test_reuseport_array(void)
/* Test 32 bit fd */
map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
sizeof(__u32), sizeof(__u32), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
&sk_cookie, 1);
fd = fd64;
err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
- CHECK(err == -1, "reuseport array update 32 bit fd",
+ CHECK(err < 0, "reuseport array update 32 bit fd",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOSPC,
+ CHECK(err >= 0 || errno != ENOSPC,
"reuseport array lookup 32 bit fd",
"err:%d errno:%d\n", err, errno);
close(fd);
@@ -1798,6 +1815,8 @@ int main(void)
{
srand(time(NULL));
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6396932b97e2..6f103106a39b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -737,6 +737,9 @@ int main(int argc, char **argv)
if (err)
return err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
libbpf_set_print(libbpf_print_fn);
srand(time(NULL));
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index dda52cb649dc..8ef7f334e715 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -249,16 +249,17 @@ extern int test__join_cgroup(const char *path);
#define ASSERT_OK_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = !IS_ERR_OR_NULL(___res); \
- CHECK(!___ok, (name), \
- "unexpected error: %ld\n", PTR_ERR(___res)); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err == 0; \
+ CHECK(!___ok, (name), "unexpected error: %d\n", ___err); \
___ok; \
})
#define ASSERT_ERR_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = IS_ERR(___res); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err != 0; \
CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
___ok; \
})
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
deleted file mode 100755
index 8868aa1ca902..000000000000
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
-# between src and dst. The netns fwd has veth links to each src and dst. The
-# client is in src and server in dst. The test installs a TC BPF program to each
-# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
-# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
-# switch from ingress side; it also installs a checker prog on the egress side
-# to drop unexpected traffic.
-
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-# check that needed tools are present
-command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
- { echo >&2 "dd is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
- { echo >&2 "timeout is not available"; exit 1; }
-command -v ping >/dev/null 2>&1 || \
- { echo >&2 "ping is not available"; exit 1; }
-if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
-command -v perl >/dev/null 2>&1 || \
- { echo >&2 "perl is not available"; exit 1; }
-command -v jq >/dev/null 2>&1 || \
- { echo >&2 "jq is not available"; exit 1; }
-command -v bpftool >/dev/null 2>&1 || \
- { echo >&2 "bpftool is not available"; exit 1; }
-
-readonly GREEN='\033[0;92m'
-readonly RED='\033[0;31m'
-readonly NC='\033[0m' # No Color
-
-readonly PING_ARG="-c 3 -w 10 -q"
-
-readonly TIMEOUT=10
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP4_SRC="172.16.1.100"
-readonly IP4_DST="172.16.2.100"
-
-readonly IP6_SRC="::1:dead:beef:cafe"
-readonly IP6_DST="::2:dead:beef:cafe"
-
-readonly IP4_SLL="169.254.0.1"
-readonly IP4_DLL="169.254.0.2"
-readonly IP4_NET="169.254.0.0"
-
-netns_cleanup()
-{
- ip netns del ${NS_SRC}
- ip netns del ${NS_FWD}
- ip netns del ${NS_DST}
-}
-
-netns_setup()
-{
- ip netns add "${NS_SRC}"
- ip netns add "${NS_FWD}"
- ip netns add "${NS_DST}"
-
- ip link add veth_src type veth peer name veth_src_fwd
- ip link add veth_dst type veth peer name veth_dst_fwd
-
- ip link set veth_src netns ${NS_SRC}
- ip link set veth_src_fwd netns ${NS_FWD}
-
- ip link set veth_dst netns ${NS_DST}
- ip link set veth_dst_fwd netns ${NS_FWD}
-
- ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
- ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
-
- # The fwd netns automatically get a v6 LL address / routes, but also
- # needs v4 one in order to start ARP probing. IP4_NET route is added
- # to the endpoints so that the ARP processing will reply.
-
- ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
- ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
-
- ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
- ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
-
- ip -netns ${NS_SRC} link set dev veth_src up
- ip -netns ${NS_FWD} link set dev veth_src_fwd up
-
- ip -netns ${NS_DST} link set dev veth_dst up
- ip -netns ${NS_FWD} link set dev veth_dst_fwd up
-
- ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
- ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
- ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
-
- ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
- ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
-
- ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
- ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
- ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
-
- ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
- ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
-
- fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
- fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
-
- ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
- ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
-
- ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
- ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
-}
-
-netns_test_connectivity()
-{
- set +e
-
- ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
- ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
-
- TEST="TCPv4 connectivity test"
- ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="TCPv6 connectivity test"
- ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="ICMPv4 connectivity test"
- ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="ICMPv6 connectivity test"
- ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- set -e
-}
-
-hex_mem_str()
-{
- perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
-}
-
-netns_setup_bpf()
-{
- local obj=$1
- local use_forwarding=${2:-0}
-
- ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
-
- ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
-
- if [ "$use_forwarding" -eq "1" ]; then
- # bpf_fib_lookup() checks if forwarding is enabled
- ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
- ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
- ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
- return 0
- fi
-
- veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
- veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
-
- progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
- for prog in $progs; do
- map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
- if [ ! -z "$map" ]; then
- bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
- bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
- fi
- done
-}
-
-trap netns_cleanup EXIT
-set -e
-
-netns_setup
-netns_setup_bpf test_tc_neigh.o
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_neigh_fib.o 1
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_peer.o
-netns_test_connectivity
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 73da7fe8c152..4a39304cc5a6 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -82,6 +82,8 @@ int main(int argc, char **argv)
cpu_set_t cpuset;
__u32 key = 0;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
@@ -116,7 +118,7 @@ int main(int argc, char **argv)
pb_opts.sample_cb = dummyfn;
pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
- if (IS_ERR(pb))
+ if (!pb)
goto err;
pthread_create(&tid, NULL, poller_thread, pb);
@@ -163,7 +165,6 @@ err:
bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
close(cg_fd);
cleanup_cgroup_environment();
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
return error;
}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 1512092e1e68..3a9e332c5e36 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
}
}
- if (test->insn_processed) {
+ if (!unpriv && test->insn_processed) {
uint32_t insn_processed;
char *proc;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
new file mode 100755
index 000000000000..1538373157e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test topology:
+# - - - - - - - - - - - - - - - - - - - - - - - - -
+# | veth1 veth2 veth3 | ... init net
+# - -| - - - - - - | - - - - - - | - -
+# --------- --------- ---------
+# | veth0 | | veth0 | | veth0 | ...
+# --------- --------- ---------
+# ns1 ns2 ns3
+#
+# Test modules:
+# XDP modes: generic, native, native + egress_prog
+#
+# Test cases:
+# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
+# the redirects.
+# ns1 -> gw: ns1, ns2, ns3, should receive the arp request
+# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
+# interface should not receive the redirects.
+# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
+# IPv6: Testing none flag, all the pkts should be redirected back
+# ping test: ns1 -> ns2 (block), echo requests will be redirect back
+# egress_prog:
+# all src mac should be egress interface's mac
+
+# netns numbers
+NUM=3
+IFACES=""
+DRV_MODE="xdpgeneric xdpdrv xdpegress"
+PASS=0
+FAIL=0
+
+test_pass()
+{
+ echo "Pass: $@"
+ PASS=$((PASS + 1))
+}
+
+test_fail()
+{
+ echo "fail: $@"
+ FAIL=$((FAIL + 1))
+}
+
+clean_up()
+{
+ for i in $(seq $NUM); do
+ ip link del veth$i 2> /dev/null
+ ip netns del ns$i 2> /dev/null
+ done
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_env()
+{
+ ip link set dev lo xdpgeneric off &>/dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+ exit 4
+ fi
+
+ which tcpdump &>/dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without tcpdump"
+ exit 4
+ fi
+}
+
+setup_ns()
+{
+ local mode=$1
+ IFACES=""
+
+ if [ "$mode" = "xdpegress" ]; then
+ mode="xdpdrv"
+ fi
+
+ for i in $(seq $NUM); do
+ ip netns add ns$i
+ ip link add veth$i type veth peer name veth0 netns ns$i
+ ip link set veth$i up
+ ip -n ns$i link set veth0 up
+
+ ip -n ns$i addr add 192.0.2.$i/24 dev veth0
+ ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+ # Add a neigh entry for IPv4 ping test
+ ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+ ip -n ns$i link set veth0 $mode obj \
+ xdp_dummy.o sec xdp_dummy &> /dev/null || \
+ { test_fail "Unable to load dummy xdp" && exit 1; }
+ IFACES="$IFACES veth$i"
+ veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
+ done
+}
+
+do_egress_tests()
+{
+ local mode=$1
+
+ # mac test
+ ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-2_${mode}.log &
+ ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-3_${mode}.log &
+ sleep 0.5
+ ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ sleep 0.5
+ pkill -9 tcpdump
+
+ # mac check
+ grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \
+ test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
+ grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \
+ test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
+}
+
+do_ping_tests()
+{
+ local mode=$1
+
+ # ping6 test: echo request should be redirect back to itself, not others
+ ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+ ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log &
+ ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log &
+ ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log &
+ sleep 0.5
+ # ARP test
+ ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ # IPv4 test
+ ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+ # IPv6 test
+ ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+ sleep 0.5
+ pkill -9 tcpdump
+
+ # All netns should receive the redirect arp requests
+ [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-1" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-1"
+ [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-2" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-2"
+ [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-3" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-3"
+
+ # ns1 should not receive the redirect echo request, others should
+ [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
+ [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
+ [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
+
+ # ns1 should receive the echo request, ns2 should not
+ [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-1" || \
+ test_fail "$mode IPv6 (no flags) ns1-1"
+ [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-2" || \
+ test_fail "$mode IPv6 (no flags) ns1-2"
+}
+
+do_tests()
+{
+ local mode=$1
+ local drv_p
+
+ case ${mode} in
+ xdpdrv) drv_p="-N";;
+ xdpegress) drv_p="-X";;
+ xdpgeneric) drv_p="-S";;
+ esac
+
+ ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log &
+ xdp_pid=$!
+ sleep 1
+
+ if [ "$mode" = "xdpegress" ]; then
+ do_egress_tests $mode
+ else
+ do_ping_tests $mode
+ fi
+
+ kill $xdp_pid
+}
+
+trap clean_up 0 2 3 6 9
+
+check_env
+rm -f xdp_redirect_*.log ns*.log mac_ns*.log
+
+for mode in ${DRV_MODE}; do
+ setup_ns $mode
+ do_tests $mode
+ clean_up
+done
+
+echo "Summary: PASS $PASS, FAIL $FAIL"
+[ $FAIL -eq 0 ] && exit 0 || exit 1
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index ca8fdb1b3f01..7d7ebee5cc7a 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -61,6 +61,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 0
},
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 8a1caf46ffbc..e061e8799ce2 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -508,6 +508,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT
},
{
@@ -528,6 +530,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT
},
{
@@ -569,6 +573,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -589,6 +595,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -609,6 +617,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -674,6 +684,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
@@ -695,6 +707,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+ .result_unpriv = REJECT,
.fixup_map_hash_8b = { 3 },
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 17fe33a75034..2c8935b3e65d 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -8,6 +8,8 @@
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bd5cae4a7f73..1c857b2fbdf0 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -87,6 +87,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -150,6 +152,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -213,6 +217,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -280,6 +286,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -348,6 +356,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -416,6 +426,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -484,6 +496,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -552,6 +566,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -620,6 +636,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -688,6 +706,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
@@ -756,6 +776,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 8dcd4e0383d5..11fc68da735e 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -82,8 +82,8 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .retval_unpriv = 1,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.retval = 1,
.result = ACCEPT,
},
@@ -141,7 +141,8 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -162,6 +163,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
index 07eaa04412ae..8ab94d65f3d5 100644
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c
@@ -295,8 +295,6 @@
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
BPF_EXIT_INSN(),
},
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid write to stack R1 off=0 size=1",
.result = ACCEPT,
.retval = 42,
},
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index bd436df5cc32..111801aea5e3 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -420,6 +420,8 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R7 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 0,
},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index e5913fd3b903..a3e593ddfafc 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -120,7 +120,7 @@
.fixup_map_array_48b = { 1 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+ .errstr_unpriv = "R2 pointer comparison prohibited",
.retval = 0,
},
{
@@ -159,7 +159,8 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
// fake-dead code; targeted from branch A to
- // prevent dead code sanitization
+ // prevent dead code sanitization, rejected
+ // via branch B however
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
@@ -167,7 +168,7 @@
.fixup_map_array_48b = { 1 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
.retval = 0,
},
{
@@ -300,8 +301,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
@@ -371,8 +370,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
@@ -472,8 +469,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
@@ -766,8 +761,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
new file mode 100644
index 000000000000..3696a8f32c23
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+#define MAX_INDEX_NUM 1024
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+
+static void int_exit(int sig)
+{
+ __u32 prog_id = 0;
+ int i;
+
+ for (i = 0; ifaces[i] > 0; i++) {
+ if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+ printf("bpf_get_link_xdp_id failed\n");
+ exit(1);
+ }
+ if (prog_id)
+ bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+ }
+
+ exit(0);
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+ char ifname[IF_NAMESIZE];
+ struct ifreq ifr;
+ int fd, ret = -1;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return ret;
+
+ if (!if_indextoname(ifindex, ifname))
+ goto err_out;
+
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+ goto err_out;
+
+ memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+ ret = 0;
+
+err_out:
+ close(fd);
+ return ret;
+}
+
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+ "OPTS:\n"
+ " -S use skb-mode\n"
+ " -N enforce native mode\n"
+ " -F force loading prog\n"
+ " -X load xdp program on egress\n",
+ prog);
+}
+
+int main(int argc, char **argv)
+{
+ int prog_fd, group_all, mac_map;
+ struct bpf_program *ingress_prog, *egress_prog;
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ };
+ int i, ret, opt, egress_prog_fd = 0;
+ struct bpf_devmap_val devmap_val;
+ bool attach_egress_prog = false;
+ unsigned char mac_addr[6];
+ char ifname[IF_NAMESIZE];
+ struct bpf_object *obj;
+ unsigned int ifindex;
+ char filename[256];
+
+ while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+ switch (opt) {
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'N':
+ /* default, set below */
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
+ case 'X':
+ attach_egress_prog = true;
+ break;
+ default:
+ usage(basename(argv[0]));
+ return 1;
+ }
+ }
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ } else if (attach_egress_prog) {
+ printf("Load xdp program on egress with SKB mode not supported yet\n");
+ goto err_out;
+ }
+
+ if (optind == argc) {
+ printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+ goto err_out;
+ }
+
+ printf("Get interfaces");
+ for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+ ifaces[i] = if_nametoindex(argv[optind + i]);
+ if (!ifaces[i])
+ ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+ if (!if_indextoname(ifaces[i], ifname)) {
+ perror("Invalid interface name or i");
+ goto err_out;
+ }
+ if (ifaces[i] > MAX_INDEX_NUM) {
+ printf("Interface index to large\n");
+ goto err_out;
+ }
+ printf(" %d", ifaces[i]);
+ }
+ printf("\n");
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ prog_load_attr.file = filename;
+
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ goto err_out;
+
+ if (attach_egress_prog)
+ group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
+ else
+ group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
+ mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
+
+ if (group_all < 0 || mac_map < 0) {
+ printf("bpf_object__find_map_fd_by_name failed\n");
+ goto err_out;
+ }
+
+ if (attach_egress_prog) {
+ /* Find ingress/egress prog for 2nd xdp prog */
+ ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
+ egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+ if (!ingress_prog || !egress_prog) {
+ printf("finding ingress/egress_prog in obj file failed\n");
+ goto err_out;
+ }
+ prog_fd = bpf_program__fd(ingress_prog);
+ egress_prog_fd = bpf_program__fd(egress_prog);
+ if (prog_fd < 0 || egress_prog_fd < 0) {
+ printf("find egress_prog fd failed\n");
+ goto err_out;
+ }
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ /* Init forward multicast groups and exclude group */
+ for (i = 0; ifaces[i] > 0; i++) {
+ ifindex = ifaces[i];
+
+ if (attach_egress_prog) {
+ ret = get_mac_addr(ifindex, mac_addr);
+ if (ret < 0) {
+ printf("get interface %d mac failed\n", ifindex);
+ goto err_out;
+ }
+ ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
+ if (ret) {
+ perror("bpf_update_elem mac_map failed\n");
+ goto err_out;
+ }
+ }
+
+ /* Add all the interfaces to group all */
+ devmap_val.ifindex = ifindex;
+ devmap_val.bpf_prog.fd = egress_prog_fd;
+ ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
+ if (ret) {
+ perror("bpf_map_update_elem");
+ goto err_out;
+ }
+
+ /* bind prog_fd to each interface */
+ ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+ if (ret) {
+ printf("Set xdp fd failed on %d\n", ifindex);
+ goto err_out;
+ }
+ }
+
+ /* sleep some time for testing */
+ sleep(999);
+
+ return 0;
+
+err_out:
+ return 1;
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 4029833f7e27..160891dcb4bc 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -109,6 +109,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 42d44e27802c..190c1b6b5365 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -111,6 +111,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
index 65f43a7ce9c9..1e9a4aff76a2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -7,6 +7,8 @@
PORT_NUM_NETIFS=0
+declare -a unsplit
+
port_setup_prepare()
{
:
@@ -20,12 +22,12 @@ port_cleanup()
devlink port unsplit $port
check_err $? "Did not unsplit $netdev"
done
+ unsplit=()
}
split_all_ports()
{
local should_fail=$1; shift
- local -a unsplit
# Loop over the splittable netdevs and create tuples of netdev along
# with its width. For example:
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 5cbff8038f84..28a570006d4d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -93,7 +93,9 @@ switch_destroy()
lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
lldpad_app_wait_del
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index 27de3d9ed08e..f4493ef9cca1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -29,37 +29,38 @@ cleanup()
get_prio_pg()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number
+ # of buffer that priority X is mapped to.
+ dcb -j buffer show dev $swp |
+ jq -r '[.prio_buffer | .[] | tostring + " "] | add'
}
get_prio_pfc()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes
+ # whether priority X has PFC enabled (the value is 1) or disabled (0).
+ dcb -j pfc show dev $swp |
+ jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add'
}
get_prio_tc()
{
- __mlnx_qos -i $swp | sed -n '/^tc/,$p' |
- awk '/^tc/ { TC = $2 }
- /priority:/ { PRIO[$2]=TC }
- END {
- for (i in PRIO)
- printf("%d ", PRIO[i])
- }'
+ # Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number
+ # of TC that priority X is mapped to.
+ dcb -j ets show dev $swp |
+ jq -r '[.prio_tc | .[] | tostring + " "] | add'
}
get_buf_size()
{
local idx=$1; shift
- __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+ dcb -j buffer show dev $swp | jq ".buffer_size[$idx]"
}
get_tot_size()
{
- __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+ dcb -j buffer show dev $swp | jq '.total_size'
}
check_prio_pg()
@@ -121,18 +122,18 @@ test_dcb_ets()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7
check_prio_pg "0 2 4 6 1 3 5 7 "
check_prio_tc "0 2 4 6 1 3 5 7 "
check_prio_pfc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null
check_fail $? "prio2buffer accepted in DCB mode"
log_test "Configuring headroom through ETS"
@@ -174,7 +175,7 @@ test_pfc()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+ dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3
local buf0size=$(get_buf_size 0)
local buf1size=$(get_buf_size 1)
@@ -193,7 +194,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0
check_prio_pg "0 0 0 0 0 1 2 3 "
check_prio_pfc "0 0 0 0 0 1 1 1 "
@@ -210,7 +211,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+ dcb pfc set dev $swp delay 1000
check_buf_size 0 "== $buf0size"
check_buf_size 1 "> $buf1size"
@@ -221,8 +222,8 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off delay 0
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
@@ -242,13 +243,13 @@ test_tc_priomap()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
check_prio_pg "0 1 2 3 4 5 6 7 "
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
check_prio_pg "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6
check_prio_pg "1 3 5 7 0 2 4 6 "
tc qdisc delete dev $swp root
@@ -256,9 +257,9 @@ test_tc_priomap()
# Clean up.
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp prio-buffer all:0
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
log_test "TC: priomap"
}
@@ -270,12 +271,12 @@ test_tc_sizes()
RET=0
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail before qdisc is added"
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_err $? "buffer_size should pass after qdisc is added"
check_buf_size 0 "== $size" "set size: "
@@ -283,26 +284,26 @@ test_tc_sizes()
check_buf_size 0 "== $size" "set MTU: "
mtu_restore $swp
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# After replacing the qdisc for the same kind, buffer_size still has to
# work.
tc qdisc replace dev $swp root handle 1: bfifo limit 1M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace, set size: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# Likewise after replacing for a different kind.
tc qdisc replace dev $swp root handle 2: prio bands 8
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace different kind, set size: "
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail after qdisc is deleted"
log_test "TC: buffer size"
@@ -363,10 +364,10 @@ test_tc_int_buf()
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
test_int_buf "TC: "
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
test_int_buf "TC+buffsize: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
tc qdisc delete dev $swp root
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index 0bf76f13c030..faa51012cdac 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -82,17 +82,3 @@ bail_on_lldpad()
fi
fi
}
-
-__mlnx_qos()
-{
- local err
-
- mlnx_qos "$@" 2>/dev/null
- err=$?
-
- if ((err)); then
- echo "Error ($err) in mlnx_qos $@" >/dev/stderr
- fi
-
- return $err
-}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5c7700212f75..5d5622fc2758 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -171,7 +171,7 @@ switch_create()
# assignment.
tc qdisc replace dev $swp1 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
# $swp2
# -----
@@ -209,8 +209,8 @@ switch_create()
# the lossless prio into a buffer of its own. Don't bother with buffer
# sizes though, there is not going to be any pressure in the "backward"
# direction.
- __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp3 prio-buffer all:0 1:1
+ dcb pfc set dev $swp3 prio-pfc all:off 1:on
# $swp4
# -----
@@ -226,11 +226,11 @@ switch_create()
# Configure qdisc so that we can hand-tune headroom.
tc qdisc replace dev $swp4 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 prio-buffer all:0 1:1
+ dcb pfc set dev $swp4 prio-pfc all:off 1:on
# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
# is (-2*MTU) about 80K of delay provision.
- __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 buffer-size all:0 1:$_100KB
# bridges
# -------
@@ -273,9 +273,9 @@ switch_destroy()
# $swp4
# -----
- __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 buffer-size all:0
+ dcb pfc set dev $swp4 prio-pfc all:off
+ dcb buffer set dev $swp4 prio-buffer all:0
tc qdisc del dev $swp4 root
devlink_tc_bind_pool_th_restore $swp4 1 ingress
@@ -288,8 +288,8 @@ switch_destroy()
# $swp3
# -----
- __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb pfc set dev $swp3 prio-pfc all:off
+ dcb buffer set dev $swp3 prio-buffer all:0
tc qdisc del dev $swp3 root
devlink_tc_bind_pool_th_restore $swp3 1 egress
@@ -315,7 +315,7 @@ switch_destroy()
# $swp1
# -----
- __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0
tc qdisc del dev $swp1 root
devlink_tc_bind_pool_th_restore $swp1 1 ingress
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index e93878d42596..683759d29199 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -68,7 +68,7 @@ wait_for_routes()
local t0=$1; shift
local route_count=$1; shift
- local t1=$(ip route | grep -o 'offload' | wc -l)
+ local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l)
local delta=$((t1 - t0))
echo $delta
[[ $delta -ge $route_count ]]
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
index 093bed088ad0..373d5f2a846e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -234,15 +234,15 @@ __tc_sample_rate_test()
psample_capture_start
- ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
-B $dip -t udp dp=52768,sp=42768 -q
psample_capture_stop
pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
- pct=$((100 * (pkts - 100) / 100))
+ pct=$((100 * (pkts - 10000) / 10000))
(( -25 <= pct && pct <= 25))
- check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
log_test "tc sample rate ($desc)"
@@ -587,15 +587,15 @@ __tc_sample_acl_rate_test()
psample_capture_start
- ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
psample_capture_stop
pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
- pct=$((100 * (pkts - 100) / 100))
+ pct=$((100 * (pkts - 10000) / 10000))
(( -25 <= pct && pct <= 25))
- check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
# Setup a filter that should not match any packet and make sure packets
# are not sampled.
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 40909c254365..9de1d123f4f5 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="fw_flash_test params_test regions_test reload_test \
netns_reload_test resource_test dev_info_test \
- empty_reporter_test dummy_reporter_test"
+ empty_reporter_test dummy_reporter_test rate_test"
NUM_NETIFS=0
source $lib_dir/lib.sh
BUS_ADDR=10
PORT_COUNT=4
+VF_COUNT=4
DEV_NAME=netdevsim$BUS_ADDR
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
@@ -507,6 +508,170 @@ dummy_reporter_test()
log_test "dummy reporter test"
}
+rate_leafs_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
+}
+
+rate_nodes_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))'
+}
+
+rate_attr_set()
+{
+ local handle=$1
+ local name=$2
+ local value=$3
+ local units=$4
+
+ devlink port function rate set $handle $name $value$units
+}
+
+rate_attr_get()
+{
+ local handle=$1
+ local name=$2
+
+ cmd_jq "devlink port function rate show $handle -j" '.[][].'$name
+}
+
+rate_attr_tx_rate_check()
+{
+ local handle=$1
+ local name=$2
+ local rate=$3
+ local debug_file=$4
+
+ rate_attr_set $handle $name $rate mbit
+ check_err $? "Failed to set $name value"
+
+ local debug_value=$(cat $debug_file)
+ check_err $? "Failed to read $name value from debugfs"
+ [ "$debug_value" == "$rate" ]
+ check_err $? "Unexpected $name debug value $debug_value != $rate"
+
+ local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 ))
+ check_err $? "Failed to get $name attr value"
+ [ "$api_value" == "$rate" ]
+ check_err $? "Unexpected $name attr value $api_value != $rate"
+}
+
+rate_attr_parent_check()
+{
+ local handle=$1
+ local parent=$2
+ local debug_file=$3
+
+ rate_attr_set $handle parent $parent
+ check_err $? "Failed to set parent"
+
+ debug_value=$(cat $debug_file)
+ check_err $? "Failed to get parent debugfs value"
+ [ "$debug_value" == "$parent" ]
+ check_err $? "Unexpected parent debug value $debug_value != $parent"
+
+ api_value=$(rate_attr_get $r_obj parent)
+ check_err $? "Failed to get parent attr value"
+ [ "$api_value" == "$parent" ]
+ check_err $? "Unexpected parent attr value $api_value != $parent"
+}
+
+rate_node_add()
+{
+ local handle=$1
+
+ devlink port function rate add $handle
+}
+
+rate_node_del()
+{
+ local handle=$1
+
+ devlink port function rate del $handle
+}
+
+rate_test()
+{
+ RET=0
+
+ echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs
+ devlink dev eswitch set $DL_HANDLE mode switchdev
+ local leafs=`rate_leafs_get $DL_HANDLE`
+ local num_leafs=`echo $leafs | wc -w`
+ [ "$num_leafs" == "$VF_COUNT" ]
+ check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
+
+ rate=10
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_share $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share
+ rate=$(($rate+10))
+ done
+
+ rate=100
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_max $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max
+ rate=$(($rate+100))
+ done
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 1 ]
+ check_err $? "Expected 1 rate node in output but got $num_nodes"
+
+ local node_tx_share=10
+ rate_attr_tx_rate_check $node1 tx_share $node_tx_share \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share
+
+ local node_tx_max=100
+ rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 0 ]
+ check_err $? "Expected 0 rate node but got $num_nodes"
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ rate_attr_parent_check $r_obj $node1_name \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent
+
+ local node2_name='group2'
+ local node2="$DL_HANDLE/$node2_name"
+ rate_node_add "$node2"
+ check_err $? "Failed to add node $node2"
+
+ rate_attr_parent_check $node2 $node1_name \
+ $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent
+ rate_node_del "$node2"
+ check_err $? "Failed to delete node $node2"
+ rate_attr_set "$r_obj" noparent
+ check_err $? "Failed to unset $r_obj parent node"
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+
+ log_test "rate test"
+}
+
setup_prepare()
{
modprobe netdevsim
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index da49ad2761b5..109900c817be 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -24,13 +24,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
SLEEP_TIME=1
NETDEV=""
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
require_command udevadm
@@ -163,6 +165,16 @@ trap_stats_test()
devlink_trap_action_set $trap_name "drop"
devlink_trap_stats_idle_test $trap_name
check_err $? "Stats of trap $trap_name not idle when action is drop"
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_fail $? "Managed to read trap (hard dropped) statistics when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_err $? "Did not manage to read trap (hard dropped) statistics when should"
+
+ devlink_trap_drop_stats_idle_test $trap_name
+ check_fail $? "Drop stats of trap $trap_name idle when should not"
else
devlink_trap_stats_idle_test $trap_name
check_fail $? "Stats of non-drop trap $trap_name idle when should not"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index 251f228ce63e..fc794cd30389 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -33,13 +33,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
NUM_NETIFS=0
source $lib_dir/lib.sh
-source $lib_dir/devlink_lib.sh
source $lib_dir/fib_offload_lib.sh
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
ipv4_identical_routes()
{
fib_ipv4_identical_routes_test "testns1"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index ba75c81cda91..e8e0dc088d6a 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -44,12 +44,14 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
nexthop_check()
{
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
index ee10b1a8933c..e689ff7a0b12 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/psample.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -14,13 +14,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
CAPTURE_FILE=$(mktemp)
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
# Available at https://github.com/Mellanox/libpsample
require_command psample
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index cf69b2fcce59..dd61118df66e 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
$(OUTPUT)/load_address_4096: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@
$(OUTPUT)/load_address_2097152: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@
$(OUTPUT)/load_address_16777216: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0efcd494daab..0e78b49d0f2f 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -6,3 +6,5 @@ futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
+futex_wait
+futex_requeue
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 23207829ec75..bd1fec59e010 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-INCLUDES := -I../include -I../../
+INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \
+ -I$(KBUILD_OUTPUT)/kselftest/usr/include
CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
LDLIBS := -lpthread -lrt
@@ -14,7 +15,9 @@ TEST_GEN_FILES := \
futex_requeue_pi_signal_restart \
futex_requeue_pi_mismatched_ops \
futex_wait_uninitialized_heap \
- futex_wait_private_mapped_file
+ futex_wait_private_mapped_file \
+ futex_wait \
+ futex_requeue
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
new file mode 100644
index 000000000000..51485be6eb2f
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <limits.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-requeue"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+
+volatile futex_t *f1;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *waiterfn(void *arg)
+{
+ struct timespec to;
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(f1, *f1, &to, 0))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t waiter[10];
+ int res, ret = RET_PASS;
+ int c, i;
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+
+ f1 = &_f1;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(2);
+ ksft_print_msg("%s: Test futex_requeue\n",
+ basename(argv[0]));
+
+ /*
+ * Requeue a waiter from f1 to f2, and wake f2.
+ */
+ if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Requeuing 1 futex from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+
+ info("Waking 1 futex at f2\n");
+ res = futex_wake(&f2, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_requeue simple succeeds\n");
+ }
+
+
+ /*
+ * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
+ * At futex_wake, wake INT_MAX (should be exactly 7).
+ */
+ for (i = 0; i < 10; i++) {
+ if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+ }
+
+ usleep(WAKE_WAIT_US);
+
+ info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
+ if (res != 10) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+ info("Waking INT_MAX futexes at f2\n");
+ res = futex_wake(&f2, INT_MAX, 0);
+ if (res != 7) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_requeue many succeeds\n");
+ }
+
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
new file mode 100644
index 000000000000..685140d9b93d
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+#define SHM_PATH "futex_shm_file"
+
+void *futex;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+static void *waiterfn(void *arg)
+{
+ struct timespec to;
+ unsigned int flags = 0;
+
+ if (arg)
+ flags = *((unsigned int *) arg);
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(futex, 0, &to, flags))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ int res, ret = RET_PASS, fd, c, shm_id;
+ u_int32_t f_private = 0, *shared_data;
+ unsigned int flags = FUTEX_PRIVATE_FLAG;
+ pthread_t waiter;
+ void *shm;
+
+ futex = &f_private;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(3);
+ ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
+
+ /* Testing a private futex */
+ info("Calling private futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling private futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake private succeeds\n");
+ }
+
+ /* Testing an anon page shared memory */
+ shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+ if (shm_id < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ shared_data = shmat(shm_id, NULL, 0);
+
+ *shared_data = 0;
+ futex = shared_data;
+
+ info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
+ }
+
+
+ /* Testing a file backed shared memory */
+ fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+
+ if (ftruncate(fd, sizeof(f_private))) {
+ perror("ftruncate");
+ exit(1);
+ }
+
+ shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (shm == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ memcpy(shm, &f_private, sizeof(f_private));
+
+ futex = shm;
+
+ info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+ res = futex_wake(shm, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
+ }
+
+ /* Freeing resources */
+ shmdt(shared_data);
+ munmap(shm, sizeof(f_private));
+ remove(SHM_PATH);
+ close(fd);
+
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index ee55e6d389a3..1f8f6daaf1e7 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,21 +11,18 @@
*
* HISTORY
* 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ * 2021-Apr-26: More test cases by André Almeida <andrealmeid@collabora.com>
*
*****************************************************************************/
-#include <errno.h>
-#include <getopt.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#include <pthread.h>
#include "futextest.h"
#include "logging.h"
#define TEST_NAME "futex-wait-timeout"
static long timeout_ns = 100000; /* 100us default timeout */
+static futex_t futex_pi;
void usage(char *prog)
{
@@ -37,11 +34,67 @@ void usage(char *prog)
VQUIET, VCRITICAL, VINFO);
}
+/*
+ * Get a PI lock and hold it forever, so the main thread lock_pi will block
+ * and we can test the timeout
+ */
+void *get_pi_lock(void *arg)
+{
+ int ret;
+ volatile futex_t lock = 0;
+
+ ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
+ if (ret != 0)
+ error("futex_lock_pi failed\n", ret);
+
+ /* Blocks forever */
+ ret = futex_wait(&lock, 0, NULL, 0);
+ error("futex_wait failed\n", ret);
+
+ return NULL;
+}
+
+/*
+ * Check if the function returned the expected error
+ */
+static void test_timeout(int res, int *ret, char *test_name, int err)
+{
+ if (!res || errno != err) {
+ ksft_test_result_fail("%s returned %d\n", test_name,
+ res < 0 ? errno : res);
+ *ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("%s succeeds\n", test_name);
+ }
+}
+
+/*
+ * Calculate absolute timeout and correct overflow
+ */
+static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
+ long timeout_ns)
+{
+ if (clock_gettime(clockid, to)) {
+ error("clock_gettime failed\n", errno);
+ return errno;
+ }
+
+ to->tv_nsec += timeout_ns;
+
+ if (to->tv_nsec >= 1000000000) {
+ to->tv_sec++;
+ to->tv_nsec -= 1000000000;
+ }
+
+ return 0;
+}
+
int main(int argc, char *argv[])
{
futex_t f1 = FUTEX_INITIALIZER;
- struct timespec to;
int res, ret = RET_PASS;
+ struct timespec to;
+ pthread_t thread;
int c;
while ((c = getopt(argc, argv, "cht:v:")) != -1) {
@@ -65,22 +118,63 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(1);
+ ksft_set_plan(7);
ksft_print_msg("%s: Block on a futex and wait for timeout\n",
basename(argv[0]));
ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
- /* initialize timeout */
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
+
+ /* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
- info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
- res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
- if (!res || errno != ETIMEDOUT) {
- fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
- ret = RET_FAIL;
- }
+ res = futex_wait(&f1, f1, &to, 0);
+ test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, 0);
+ test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
+ test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+ /*
+ * FUTEX_LOCK_PI with CLOCK_REALTIME
+ * Due to historical reasons, FUTEX_LOCK_PI supports only realtime
+ * clock, but requires the caller to not set CLOCK_REALTIME flag.
+ *
+ * If you call FUTEX_LOCK_PI with a monotonic clock, it'll be
+ * interpreted as a realtime clock, and (unless you mess your machine's
+ * time or your time machine) the monotonic clock value is always
+ * smaller than realtime and the syscall will timeout immediately.
+ */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_lock_pi(&futex_pi, &to, 0, 0);
+ test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+
+ /* Test operations that don't support FUTEX_CLOCK_REALTIME */
+ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
- print_result(TEST_NAME, ret);
+ ksft_print_cnts();
return ret;
}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 1acb6ace1680..11a9d62290f5 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -73,3 +73,9 @@ echo
echo
./futex_wait_uninitialized_heap $COLOR
./futex_wait_private_mapped_file $COLOR
+
+echo
+./futex_wait $COLOR
+
+echo
+./futex_requeue $COLOR
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index bd83158e0e0b..06a351b4f93b 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+/aarch64/debug-exceptions
/aarch64/get-reg-list
-/aarch64/get-reg-list-sve
/aarch64/vgic_init
/s390x/memop
/s390x/resets
@@ -8,12 +8,15 @@
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
+/x86_64/emulator_error_test
/x86_64/get_cpuid_test
/x86_64/get_msr_index_features
/x86_64/kvm_pv_test
/x86_64/hyperv_clock
/x86_64/hyperv_cpuid
+/x86_64/hyperv_features
/x86_64/mmio_warning_test
+/x86_64/mmu_role_test
/x86_64/platform_info_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
@@ -29,6 +32,7 @@
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
+/x86_64/vmx_nested_tsc_scaling_test
/x86_64/xapic_ipi_test
/x86_64/xen_shinfo_test
/x86_64/xen_vmcall_test
@@ -41,5 +45,7 @@
/kvm_create_max_vcpus
/kvm_page_table_test
/memslot_modification_stress_test
+/memslot_perf_test
/set_memory_region_test
/steal_time
+/kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index e439d027939d..b853be2ae3c6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -33,19 +33,22 @@ ifeq ($(ARCH),s390)
UNAME_M := s390x
endif
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
+LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
+TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -60,6 +63,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
@@ -74,11 +78,13 @@ TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86_64 += memslot_perf_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
+TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
+TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
@@ -87,6 +93,7 @@ TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += kvm_page_table_test
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
+TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/resets
@@ -96,6 +103,7 @@ TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS_s390x += kvm_page_table_test
TEST_GEN_PROGS_s390x += set_memory_region_test
+TEST_GEN_PROGS_s390x += kvm_binary_stats_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
new file mode 100644
index 000000000000..e5e6c92b60da
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MDSCR_KDE (1 << 13)
+#define MDSCR_MDE (1 << 15)
+#define MDSCR_SS (1 << 0)
+
+#define DBGBCR_LEN8 (0xff << 5)
+#define DBGBCR_EXEC (0x0 << 3)
+#define DBGBCR_EL1 (0x1 << 1)
+#define DBGBCR_E (0x1 << 0)
+
+#define DBGWCR_LEN8 (0xff << 5)
+#define DBGWCR_RD (0x1 << 3)
+#define DBGWCR_WR (0x2 << 3)
+#define DBGWCR_EL1 (0x1 << 1)
+#define DBGWCR_E (0x1 << 0)
+
+#define SPSR_D (1 << 9)
+#define SPSR_SS (1 << 21)
+
+extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define PC(v) ((uint64_t)&(v))
+
+static void reset_debug_state(void)
+{
+ asm volatile("msr daifset, #8");
+
+ write_sysreg(osdlr_el1, 0);
+ write_sysreg(oslar_el1, 0);
+ isb();
+
+ write_sysreg(mdscr_el1, 0);
+ /* This test only uses the first bp and wp slot. */
+ write_sysreg(dbgbvr0_el1, 0);
+ write_sysreg(dbgbcr0_el1, 0);
+ write_sysreg(dbgwcr0_el1, 0);
+ write_sysreg(dbgwvr0_el1, 0);
+ isb();
+}
+
+static void install_wp(uint64_t addr)
+{
+ uint32_t wcr;
+ uint32_t mdscr;
+
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+ write_sysreg(dbgwcr0_el1, wcr);
+ write_sysreg(dbgwvr0_el1, addr);
+ isb();
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+ write_sysreg(mdscr_el1, mdscr);
+ isb();
+}
+
+static void install_hw_bp(uint64_t addr)
+{
+ uint32_t bcr;
+ uint32_t mdscr;
+
+ bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+ write_sysreg(dbgbcr0_el1, bcr);
+ write_sysreg(dbgbvr0_el1, addr);
+ isb();
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+ write_sysreg(mdscr_el1, mdscr);
+ isb();
+}
+
+static void install_ss(void)
+{
+ uint32_t mdscr;
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+ write_sysreg(mdscr_el1, mdscr);
+ isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(void)
+{
+ GUEST_SYNC(0);
+
+ /* Software-breakpoint */
+ asm volatile("sw_bp: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+ GUEST_SYNC(1);
+
+ /* Hardware-breakpoint */
+ reset_debug_state();
+ install_hw_bp(PC(hw_bp));
+ asm volatile("hw_bp: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+ GUEST_SYNC(2);
+
+ /* Hardware-breakpoint + svc */
+ reset_debug_state();
+ install_hw_bp(PC(bp_svc));
+ asm volatile("bp_svc: svc #0");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+ GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+ GUEST_SYNC(3);
+
+ /* Hardware-breakpoint + software-breakpoint */
+ reset_debug_state();
+ install_hw_bp(PC(bp_brk));
+ asm volatile("bp_brk: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+ GUEST_SYNC(4);
+
+ /* Watchpoint */
+ reset_debug_state();
+ install_wp(PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ GUEST_SYNC(5);
+
+ /* Single-step */
+ reset_debug_state();
+ install_ss();
+ ss_idx = 0;
+ asm volatile("ss_start:\n"
+ "mrs x0, esr_el1\n"
+ "add x0, x0, #1\n"
+ "msr daifset, #8\n"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+ GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+ GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+ GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+ sw_bp_addr = regs->pc;
+ regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+ hw_bp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+ wp_data_addr = read_sysreg(far_el1);
+ wp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+ GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+ ss_addr[ss_idx++] = regs->pc;
+ regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+ svc_addr = regs->pc;
+}
+
+static int debug_version(struct kvm_vm *vm)
+{
+ uint64_t id_aa64dfr0;
+
+ get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0);
+ return id_aa64dfr0 & 0xf;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ if (debug_version(vm) < 6) {
+ print_skip("Armv8 debug architecture not supported.");
+ kvm_vm_free(vm);
+ exit(KSFT_SKIP);
+ }
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_BRK_INS, guest_sw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_WP_CURRENT, guest_wp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SSTEP_CURRENT, guest_ss_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SVC64, guest_svc_handler);
+
+ for (stage = 0; stage < 7; stage++) {
+ vcpu_run(vm, VCPU_ID);
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Stage %d: Unexpected sync ucall, got %lx",
+ stage, (ulong)uc.args[1]);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
deleted file mode 100644
index efba76682b4b..000000000000
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
+++ /dev/null
@@ -1,3 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define REG_LIST_SVE
-#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 486932164cf2..a16c8f05366c 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -27,17 +27,37 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "kvm_util.h"
#include "test_util.h"
#include "processor.h"
-#ifdef REG_LIST_SVE
-#define reg_list_sve() (true)
-#else
-#define reg_list_sve() (false)
-#endif
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+struct reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+};
+
+struct vcpu_config {
+ char *name;
+ struct reg_sublist sublists[];
+};
+
+static struct vcpu_config *vcpu_configs[];
+static int vcpu_configs_n;
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
#define for_each_reg(i) \
for ((i) = 0; (i) < reg_list->n; ++(i))
@@ -54,12 +74,41 @@
for_each_reg_filtered(i) \
if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+static const char *config_name(struct vcpu_config *c)
+{
+ struct reg_sublist *s;
+ int len = 0;
-static struct kvm_reg_list *reg_list;
+ if (c->name)
+ return c->name;
-static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
-static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
-static __u64 *blessed_reg, blessed_n;
+ for_each_sublist(c, s)
+ len += strlen(s->name) + 1;
+
+ c->name = malloc(len);
+
+ len = 0;
+ for_each_sublist(c, s) {
+ if (!strcmp(s->name, "base"))
+ continue;
+ strcat(c->name + len, s->name);
+ len += strlen(s->name) + 1;
+ c->name[len - 1] = '+';
+ }
+ c->name[len - 1] = '\0';
+
+ return c->name;
+}
+
+static bool has_cap(struct vcpu_config *c, long capability)
+{
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability == capability)
+ return true;
+ return false;
+}
static bool filter_reg(__u64 reg)
{
@@ -96,11 +145,13 @@ static const char *str_with_index(const char *template, __u64 index)
return (const char *)str;
}
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
#define CORE_REGS_XX_NR_WORDS 2
#define CORE_SPSR_XX_NR_WORDS 2
#define CORE_FPREGS_XX_NR_WORDS 4
-static const char *core_id_to_str(__u64 id)
+static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
{
__u64 core_off = id & ~REG_MASK, idx;
@@ -111,7 +162,7 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
KVM_REG_ARM_CORE_REG(regs.regs[30]):
idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+ TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
case KVM_REG_ARM_CORE_REG(regs.sp):
return "KVM_REG_ARM_CORE_REG(regs.sp)";
@@ -126,12 +177,12 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(spsr[0]) ...
KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
- TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+ TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+ TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
@@ -139,11 +190,11 @@ static const char *core_id_to_str(__u64 id)
return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
}
- TEST_FAIL("Unknown core reg id: 0x%llx", id);
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
return NULL;
}
-static const char *sve_id_to_str(__u64 id)
+static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
{
__u64 sve_off, n, i;
@@ -153,37 +204,37 @@ static const char *sve_id_to_str(__u64 id)
sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
- TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+ TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
switch (sve_off) {
case KVM_REG_ARM64_SVE_ZREG_BASE ...
KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
- "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
case KVM_REG_ARM64_SVE_PREG_BASE ...
KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
- "Unexpected bits set in SVE PREG id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
case KVM_REG_ARM64_SVE_FFR_BASE:
TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
- "Unexpected bits set in SVE FFR id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
return "KVM_REG_ARM64_SVE_FFR(0)";
}
return NULL;
}
-static void print_reg(__u64 id)
+static void print_reg(struct vcpu_config *c, __u64 id)
{
unsigned op0, op1, crn, crm, op2;
const char *reg_size = NULL;
TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
- "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+ "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
switch (id & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U8:
@@ -214,17 +265,17 @@ static void print_reg(__u64 id)
reg_size = "KVM_REG_SIZE_U2048";
break;
default:
- TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
}
switch (id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE:
- printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
break;
case KVM_REG_ARM_DEMUX:
TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
- "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+ "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
break;
@@ -235,23 +286,23 @@ static void print_reg(__u64 id)
crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
- "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+ "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
break;
case KVM_REG_ARM_FW:
TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
- "Unexpected bits set in FW reg id: 0x%llx", id);
+ "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
break;
case KVM_REG_ARM64_SVE:
- if (reg_list_sve())
- printf("\t%s,\n", sve_id_to_str(id));
+ if (has_cap(c, KVM_CAP_ARM_SVE))
+ printf("\t%s,\n", sve_id_to_str(c, id));
else
- TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+ TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
break;
default:
- TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+ TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+ config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
}
}
@@ -312,56 +363,58 @@ static void core_reg_fixup(void)
reg_list = tmp;
}
-static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
{
- if (reg_list_sve())
- init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability)
+ init->features[s->feature / 32] |= 1 << (s->feature % 32);
}
-static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c)
{
+ struct reg_sublist *s;
int feature;
- if (reg_list_sve()) {
- feature = KVM_ARM_VCPU_SVE;
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ for_each_sublist(c, s) {
+ if (s->finalize) {
+ feature = s->feature;
+ vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
}
}
-static void check_supported(void)
+static void check_supported(struct vcpu_config *c)
{
- if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
- fprintf(stderr, "SVE not available, skipping tests\n");
- exit(KSFT_SKIP);
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s) {
+ if (s->capability && !kvm_check_cap(s->capability)) {
+ fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name);
+ exit(KSFT_SKIP);
+ }
}
}
-int main(int ac, char **av)
+static bool print_list;
+static bool print_filtered;
+static bool fixup_core_regs;
+
+static void run_test(struct vcpu_config *c)
{
struct kvm_vcpu_init init = { .target = -1, };
- int new_regs = 0, missing_regs = 0, i;
+ int new_regs = 0, missing_regs = 0, i, n;
int failed_get = 0, failed_set = 0, failed_reject = 0;
- bool print_list = false, print_filtered = false, fixup_core_regs = false;
struct kvm_vm *vm;
- __u64 *vec_regs;
+ struct reg_sublist *s;
- check_supported();
-
- for (i = 1; i < ac; ++i) {
- if (strcmp(av[i], "--core-reg-fixup") == 0)
- fixup_core_regs = true;
- else if (strcmp(av[i], "--list") == 0)
- print_list = true;
- else if (strcmp(av[i], "--list-filtered") == 0)
- print_filtered = true;
- else
- TEST_FAIL("Unknown option: %s\n", av[i]);
- }
+ check_supported(c);
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- prepare_vcpu_init(&init);
+ prepare_vcpu_init(c, &init);
aarch64_vcpu_add_default(vm, 0, &init, NULL);
- finalize_vcpu(vm, 0);
+ finalize_vcpu(vm, 0, c);
reg_list = vcpu_get_reg_list(vm, 0);
@@ -374,10 +427,10 @@ int main(int ac, char **av)
__u64 id = reg_list->reg[i];
if ((print_list && !filter_reg(id)) ||
(print_filtered && filter_reg(id)))
- print_reg(id);
+ print_reg(c, id);
}
putchar('\n');
- return 0;
+ return;
}
/*
@@ -396,50 +449,52 @@ int main(int ac, char **av)
.id = reg_list->reg[i],
.addr = (__u64)&addr,
};
+ bool reject_reg = false;
int ret;
ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
if (ret) {
- puts("Failed to get ");
- print_reg(reg.id);
+ printf("%s: Failed to get ", config_name(c));
+ print_reg(c, reg.id);
putchar('\n');
++failed_get;
}
/* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
- if (find_reg(rejects_set, rejects_set_n, reg.id)) {
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret != -1 || errno != EPERM) {
- printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
- print_reg(reg.id);
- putchar('\n');
- ++failed_reject;
+ for_each_sublist(c, s) {
+ if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+ reject_reg = true;
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret != -1 || errno != EPERM) {
+ printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+ print_reg(c, reg.id);
+ putchar('\n');
+ ++failed_reject;
+ }
+ break;
}
- continue;
}
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret) {
- puts("Failed to set ");
- print_reg(reg.id);
- putchar('\n');
- ++failed_set;
+ if (!reject_reg) {
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret) {
+ printf("%s: Failed to set ", config_name(c));
+ print_reg(c, reg.id);
+ putchar('\n');
+ ++failed_set;
+ }
}
}
- if (reg_list_sve()) {
- blessed_n = base_regs_n + sve_regs_n;
- vec_regs = sve_regs;
- } else {
- blessed_n = base_regs_n + vregs_n;
- vec_regs = vregs;
- }
-
+ for_each_sublist(c, s)
+ blessed_n += s->regs_n;
blessed_reg = calloc(blessed_n, sizeof(__u64));
- for (i = 0; i < base_regs_n; ++i)
- blessed_reg[i] = base_regs[i];
- for (i = 0; i < blessed_n - base_regs_n; ++i)
- blessed_reg[base_regs_n + i] = vec_regs[i];
+
+ n = 0;
+ for_each_sublist(c, s) {
+ for (i = 0; i < s->regs_n; ++i)
+ blessed_reg[n++] = s->regs[i];
+ }
for_each_new_reg(i)
++new_regs;
@@ -448,40 +503,141 @@ int main(int ac, char **av)
++missing_regs;
if (new_regs || missing_regs) {
- printf("Number blessed registers: %5lld\n", blessed_n);
- printf("Number registers: %5lld\n", reg_list->n);
+ printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+ printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n);
}
if (new_regs) {
- printf("\nThere are %d new registers.\n"
+ printf("\n%s: There are %d new registers.\n"
"Consider adding them to the blessed reg "
- "list with the following lines:\n\n", new_regs);
+ "list with the following lines:\n\n", config_name(c), new_regs);
for_each_new_reg(i)
- print_reg(reg_list->reg[i]);
+ print_reg(c, reg_list->reg[i]);
putchar('\n');
}
if (missing_regs) {
- printf("\nThere are %d missing registers.\n"
- "The following lines are missing registers:\n\n", missing_regs);
+ printf("\n%s: There are %d missing registers.\n"
+ "The following lines are missing registers:\n\n", config_name(c), missing_regs);
for_each_missing_reg(i)
- print_reg(blessed_reg[i]);
+ print_reg(c, blessed_reg[i]);
putchar('\n');
}
TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
- "There are %d missing registers; "
+ "%s: There are %d missing registers; "
"%d registers failed get; %d registers failed set; %d registers failed reject",
- missing_regs, failed_get, failed_set, failed_reject);
+ config_name(c), missing_regs, failed_get, failed_set, failed_reject);
- return 0;
+ pr_info("%s: PASS\n", config_name(c));
+ blessed_n = 0;
+ free(blessed_reg);
+ free(reg_list);
+ kvm_vm_free(vm);
+}
+
+static void help(void)
+{
+ struct vcpu_config *c;
+ int i;
+
+ printf(
+ "\n"
+ "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
+ " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n"
+ " '<selection>' may be\n");
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ printf(
+ " '%s'\n", config_name(c));
+ }
+
+ printf(
+ "\n"
+ " --list Print the register list rather than test it (requires --config)\n"
+ " --list-filtered Print registers that would normally be filtered out (requires --config)\n"
+ " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n"
+ "\n"
+ );
+}
+
+static struct vcpu_config *parse_config(const char *config)
+{
+ struct vcpu_config *c;
+ int i;
+
+ if (config[8] != '=')
+ help(), exit(1);
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (strcmp(config_name(c), &config[9]) == 0)
+ break;
+ }
+
+ if (i == vcpu_configs_n)
+ help(), exit(1);
+
+ return c;
+}
+
+int main(int ac, char **av)
+{
+ struct vcpu_config *c, *sel = NULL;
+ int i, ret = 0;
+ pid_t pid;
+
+ for (i = 1; i < ac; ++i) {
+ if (strcmp(av[i], "--core-reg-fixup") == 0)
+ fixup_core_regs = true;
+ else if (strncmp(av[i], "--config", 8) == 0)
+ sel = parse_config(av[i]);
+ else if (strcmp(av[i], "--list") == 0)
+ print_list = true;
+ else if (strcmp(av[i], "--list-filtered") == 0)
+ print_filtered = true;
+ else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
+ help(), exit(0);
+ else
+ help(), exit(1);
+ }
+
+ if (print_list || print_filtered) {
+ /*
+ * We only want to print the register list of a single config.
+ */
+ if (!sel)
+ help(), exit(1);
+ }
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (sel && c != sel)
+ continue;
+
+ pid = fork();
+
+ if (!pid) {
+ run_test(c);
+ exit(0);
+ } else {
+ int wstatus;
+ pid_t wpid = wait(&wstatus);
+ TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+ if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+ ret = KSFT_FAIL;
+ }
+ }
+
+ return ret;
}
/*
* The current blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
*
- * The blessed list is up to date with kernel version v5.10-rc5
+ * The blessed list is up to date with kernel version v5.13-rc3
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -673,8 +829,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
@@ -683,6 +837,16 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+ ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
@@ -692,8 +856,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 8, 0),
ARM64_SYS_REG(3, 3, 14, 8, 1),
ARM64_SYS_REG(3, 3, 14, 8, 2),
@@ -757,11 +919,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 14, 15, 5),
ARM64_SYS_REG(3, 3, 14, 15, 6),
ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
- ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
};
-static __u64 base_regs_n = ARRAY_SIZE(base_regs);
static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
@@ -797,7 +955,6 @@ static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
};
-static __u64 vregs_n = ARRAY_SIZE(vregs);
static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_VLS,
@@ -852,11 +1009,57 @@ static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_FFR(0),
ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
};
-static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
-static __u64 rejects_set[] = {
-#ifdef REG_LIST_SVE
+static __u64 sve_rejects_set[] = {
KVM_REG_ARM64_SVE_VLS,
-#endif
};
-static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
+
+#define BASE_SUBLIST \
+ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+ { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+ .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+
+static struct vcpu_config vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_config *vcpu_configs[] = {
+ &vregs_config,
+ &vregs_pmu_config,
+ &sve_config,
+ &sve_pmu_config,
+};
+static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 5f7a229c3af1..b74704305835 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -9,6 +9,7 @@
#define _GNU_SOURCE /* for pipe2 */
+#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -38,6 +39,7 @@
static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static size_t demand_paging_size;
static char *guest_data_prototype;
static void *vcpu_worker(void *data)
@@ -71,36 +73,51 @@ static void *vcpu_worker(void *data)
return NULL;
}
-static int handle_uffd_page_request(int uffd, uint64_t addr)
+static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
{
- pid_t tid;
+ pid_t tid = syscall(__NR_gettid);
struct timespec start;
struct timespec ts_diff;
- struct uffdio_copy copy;
int r;
- tid = syscall(__NR_gettid);
+ clock_gettime(CLOCK_MONOTONIC, &start);
- copy.src = (uint64_t)guest_data_prototype;
- copy.dst = addr;
- copy.len = perf_test_args.host_page_size;
- copy.mode = 0;
+ if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
+ struct uffdio_copy copy;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ copy.src = (uint64_t)guest_data_prototype;
+ copy.dst = addr;
+ copy.len = demand_paging_size;
+ copy.mode = 0;
- r = ioctl(uffd, UFFDIO_COPY, &copy);
- if (r == -1) {
- pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
- addr, tid, errno);
- return r;
+ r = ioctl(uffd, UFFDIO_COPY, &copy);
+ if (r == -1) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
+ addr, tid, errno);
+ return r;
+ }
+ } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+ struct uffdio_continue cont = {0};
+
+ cont.range.start = addr;
+ cont.range.len = demand_paging_size;
+
+ r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
+ if (r == -1) {
+ pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
+ addr, tid, errno);
+ return r;
+ }
+ } else {
+ TEST_FAIL("Invalid uffd mode %d", uffd_mode);
}
ts_diff = timespec_elapsed(start);
- PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
+ PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
timespec_to_ns(ts_diff));
PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
- perf_test_args.host_page_size, addr, tid);
+ demand_paging_size, addr, tid);
return 0;
}
@@ -108,6 +125,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
bool quit_uffd_thread;
struct uffd_handler_args {
+ int uffd_mode;
int uffd;
int pipefd;
useconds_t delay;
@@ -169,7 +187,7 @@ static void *uffd_handler_thread_fn(void *arg)
if (r == -1) {
if (errno == EAGAIN)
continue;
- pr_info("Read of uffd gor errno %d", errno);
+ pr_info("Read of uffd got errno %d\n", errno);
return NULL;
}
@@ -184,7 +202,7 @@ static void *uffd_handler_thread_fn(void *arg)
if (delay)
usleep(delay);
addr = msg.arg.pagefault.address;
- r = handle_uffd_page_request(uffd, addr);
+ r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
if (r < 0)
return NULL;
pages++;
@@ -198,43 +216,53 @@ static void *uffd_handler_thread_fn(void *arg)
return NULL;
}
-static int setup_demand_paging(struct kvm_vm *vm,
- pthread_t *uffd_handler_thread, int pipefd,
- useconds_t uffd_delay,
- struct uffd_handler_args *uffd_args,
- void *hva, uint64_t len)
+static void setup_demand_paging(struct kvm_vm *vm,
+ pthread_t *uffd_handler_thread, int pipefd,
+ int uffd_mode, useconds_t uffd_delay,
+ struct uffd_handler_args *uffd_args,
+ void *hva, void *alias, uint64_t len)
{
+ bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
int uffd;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
+ uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == -1) {
- pr_info("uffd creation failed\n");
- return -1;
+ PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
+ is_minor ? "MINOR" : "MISSING",
+ is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+
+ /* In order to get minor faults, prefault via the alias. */
+ if (is_minor) {
+ size_t p;
+
+ expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+
+ TEST_ASSERT(alias != NULL, "Alias required for minor faults");
+ for (p = 0; p < (len / demand_paging_size); ++p) {
+ memcpy(alias + (p * demand_paging_size),
+ guest_data_prototype, demand_paging_size);
+ }
}
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
+
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
- pr_info("ioctl uffdio_api failed\n");
- return -1;
- }
+ TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
+ "ioctl UFFDIO_API failed: %" PRIu64,
+ (uint64_t)uffdio_api.api);
uffdio_register.range.start = (uint64_t)hva;
uffdio_register.range.len = len;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
- pr_info("ioctl uffdio_register failed\n");
- return -1;
- }
-
- if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
- UFFD_API_RANGE_IOCTLS) {
- pr_info("unexpected userfaultfd ioctl set\n");
- return -1;
- }
+ uffdio_register.mode = uffd_mode;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
+ "ioctl UFFDIO_REGISTER failed");
+ TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
+ expected_ioctls, "missing userfaultfd ioctls");
+ uffd_args->uffd_mode = uffd_mode;
uffd_args->uffd = uffd;
uffd_args->pipefd = pipefd;
uffd_args->delay = uffd_delay;
@@ -243,13 +271,12 @@ static int setup_demand_paging(struct kvm_vm *vm,
PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
hva, hva + len);
-
- return 0;
}
struct test_params {
- bool use_uffd;
+ int uffd_mode;
useconds_t uffd_delay;
+ enum vm_mem_backing_src_type src_type;
bool partition_vcpu_memory_access;
};
@@ -267,14 +294,16 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int r;
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
- VM_MEM_SRC_ANONYMOUS);
+ p->src_type);
perf_test_args.wr_fract = 1;
- guest_data_prototype = malloc(perf_test_args.host_page_size);
+ demand_paging_size = get_backing_src_pagesz(p->src_type);
+
+ guest_data_prototype = malloc(demand_paging_size);
TEST_ASSERT(guest_data_prototype,
"Failed to allocate buffer for guest data pattern");
- memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
+ memset(guest_data_prototype, 0xAB, demand_paging_size);
vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
TEST_ASSERT(vcpu_threads, "Memory allocation failed");
@@ -282,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
p->partition_vcpu_memory_access);
- if (p->use_uffd) {
+ if (p->uffd_mode) {
uffd_handler_threads =
malloc(nr_vcpus * sizeof(*uffd_handler_threads));
TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
@@ -296,6 +325,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
vm_paddr_t vcpu_gpa;
void *vcpu_hva;
+ void *vcpu_alias;
uint64_t vcpu_mem_size;
@@ -310,8 +340,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
- /* Cache the HVA pointer of the region */
+ /* Cache the host addresses of the region */
vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+ vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
/*
* Set up user fault fd to handle demand paging
@@ -321,13 +352,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(!r, "Failed to set up pipefd");
- r = setup_demand_paging(vm,
- &uffd_handler_threads[vcpu_id],
- pipefds[vcpu_id * 2],
- p->uffd_delay, &uffd_args[vcpu_id],
- vcpu_hva, vcpu_mem_size);
- if (r < 0)
- exit(-r);
+ setup_demand_paging(vm, &uffd_handler_threads[vcpu_id],
+ pipefds[vcpu_id * 2], p->uffd_mode,
+ p->uffd_delay, &uffd_args[vcpu_id],
+ vcpu_hva, vcpu_alias,
+ vcpu_mem_size);
}
}
@@ -355,7 +384,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("All vCPU threads joined\n");
- if (p->use_uffd) {
+ if (p->uffd_mode) {
char c;
/* Tell the user fault fd handler threads to quit */
@@ -377,7 +406,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
free(guest_data_prototype);
free(vcpu_threads);
- if (p->use_uffd) {
+ if (p->uffd_mode) {
free(uffd_handler_threads);
free(uffd_args);
free(pipefds);
@@ -387,17 +416,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
- " [-b memory] [-v vcpus] [-o]\n", name);
+ printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
+ " [-b memory] [-t type] [-v vcpus] [-o]\n", name);
guest_modes_help();
- printf(" -u: use User Fault FD to handle vCPU page\n"
- " faults.\n");
+ printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
+ " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n");
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
+ printf(" -t: The type of backing memory to use. Default: anonymous\n");
+ backing_src_help();
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
@@ -409,19 +440,24 @@ int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
+ .src_type = VM_MEM_SRC_ANONYMOUS,
.partition_vcpu_memory_access = true,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
break;
case 'u':
- p.use_uffd = true;
+ if (!strcmp("MISSING", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+ else if (!strcmp("MINOR", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
+ TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
break;
case 'd':
p.uffd_delay = strtoul(optarg, NULL, 0);
@@ -430,6 +466,9 @@ int main(int argc, char *argv[])
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
+ case 't':
+ p.src_type = parse_backing_src_type(optarg);
+ break;
case 'v':
nr_vcpus = atoi(optarg);
TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
@@ -445,6 +484,11 @@ int main(int argc, char *argv[])
}
}
+ if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
+ !backing_src_is_shared(p.src_type)) {
+ TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+ }
+
for_each_guest_mode(run_test, &p);
return 0;
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 81edbd23d371..5fe0140e407e 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -16,7 +16,6 @@
#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
-#include <asm/barrier.h>
#include <linux/atomic.h>
#include "kvm_util.h"
@@ -681,7 +680,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
#ifdef __x86_64__
vm_create_irqchip(vm);
#endif
@@ -761,7 +760,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
KVM_MEM_LOG_DIRTY_PAGES);
/* Do mapping for the dirty track memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index 5aadf84c91c0..b21c69a56daa 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -105,7 +105,7 @@ static void run_test(uint32_t run)
CPU_SET(i, &cpu_set);
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
vm_create_irqchip(vm);
pr_debug("%s: [%d] start vcpus\n", __func__, run);
@@ -132,6 +132,36 @@ static void run_test(uint32_t run)
TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
}
+void wait_for_child_setup(pid_t pid)
+{
+ /*
+ * Wait for the child to post to the semaphore, but wake up periodically
+ * to check if the child exited prematurely.
+ */
+ for (;;) {
+ const struct timespec wait_period = { .tv_sec = 1 };
+ int status;
+
+ if (!sem_timedwait(sem, &wait_period))
+ return;
+
+ /* Child is still running, keep waiting. */
+ if (pid != waitpid(pid, &status, WNOHANG))
+ continue;
+
+ /*
+ * Child is no longer running, which is not expected.
+ *
+ * If it exited with a non-zero status, we explicitly forward
+ * the child's status in case it exited with KSFT_SKIP.
+ */
+ if (WIFEXITED(status))
+ exit(WEXITSTATUS(status));
+ else
+ TEST_ASSERT(false, "Child exited unexpectedly");
+ }
+}
+
int main(int argc, char **argv)
{
uint32_t i;
@@ -148,7 +178,7 @@ int main(int argc, char **argv)
run_test(i); /* This function always exits */
pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
- sem_wait(sem);
+ wait_for_child_setup(pid);
r = (rand() % DELAY_US_MAX) + 1;
pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
usleep(r);
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index b7fa0c8551db..27dc5c2e56b9 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,16 +8,20 @@
#define SELFTEST_KVM_PROCESSOR_H
#include "kvm_util.h"
+#include <linux/stringify.h>
#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-#define CPACR_EL1 3, 0, 1, 0, 2
-#define TCR_EL1 3, 0, 2, 0, 2
-#define MAIR_EL1 3, 0, 10, 2, 0
-#define TTBR0_EL1 3, 0, 2, 0, 0
-#define SCTLR_EL1 3, 0, 1, 0, 0
+#define CPACR_EL1 3, 0, 1, 0, 2
+#define TCR_EL1 3, 0, 2, 0, 2
+#define MAIR_EL1 3, 0, 10, 2, 0
+#define TTBR0_EL1 3, 0, 2, 0, 0
+#define SCTLR_EL1 3, 0, 1, 0, 0
+#define VBAR_EL1 3, 0, 12, 0, 0
+
+#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0
/*
* Default MAIR
@@ -56,4 +60,73 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_init *init, void *guest_code);
+struct ex_regs {
+ u64 regs[31];
+ u64 sp;
+ u64 pc;
+ u64 pstate;
+};
+
+#define VECTOR_NUM 16
+
+enum {
+ VECTOR_SYNC_CURRENT_SP0,
+ VECTOR_IRQ_CURRENT_SP0,
+ VECTOR_FIQ_CURRENT_SP0,
+ VECTOR_ERROR_CURRENT_SP0,
+
+ VECTOR_SYNC_CURRENT,
+ VECTOR_IRQ_CURRENT,
+ VECTOR_FIQ_CURRENT,
+ VECTOR_ERROR_CURRENT,
+
+ VECTOR_SYNC_LOWER_64,
+ VECTOR_IRQ_LOWER_64,
+ VECTOR_FIQ_LOWER_64,
+ VECTOR_ERROR_LOWER_64,
+
+ VECTOR_SYNC_LOWER_32,
+ VECTOR_IRQ_LOWER_32,
+ VECTOR_FIQ_LOWER_32,
+ VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+ (v) == VECTOR_SYNC_CURRENT || \
+ (v) == VECTOR_SYNC_LOWER_64 || \
+ (v) == VECTOR_SYNC_LOWER_32)
+
+#define ESR_EC_NUM 64
+#define ESR_EC_SHIFT 26
+#define ESR_EC_MASK (ESR_EC_NUM - 1)
+
+#define ESR_EC_SVC64 0x15
+#define ESR_EC_HW_BP_CURRENT 0x31
+#define ESR_EC_SSTEP_CURRENT 0x33
+#define ESR_EC_WP_CURRENT 0x35
+#define ESR_EC_BRK_INS 0x3c
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+ int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+ int vector, int ec, handler_fn handler);
+
+#define write_sysreg(reg, val) \
+({ \
+ u64 __val = (u64)(val); \
+ asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \
+})
+
+#define read_sysreg(reg) \
+({ u64 val; \
+ asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\
+ val; \
+})
+
+#define isb() asm volatile("isb" : : : "memory")
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index a8f022794ce3..615ab254899d 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -30,6 +30,7 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
@@ -43,6 +44,7 @@ enum vm_guest_mode {
VM_MODE_P40V48_4K,
VM_MODE_P40V48_64K,
VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
NUM_VM_MODES,
};
@@ -60,7 +62,7 @@ enum vm_guest_mode {
#elif defined(__s390x__)
-#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+#define VM_MODE_DEFAULT VM_MODE_P47V64_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 16)
@@ -77,6 +79,7 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
+int open_kvm_dev_path_or_exit(void);
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
@@ -96,8 +99,7 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
size_t len);
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
@@ -139,13 +141,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot);
+ unsigned int npages);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
/*
* Address Guest Virtual to Guest Physical
@@ -234,7 +239,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
const char *exit_reason_str(unsigned int exit_reason);
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pgd_alloc(struct kvm_vm *vm);
/*
* VM Virtual Page Map
@@ -252,13 +257,13 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
* Within @vm, creates a virtual translation for the page starting
* at @vaddr to the page starting at @paddr.
*/
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
uint32_t memslot);
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
/*
* Create a VM with reasonable defaults
@@ -283,10 +288,11 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
uint32_t num_percpu_pages, void *guest_code,
uint32_t vcpuids[]);
-/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
+/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t extra_mem_pages, uint32_t num_percpu_pages,
- void *guest_code, uint32_t vcpuids[]);
+ uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
/*
* Adds a vCPU with reasonable defaults (e.g. a stack)
@@ -302,7 +308,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
unsigned int vm_get_page_size(struct kvm_vm *vm);
unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+uint64_t vm_get_max_gfn(struct kvm_vm *vm);
int vm_get_fd(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
@@ -347,6 +353,7 @@ enum {
UCALL_SYNC,
UCALL_ABORT,
UCALL_DONE,
+ UCALL_UNHANDLED,
};
#define UCALL_MAX_ARGS 6
@@ -365,26 +372,31 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- #_condition, __LINE__, _args); \
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ _condstr, __LINE__, _args); \
} while (0)
#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT((_condition), 0, 0)
+ __GUEST_ASSERT(_condition, #_condition, 0, 0)
#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT((_condition), 1, (arg1))
+ __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+ __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+ __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+ __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+
+int vm_get_stats_fd(struct kvm_vm *vm);
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index fade3130eb01..d79be15dd3d2 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -17,6 +17,7 @@
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sys/mman.h>
#include "kselftest.h"
static inline int _no_printf(const char *format, ...) { return 0; }
@@ -84,6 +85,8 @@ enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+ VM_MEM_SRC_SHMEM,
+ VM_MEM_SRC_SHARED_HUGETLB,
NUM_SRC_TYPES,
};
@@ -100,4 +103,13 @@ size_t get_backing_src_pagesz(uint32_t i);
void backing_src_help(void);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+/*
+ * Whether or not the given source type is shared memory (as opposed to
+ * anonymous).
+ */
+static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
+{
+ return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
+}
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
new file mode 100644
index 000000000000..0be4757f1f20
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/apic.h
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+
+#define APIC_DEFAULT_GPA 0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_EXTD (1<<10)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define GET_APIC_BASE(x) (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ID 0x20
+#define APIC_LVR 0x30
+#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
+#define APIC_TASKPRI 0x80
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
+#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+ return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+ return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+ ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+ return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+ wrmsr(APIC_BASE_MSR + (reg >> 4), value);
+}
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index a034438b6266..c9af97abd622 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/evmcs.h
*
* Copyright (C) 2018, Red Hat, Inc.
*
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
new file mode 100644
index 000000000000..412eaee7884a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/hyperv.h
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_HYPERCALL 0x40000001
+#define HV_X64_MSR_VP_INDEX 0x40000002
+#define HV_X64_MSR_RESET 0x40000003
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+#define HV_X64_MSR_GUEST_IDLE 0x400000F0
+#define HV_X64_MSR_CRASH_P0 0x40000100
+#define HV_X64_MSR_CRASH_P1 0x40000101
+#define HV_X64_MSR_CRASH_P2 0x40000102
+#define HV_X64_MSR_CRASH_P3 0x40000103
+#define HV_X64_MSR_CRASH_P4 0x40000104
+#define HV_X64_MSR_CRASH_CTL 0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
+#define HV_MSR_SYNIC_AVAILABLE BIT(2)
+#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4)
+#define HV_MSR_HYPERCALL_AVAILABLE BIT(5)
+#define HV_MSR_VP_INDEX_AVAILABLE BIT(6)
+#define HV_MSR_RESET_AVAILABLE BIT(7)
+#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10)
+#define HV_ACCESS_FREQUENCY_MSRS BIT(11)
+#define HV_ACCESS_REENLIGHTENMENT BIT(13)
+#define HV_ACCESS_TSC_INVARIANT BIT(15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS BIT(0)
+#define HV_ACCESS_PARTITION_ID BIT(1)
+#define HV_ACCESS_MEMORY_POOL BIT(2)
+#define HV_ADJUST_MESSAGE_BUFFERS BIT(3)
+#define HV_POST_MESSAGES BIT(4)
+#define HV_SIGNAL_EVENTS BIT(5)
+#define HV_CREATE_PORT BIT(6)
+#define HV_CONNECT_PORT BIT(7)
+#define HV_ACCESS_STATS BIT(8)
+#define HV_DEBUGGING BIT(11)
+#define HV_CPU_MANAGEMENT BIT(12)
+#define HV_ISOLATION BIT(22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE BIT(0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
+#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_GET_PARTITION_ID 0x0046
+#define HVCALL_DEPOSIT_MEMORY 0x0048
+#define HVCALL_CREATE_VP 0x004e
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
+#define HV_STATUS_ACCESS_DENIED 6
+#define HV_STATUS_OPERATION_DENIED 8
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
+#define HV_STATUS_INVALID_CONNECTION_ID 18
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0b30b4e15c38..242ae8e09a65 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -13,6 +13,8 @@
#include <asm/msr-index.h>
+#include "../kvm_util.h"
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -53,7 +55,8 @@
#define CPUID_PKU (1ul << 3)
#define CPUID_LA57 (1ul << 16)
-#define UNEXPECTED_VECTOR_PORT 0xfff0u
+/* CPUID.0x8000_0001.EDX */
+#define CPUID_GBPAGES (1ul << 26)
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
@@ -391,9 +394,13 @@ struct ex_regs {
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_handle_exception(struct kvm_vm *vm, int vector,
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr);
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ uint64_t pte);
+
/*
* set_cpuid() - overwrites a matching cpuid entry with the provided value.
* matches based on ent->function && ent->index. returns true
@@ -410,6 +417,14 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+enum x86_page_size {
+ X86_PAGE_SIZE_4K = 0,
+ X86_PAGE_SIZE_2M,
+ X86_PAGE_SIZE_1G,
+};
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ enum x86_page_size page_size);
+
/*
* Basic CPU control in CR0
*/
@@ -425,53 +440,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-#define APIC_DEFAULT_GPA 0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE 0x0000001b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_EXTD (1<<10)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define GET_APIC_BASE(x) (((x) >> 12) << 12)
-
-#define APIC_BASE_MSR 0x800
-#define X2APIC_ENABLE (1UL << 10)
-#define APIC_ID 0x20
-#define APIC_LVR 0x30
-#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
-#define APIC_TASKPRI 0x80
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
-#define APIC_SPIV_APIC_ENABLED (1 << 8)
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_FIXED_MASK 0x00700
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define SET_APIC_DEST_FIELD(x) ((x) << 24)
-
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 65eb1079a161..583ceb0d1457 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -10,6 +10,7 @@
#include <stdint.h>
#include "processor.h"
+#include "apic.h"
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
@@ -607,15 +608,13 @@ bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot);
+ uint32_t memslot);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
new file mode 100644
index 000000000000..5906bbc08483
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_binary_stats_test
+ *
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Test the fd-based interface for KVM statistics.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+static void stats_test(int stats_fd)
+{
+ ssize_t ret;
+ int i;
+ size_t size_desc;
+ size_t size_data = 0;
+ struct kvm_stats_header *header;
+ char *id;
+ struct kvm_stats_desc *stats_desc;
+ u64 *stats_data;
+ struct kvm_stats_desc *pdesc;
+
+ /* Read kvm stats header */
+ header = malloc(sizeof(*header));
+ TEST_ASSERT(header, "Allocate memory for stats header");
+
+ ret = read(stats_fd, header, sizeof(*header));
+ TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+ size_desc = sizeof(*stats_desc) + header->name_size;
+
+ /* Read kvm stats id string */
+ id = malloc(header->name_size);
+ TEST_ASSERT(id, "Allocate memory for id string");
+ ret = read(stats_fd, id, header->name_size);
+ TEST_ASSERT(ret == header->name_size, "Read id string");
+
+ /* Check id string, that should start with "kvm" */
+ TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header->name_size,
+ "Invalid KVM stats type, id: %s", id);
+
+ /* Sanity check for other fields in header */
+ if (header->num_desc == 0) {
+ printf("No KVM stats defined!");
+ return;
+ }
+ /* Check overlap */
+ TEST_ASSERT(header->desc_offset > 0 && header->data_offset > 0
+ && header->desc_offset >= sizeof(*header)
+ && header->data_offset >= sizeof(*header),
+ "Invalid offset fields in header");
+ TEST_ASSERT(header->desc_offset > header->data_offset ||
+ (header->desc_offset + size_desc * header->num_desc <=
+ header->data_offset),
+ "Descriptor block is overlapped with data block");
+
+ /* Allocate memory for stats descriptors */
+ stats_desc = calloc(header->num_desc, size_desc);
+ TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+ /* Read kvm stats descriptors */
+ ret = pread(stats_fd, stats_desc,
+ size_desc * header->num_desc, header->desc_offset);
+ TEST_ASSERT(ret == size_desc * header->num_desc,
+ "Read KVM stats descriptors");
+
+ /* Sanity check for fields in descriptors */
+ for (i = 0; i < header->num_desc; ++i) {
+ pdesc = (void *)stats_desc + i * size_desc;
+ /* Check type,unit,base boundaries */
+ TEST_ASSERT((pdesc->flags & KVM_STATS_TYPE_MASK)
+ <= KVM_STATS_TYPE_MAX, "Unknown KVM stats type");
+ TEST_ASSERT((pdesc->flags & KVM_STATS_UNIT_MASK)
+ <= KVM_STATS_UNIT_MAX, "Unknown KVM stats unit");
+ TEST_ASSERT((pdesc->flags & KVM_STATS_BASE_MASK)
+ <= KVM_STATS_BASE_MAX, "Unknown KVM stats base");
+ /* Check exponent for stats unit
+ * Exponent for counter should be greater than or equal to 0
+ * Exponent for unit bytes should be greater than or equal to 0
+ * Exponent for unit seconds should be less than or equal to 0
+ * Exponent for unit clock cycles should be greater than or
+ * equal to 0
+ */
+ switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+ case KVM_STATS_UNIT_NONE:
+ case KVM_STATS_UNIT_BYTES:
+ case KVM_STATS_UNIT_CYCLES:
+ TEST_ASSERT(pdesc->exponent >= 0,
+ "Unsupported KVM stats unit");
+ break;
+ case KVM_STATS_UNIT_SECONDS:
+ TEST_ASSERT(pdesc->exponent <= 0,
+ "Unsupported KVM stats unit");
+ break;
+ }
+ /* Check name string */
+ TEST_ASSERT(strlen(pdesc->name) < header->name_size,
+ "KVM stats name(%s) too long", pdesc->name);
+ /* Check size field, which should not be zero */
+ TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0",
+ pdesc->name);
+ size_data += pdesc->size * sizeof(*stats_data);
+ }
+ /* Check overlap */
+ TEST_ASSERT(header->data_offset >= header->desc_offset
+ || header->data_offset + size_data <= header->desc_offset,
+ "Data block is overlapped with Descriptor block");
+ /* Check validity of all stats data size */
+ TEST_ASSERT(size_data >= header->num_desc * sizeof(*stats_data),
+ "Data size is not correct");
+ /* Check stats offset */
+ for (i = 0; i < header->num_desc; ++i) {
+ pdesc = (void *)stats_desc + i * size_desc;
+ TEST_ASSERT(pdesc->offset < size_data,
+ "Invalid offset (%u) for stats: %s",
+ pdesc->offset, pdesc->name);
+ }
+
+ /* Allocate memory for stats data */
+ stats_data = malloc(size_data);
+ TEST_ASSERT(stats_data, "Allocate memory for stats data");
+ /* Read kvm stats data as a bulk */
+ ret = pread(stats_fd, stats_data, size_data, header->data_offset);
+ TEST_ASSERT(ret == size_data, "Read KVM stats data");
+ /* Read kvm stats data one by one */
+ size_data = 0;
+ for (i = 0; i < header->num_desc; ++i) {
+ pdesc = (void *)stats_desc + i * size_desc;
+ ret = pread(stats_fd, stats_data,
+ pdesc->size * sizeof(*stats_data),
+ header->data_offset + size_data);
+ TEST_ASSERT(ret == pdesc->size * sizeof(*stats_data),
+ "Read data of KVM stats: %s", pdesc->name);
+ size_data += pdesc->size * sizeof(*stats_data);
+ }
+
+ free(stats_data);
+ free(stats_desc);
+ free(id);
+ free(header);
+}
+
+
+static void vm_stats_test(struct kvm_vm *vm)
+{
+ int stats_fd;
+
+ /* Get fd for VM stats */
+ stats_fd = vm_get_stats_fd(vm);
+ TEST_ASSERT(stats_fd >= 0, "Get VM stats fd");
+
+ stats_test(stats_fd);
+ close(stats_fd);
+ TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id)
+{
+ int stats_fd;
+
+ /* Get fd for VCPU stats */
+ stats_fd = vcpu_get_stats_fd(vm, vcpu_id);
+ TEST_ASSERT(stats_fd >= 0, "Get VCPU stats fd");
+
+ stats_test(stats_fd);
+ close(stats_fd);
+ TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+#define DEFAULT_NUM_VM 4
+#define DEFAULT_NUM_VCPU 4
+
+/*
+ * Usage: kvm_bin_form_stats [#vm] [#vcpu]
+ * The first parameter #vm set the number of VMs being created.
+ * The second parameter #vcpu set the number of VCPUs being created.
+ * By default, DEFAULT_NUM_VM VM and DEFAULT_NUM_VCPU VCPU for the VM would be
+ * created for testing.
+ */
+
+int main(int argc, char *argv[])
+{
+ int i, j;
+ struct kvm_vm **vms;
+ int max_vm = DEFAULT_NUM_VM;
+ int max_vcpu = DEFAULT_NUM_VCPU;
+
+ /* Get the number of VMs and VCPUs that would be created for testing. */
+ if (argc > 1) {
+ max_vm = strtol(argv[1], NULL, 0);
+ if (max_vm <= 0)
+ max_vm = DEFAULT_NUM_VM;
+ }
+ if (argc > 2) {
+ max_vcpu = strtol(argv[2], NULL, 0);
+ if (max_vcpu <= 0)
+ max_vcpu = DEFAULT_NUM_VCPU;
+ }
+
+ /* Check the extension for binary stats */
+ if (kvm_check_cap(KVM_CAP_BINARY_STATS_FD) <= 0) {
+ print_skip("Binary form statistics interface is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ /* Create VMs and VCPUs */
+ vms = malloc(sizeof(vms[0]) * max_vm);
+ TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+ for (i = 0; i < max_vm; ++i) {
+ vms[i] = vm_create(VM_MODE_DEFAULT,
+ DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ for (j = 0; j < max_vcpu; ++j)
+ vm_vcpu_add(vms[i], j);
+ }
+
+ /* Check stats read for every VM and VCPU */
+ for (i = 0; i < max_vm; ++i) {
+ vm_stats_test(vms[i]);
+ for (j = 0; j < max_vcpu; ++j)
+ vcpu_stats_test(vms[i], j);
+ }
+
+ for (i = 0; i < max_vm; ++i)
+ kvm_vm_free(vms[i]);
+ free(vms);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 1c4753fff19e..0d04a7db7f24 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -268,7 +268,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
/* Create a VM with enough guest pages */
guest_num_pages = test_mem_size / guest_page_size;
- vm = vm_create_with_vcpus(mode, nr_vcpus,
+ vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
guest_num_pages, 0, guest_code, NULL);
/* Align down GPA of the testing memslot */
@@ -303,7 +303,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
/* Do mapping(GVA->GPA) for the testing memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index 000000000000..0e443eadfac6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+ add sp, sp, #-16 * 17
+
+ stp x0, x1, [sp, #16 * 0]
+ stp x2, x3, [sp, #16 * 1]
+ stp x4, x5, [sp, #16 * 2]
+ stp x6, x7, [sp, #16 * 3]
+ stp x8, x9, [sp, #16 * 4]
+ stp x10, x11, [sp, #16 * 5]
+ stp x12, x13, [sp, #16 * 6]
+ stp x14, x15, [sp, #16 * 7]
+ stp x16, x17, [sp, #16 * 8]
+ stp x18, x19, [sp, #16 * 9]
+ stp x20, x21, [sp, #16 * 10]
+ stp x22, x23, [sp, #16 * 11]
+ stp x24, x25, [sp, #16 * 12]
+ stp x26, x27, [sp, #16 * 13]
+ stp x28, x29, [sp, #16 * 14]
+
+ /*
+ * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+ * at it. It will _not_ be used to restore the sp on return from the
+ * exception so handlers can not update it.
+ */
+ add x1, sp, #16 * 17
+ stp x30, x1, [sp, #16 * 15] /* x30, SP */
+
+ mrs x1, elr_el1
+ mrs x2, spsr_el1
+ stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+ ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+ msr elr_el1, x1
+ msr spsr_el1, x2
+
+ /* sp is not restored */
+ ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+ ldp x28, x29, [sp, #16 * 14]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x22, x23, [sp, #16 * 11]
+ ldp x20, x21, [sp, #16 * 10]
+ ldp x18, x19, [sp, #16 * 9]
+ ldp x16, x17, [sp, #16 * 8]
+ ldp x14, x15, [sp, #16 * 7]
+ ldp x12, x13, [sp, #16 * 6]
+ ldp x10, x11, [sp, #16 * 5]
+ ldp x8, x9, [sp, #16 * 4]
+ ldp x6, x7, [sp, #16 * 3]
+ ldp x4, x5, [sp, #16 * 2]
+ ldp x2, x3, [sp, #16 * 1]
+ ldp x0, x1, [sp, #16 * 0]
+
+ add sp, sp, #16 * 17
+
+ eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+ save_registers
+ mov x0, sp
+ mov x1, #vector
+ bl route_exception
+ restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+ b handler_\label
+.popsection
+
+.set vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+ mov x0, #vector
+ mov x1, #0 /* ec */
+ mov x2, #0 /* valid_ec */
+ b kvm_exit_unexpected_exception
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+ HANDLER_INVALID // Synchronous EL1t
+ HANDLER_INVALID // IRQ EL1t
+ HANDLER_INVALID // FIQ EL1t
+ HANDLER_INVALID // Error EL1t
+
+ HANDLER el1h_sync // Synchronous EL1h
+ HANDLER el1h_irq // IRQ EL1h
+ HANDLER el1h_fiq // FIQ EL1h
+ HANDLER el1h_error // Error EL1h
+
+ HANDLER el0_sync_64 // Synchronous 64-bit EL0
+ HANDLER el0_irq_64 // IRQ 64-bit EL0
+ HANDLER el0_fiq_64 // FIQ 64-bit EL0
+ HANDLER el0_error_64 // Error 64-bit EL0
+
+ HANDLER el0_sync_32 // Synchronous 32-bit EL0
+ HANDLER el0_irq_32 // IRQ 32-bit EL0
+ HANDLER el0_fiq_32 // FIQ 32-bit EL0
+ HANDLER el0_error_32 // Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index cee92d477dc0..9f49f6caafe5 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -6,14 +6,16 @@
*/
#include <linux/compiler.h>
+#include <assert.h>
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+static vm_vaddr_t exception_handlers;
+
static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
{
return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -72,19 +74,19 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
return 1 << (vm->page_shift - 3);
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
if (!vm->pgd_created) {
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
vm->pgd = paddr;
vm->pgd_created = true;
}
}
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot, uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t flags)
{
uint8_t attr_idx = flags & 7;
uint64_t *ptep;
@@ -104,25 +106,19 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
paddr, vm->max_gfn, vm->page_size);
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -135,12 +131,11 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
- _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+ _virt_pg_map(vm, vaddr, paddr, attr_idx);
}
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
@@ -302,7 +297,7 @@ void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
vm_vcpu_add(vm, vcpuid);
aarch64_vcpu_setup(vm, vcpuid, init);
@@ -334,6 +329,100 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
va_end(ap);
}
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+ ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+ while (1)
+ ;
+}
+
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
+ struct ucall uc;
+
+ if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED)
+ return;
+
+ if (uc.args[2]) /* valid_ec */ {
+ assert(VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ } else {
+ assert(!VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx)",
+ uc.args[0]);
+ }
+}
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ extern char vectors;
+
+ set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ bool valid_ec;
+ int ec = 0;
+
+ switch (vector) {
+ case VECTOR_SYNC_CURRENT:
+ case VECTOR_SYNC_LOWER_64:
+ ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+ valid_ec = true;
+ break;
+ case VECTOR_IRQ_CURRENT:
+ case VECTOR_IRQ_LOWER_64:
+ case VECTOR_FIQ_CURRENT:
+ case VECTOR_FIQ_LOWER_64:
+ case VECTOR_ERROR_CURRENT:
+ case VECTOR_ERROR_LOWER_64:
+ ec = 0;
+ valid_ec = false;
+ break;
+ default:
+ valid_ec = false;
+ goto unexpected_exception;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, 0, 0);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ assert(ec < ESR_EC_NUM);
+ handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(!VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector][0] = handler;
}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 2f37b90ee1a9..e0b0164e9af8 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -14,7 +14,7 @@ static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
return false;
- virt_pg_map(vm, gpa, gpa, 0);
+ virt_pg_map(vm, gpa, gpa);
ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
sync_global_to_guest(vm, ucall_exit_mmio_addr);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a6..eac44f5d0db0 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -111,8 +111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
* by the image and it needs to have sufficient available physical pages, to
* back the virtual pages used to load the image.
*/
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
{
off_t offset, offset_rv;
Elf64_Ehdr hdr;
@@ -164,8 +163,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
- vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
- data_memslot, pgd_memslot);
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
"virtual memory for segment at requested min addr,\n"
" segment idx: %u\n"
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index fc83f6c5902d..5b56b57b3c20 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -32,6 +32,34 @@ static void *align(void *x, size_t size)
}
/*
+ * Open KVM_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Input Args:
+ * flags - The flags to pass when opening KVM_DEV_PATH.
+ *
+ * Return:
+ * The opened file descriptor of /dev/kvm.
+ */
+static int _open_kvm_dev_path_or_exit(int flags)
+{
+ int fd;
+
+ fd = open(KVM_DEV_PATH, flags);
+ if (fd < 0) {
+ print_skip("%s not available, is KVM loaded? (errno: %d)",
+ KVM_DEV_PATH, errno);
+ exit(KSFT_SKIP);
+ }
+
+ return fd;
+}
+
+int open_kvm_dev_path_or_exit(void)
+{
+ return _open_kvm_dev_path_or_exit(O_RDONLY);
+}
+
+/*
* Capability
*
* Input Args:
@@ -52,12 +80,9 @@ int kvm_check_cap(long cap)
int ret;
int kvm_fd;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
-
+ kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
" rc: %i errno: %i", ret, errno);
close(kvm_fd);
@@ -128,9 +153,7 @@ void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
static void vm_open(struct kvm_vm *vm, int perm)
{
- vm->kvm_fd = open(KVM_DEV_PATH, perm);
- if (vm->kvm_fd < 0)
- exit(KSFT_SKIP);
+ vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
print_skip("immediate_exit not available");
@@ -152,6 +175,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -169,6 +193,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
{ 40, 48, 0x1000, 12 },
{ 40, 48, 0x10000, 16 },
{ 0, 0, 0x1000, 12 },
+ { 47, 64, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -203,7 +228,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus);
- INIT_LIST_HEAD(&vm->userspace_mem_regions);
+ vm->regions.gpa_tree = RB_ROOT;
+ vm->regions.hva_tree = RB_ROOT;
+ hash_init(vm->regions.slot_hash);
vm->mode = mode;
vm->type = 0;
@@ -252,6 +279,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
break;
+ case VM_MODE_P47V64_4K:
+ vm->pgtable_levels = 5;
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
}
@@ -283,21 +313,50 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
return vm;
}
+/*
+ * VM Create with customized parameters
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
+ * nr_vcpus - VCPU count
+ * slot0_mem_pages - Slot0 physical memory size
+ * extra_mem_pages - Non-slot0 physical memory total size
+ * num_percpu_pages - Per-cpu physical memory pages
+ * guest_code - Guest entry point
+ * vcpuids - VCPU IDs
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
+ * with customized slot0 memory size, at least 512 pages currently.
+ * extra_mem_pages is only used to calculate the maximum page table size,
+ * no real memory allocation for non-slot0 memory in this function.
+ */
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t extra_mem_pages, uint32_t num_percpu_pages,
- void *guest_code, uint32_t vcpuids[])
+ uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[])
{
+ uint64_t vcpu_pages, extra_pg_pages, pages;
+ struct kvm_vm *vm;
+ int i;
+
+ /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
+ if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
+ slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
+
/* The maximum page table size for a memory region will be when the
* smallest pages are used. Considering each page contains x page
* table descriptors, the total extra size for page tables (for extra
* N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
* than N/x*2.
*/
- uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
- uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
- uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
- struct kvm_vm *vm;
- int i;
+ vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
+ extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
+ pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
"nr_vcpus = %d too large for host, max-vcpus = %d",
@@ -306,7 +365,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
pages = vm_adjust_num_guest_pages(mode, pages);
vm = vm_create(mode, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
#ifdef __x86_64__
vm_create_irqchip(vm);
@@ -316,10 +375,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-#ifdef __x86_64__
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
-#endif
}
return vm;
@@ -329,8 +384,8 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
uint32_t num_percpu_pages, void *guest_code,
uint32_t vcpuids[])
{
- return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
- num_percpu_pages, guest_code, vcpuids);
+ return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
+ extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
}
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
@@ -355,13 +410,14 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
*/
void kvm_vm_restart(struct kvm_vm *vmp, int perm)
{
+ int ctr;
struct userspace_mem_region *region;
vm_open(vmp, perm);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
+ hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
@@ -424,14 +480,21 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ for (node = vm->regions.gpa_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, gpa_node);
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
+
+ if (start < existing_start)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
}
return NULL;
@@ -546,11 +609,16 @@ void kvm_vm_release(struct kvm_vm *vmp)
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
- struct userspace_mem_region *region)
+ struct userspace_mem_region *region,
+ bool unlink)
{
int ret;
- list_del(&region->list);
+ if (unlink) {
+ rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
+ rb_erase(&region->hva_node, &vm->regions.hva_tree);
+ hash_del(&region->slot_node);
+ }
region->region.memory_size = 0;
ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
@@ -569,14 +637,16 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- struct userspace_mem_region *region, *tmp;
+ int ctr;
+ struct hlist_node *node;
+ struct userspace_mem_region *region;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
- list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
- __vm_mem_region_delete(vmp, region);
+ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
+ __vm_mem_region_delete(vmp, region, false);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -658,13 +728,64 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
return 0;
}
+static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
+
+ for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), gpa_node);
+ parent = *cur;
+ if (region->region.guest_phys_addr <
+ cregion->region.guest_phys_addr)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->region.guest_phys_addr !=
+ cregion->region.guest_phys_addr,
+ "Duplicate GPA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
+ }
+
+ rb_link_node(&region->gpa_node, parent, cur);
+ rb_insert_color(&region->gpa_node, gpa_tree);
+}
+
+static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
+
+ for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), hva_node);
+ parent = *cur;
+ if (region->host_mem < cregion->host_mem)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->host_mem !=
+ cregion->host_mem,
+ "Duplicate HVA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
+ }
+
+ rb_link_node(&region->hva_node, parent, cur);
+ rb_insert_color(&region->hva_node, hva_tree);
+}
+
/*
* VM Userspace Memory Region Add
*
* Input Args:
* vm - Virtual Machine
- * backing_src - Storage source for this region.
- * NULL to use anonymous memory.
+ * src_type - Storage source for this region.
+ * NULL to use anonymous memory.
* guest_paddr - Starting guest physical address
* slot - KVM region slot
* npages - Number of physical pages
@@ -722,7 +843,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ slot) {
if (region->region.slot != slot)
continue;
@@ -755,11 +877,30 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
if (alignment > 1)
region->mmap_size += alignment;
+ region->fd = -1;
+ if (backing_src_is_shared(src_type)) {
+ int memfd_flags = MFD_CLOEXEC;
+
+ if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
+ memfd_flags |= MFD_HUGETLB;
+
+ region->fd = memfd_create("kvm_selftest", memfd_flags);
+ TEST_ASSERT(region->fd != -1,
+ "memfd_create failed, errno: %i", errno);
+
+ ret = ftruncate(region->fd, region->mmap_size);
+ TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
+
+ ret = fallocate(region->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
+ region->mmap_size);
+ TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
+ }
+
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS
- | vm_mem_backing_src_alias(src_type)->flag,
- -1, 0);
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
"test_malloc failed, mmap_start: %p errno: %i",
region->mmap_start, errno);
@@ -793,8 +934,23 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
ret, errno, slot, flags,
guest_paddr, (uint64_t) region->region.memory_size);
- /* Add to linked-list of memory regions. */
- list_add(&region->list, &vm->userspace_mem_regions);
+ /* Add to quick lookup data structures */
+ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
+ vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
+ hash_add(vm->regions.slot_hash, &region->slot_node, slot);
+
+ /* If shared memory, create an alias. */
+ if (region->fd >= 0) {
+ region->mmap_alias = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
+ TEST_ASSERT(region->mmap_alias != MAP_FAILED,
+ "mmap of alias failed, errno: %i", errno);
+
+ /* Align host alias address */
+ region->host_alias = align(region->mmap_alias, alignment);
+ }
}
/*
@@ -817,10 +973,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ memslot)
if (region->region.slot == memslot)
return region;
- }
fprintf(stderr, "No mem region with the requested slot found,\n"
" requested slot: %u\n", memslot);
@@ -905,7 +1061,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
*/
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
- __vm_mem_region_delete(vm, memslot2region(vm, slot));
+ __vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
/*
@@ -925,9 +1081,7 @@ static int vcpu_mmap_sz(void)
{
int dev_fd, ret;
- dev_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (dev_fd < 0)
- exit(KSFT_SKIP);
+ dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
@@ -1093,12 +1247,13 @@ va_found:
* a unique set of pages, with the minimum real allocation being at least
* a page.
*/
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
- virt_pgd_alloc(vm, pgd_memslot);
+ virt_pgd_alloc(vm);
+ vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
+ KVM_UTIL_MIN_PFN * vm->page_size, 0);
/*
* Find an unused range of virtual page addresses of at least
@@ -1108,13 +1263,9 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
/* Map the virtual pages. */
for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
- pages--, vaddr += vm->page_size) {
- vm_paddr_t paddr;
-
- paddr = vm_phy_page_alloc(vm,
- KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
+ pages--, vaddr += vm->page_size, paddr += vm->page_size) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
sparsebit_set(vm->vpages_mapped,
vaddr >> vm->page_shift);
@@ -1124,6 +1275,44 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
}
/*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least N system pages worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+ return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+}
+
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+ return vm_vaddr_alloc_pages(vm, 1);
+}
+
+/*
* Map a range of VM virtual address to the VM's physical address
*
* Input Args:
@@ -1141,7 +1330,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* @npages starting at @vaddr to the page range starting at @paddr.
*/
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot)
+ unsigned int npages)
{
size_t page_size = vm->page_size;
size_t size = npages * page_size;
@@ -1150,7 +1339,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
vaddr += page_size;
paddr += page_size;
}
@@ -1177,16 +1366,14 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((gpa >= region->region.guest_phys_addr)
- && (gpa <= (region->region.guest_phys_addr
- + region->region.memory_size - 1)))
- return (void *) ((uintptr_t) region->host_mem
- + (gpa - region->region.guest_phys_addr));
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region) {
+ TEST_FAIL("No vm physical memory at 0x%lx", gpa);
+ return NULL;
}
- TEST_FAIL("No vm physical memory at 0x%lx", gpa);
- return NULL;
+ return (void *)((uintptr_t)region->host_mem
+ + (gpa - region->region.guest_phys_addr));
}
/*
@@ -1208,15 +1395,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
*/
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
+
+ for (node = vm->regions.hva_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, hva_node);
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((hva >= region->host_mem)
- && (hva <= (region->host_mem
- + region->region.memory_size - 1)))
- return (vm_paddr_t) ((uintptr_t)
- region->region.guest_phys_addr
- + (hva - (uintptr_t) region->host_mem));
+ if (hva >= region->host_mem) {
+ if (hva <= (region->host_mem
+ + region->region.memory_size - 1))
+ return (vm_paddr_t)((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t)region->host_mem));
+
+ node = node->rb_right;
+ } else
+ node = node->rb_left;
}
TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
@@ -1224,6 +1418,42 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
}
/*
+ * Address VM physical to Host Virtual *alias*.
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent address within the host virtual *alias* area, or NULL
+ * (without failing the test) if the guest memory is not shared (so
+ * no alias exists).
+ *
+ * When vm_create() and related functions are called with a shared memory
+ * src_type, we also create a writable, shared alias mapping of the
+ * underlying guest memory. This allows the host to manipulate guest memory
+ * without mapping that memory in the guest's address space. And, for
+ * userfaultfd-based demand paging, we can do so without triggering userfaults.
+ */
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ struct userspace_mem_region *region;
+ uintptr_t offset;
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region)
+ return NULL;
+
+ if (!region->host_alias)
+ return NULL;
+
+ offset = gpa - region->region.guest_phys_addr;
+ return (void *) ((uintptr_t) region->host_alias + offset);
+}
+
+/*
* VM Create IRQ Chip
*
* Input Args:
@@ -1822,6 +2052,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
*/
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
+ int ctr;
struct userspace_mem_region *region;
struct vcpu *vcpu;
@@ -1829,7 +2060,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1978,6 +2209,14 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
+/* Arbitrary minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+{
+ return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+}
+
/*
* Address Guest Virtual to Host Virtual
*
@@ -2015,10 +2254,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
if (vm == NULL) {
/* Ensure that the KVM vendor-specific module is loaded. */
- f = fopen(KVM_DEV_PATH, "r");
- TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
- errno);
- fclose(f);
+ close(open_kvm_dev_path_or_exit());
}
f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
@@ -2041,7 +2277,7 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
return vm->page_shift;
}
-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+uint64_t vm_get_max_gfn(struct kvm_vm *vm)
{
return vm->max_gfn;
}
@@ -2090,3 +2326,15 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
return vm_adjust_num_guest_pages(mode, n);
}
+
+int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
+}
+
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 91ce1b5d480b..a03febc24ba6 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -8,6 +8,9 @@
#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
#define SELFTEST_KVM_UTIL_INTERNAL_H
+#include "linux/hashtable.h"
+#include "linux/rbtree.h"
+
#include "sparsebit.h"
struct userspace_mem_region {
@@ -16,9 +19,13 @@ struct userspace_mem_region {
int fd;
off_t offset;
void *host_mem;
+ void *host_alias;
void *mmap_start;
+ void *mmap_alias;
size_t mmap_size;
- struct list_head list;
+ struct rb_node gpa_node;
+ struct rb_node hva_node;
+ struct hlist_node slot_node;
};
struct vcpu {
@@ -31,6 +38,12 @@ struct vcpu {
uint32_t dirty_gfns_count;
};
+struct userspace_mem_regions {
+ struct rb_root gpa_tree;
+ struct rb_root hva_tree;
+ DECLARE_HASHTABLE(slot_hash, 9);
+};
+
struct kvm_vm {
int mode;
unsigned long type;
@@ -43,7 +56,7 @@ struct kvm_vm {
unsigned int va_bits;
uint64_t max_gfn;
struct list_head vcpus;
- struct list_head userspace_mem_regions;
+ struct userspace_mem_regions regions;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 81490b9b4e32..b488f4aefea8 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2020, Google LLC.
*/
+#include <inttypes.h>
#include "kvm_util.h"
#include "perf_test_util.h"
@@ -68,7 +69,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
"Guest memory size is not guest page size aligned.");
- vm = vm_create_with_vcpus(mode, vcpus,
+ vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
0, guest_code, NULL);
@@ -80,7 +81,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
*/
TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
"Requested more guest memory than address space allows.\n"
- " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " vcpus: %d wss: %" PRIx64 "]\n",
guest_num_pages, vm_get_max_gfn(vm), vcpus,
vcpu_memory_bytes);
@@ -99,7 +101,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
guest_num_pages, 0);
/* Do mapping for the demand paging memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
ucall_init(vm, NULL);
diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c
new file mode 100644
index 000000000000..a703f0194ea3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/rbtree.c
@@ -0,0 +1 @@
+#include "../../../../lib/rbtree.c"
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 0152f356c099..f87c7137598e 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -9,11 +9,9 @@
#include "kvm_util.h"
#include "../kvm_util_internal.h"
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
#define PAGES_PER_REGION 4
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
@@ -24,7 +22,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
vm->pgd = paddr;
@@ -36,12 +34,12 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
{
uint64_t taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
return (taddr & REGION_ENTRY_ORIGIN)
@@ -49,8 +47,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
- uint32_t memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
int ri, idx;
uint64_t *entry;
@@ -77,7 +74,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
- entry[idx] = virt_alloc_region(vm, ri, memslot);
+ entry[idx] = virt_alloc_region(vm, ri);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
@@ -170,7 +167,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
vm->page_size);
stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_GUEST_STACK_VADDR_MIN);
vm_vcpu_add(vm, vcpuid);
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 63d2bc7d757b..af1031fed97f 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -166,72 +166,89 @@ size_t get_def_hugetlb_pagesz(void)
return 0;
}
+#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB)
+
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
{
static const struct vm_mem_backing_src_alias aliases[] = {
[VM_MEM_SRC_ANONYMOUS] = {
.name = "anonymous",
- .flag = 0,
+ .flag = ANON_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_THP] = {
.name = "anonymous_thp",
- .flag = 0,
+ .flag = ANON_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
.name = "anonymous_hugetlb",
- .flag = MAP_HUGETLB,
+ .flag = ANON_HUGE_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
.name = "anonymous_hugetlb_16kb",
- .flag = MAP_HUGETLB | MAP_HUGE_16KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
.name = "anonymous_hugetlb_64kb",
- .flag = MAP_HUGETLB | MAP_HUGE_64KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
.name = "anonymous_hugetlb_512kb",
- .flag = MAP_HUGETLB | MAP_HUGE_512KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
.name = "anonymous_hugetlb_1mb",
- .flag = MAP_HUGETLB | MAP_HUGE_1MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
.name = "anonymous_hugetlb_2mb",
- .flag = MAP_HUGETLB | MAP_HUGE_2MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
.name = "anonymous_hugetlb_8mb",
- .flag = MAP_HUGETLB | MAP_HUGE_8MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
.name = "anonymous_hugetlb_16mb",
- .flag = MAP_HUGETLB | MAP_HUGE_16MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
.name = "anonymous_hugetlb_32mb",
- .flag = MAP_HUGETLB | MAP_HUGE_32MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
.name = "anonymous_hugetlb_256mb",
- .flag = MAP_HUGETLB | MAP_HUGE_256MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
.name = "anonymous_hugetlb_512mb",
- .flag = MAP_HUGETLB | MAP_HUGE_512MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
.name = "anonymous_hugetlb_1gb",
- .flag = MAP_HUGETLB | MAP_HUGE_1GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
.name = "anonymous_hugetlb_2gb",
- .flag = MAP_HUGETLB | MAP_HUGE_2GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
.name = "anonymous_hugetlb_16gb",
- .flag = MAP_HUGETLB | MAP_HUGE_16GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
+ },
+ [VM_MEM_SRC_SHMEM] = {
+ .name = "shmem",
+ .flag = MAP_SHARED,
+ },
+ [VM_MEM_SRC_SHARED_HUGETLB] = {
+ .name = "shared_hugetlb",
+ /*
+ * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since
+ * we're using "file backed" memory, we need to specify
+ * this when the FD is created, not when the area is
+ * mapped.
+ */
+ .flag = MAP_SHARED,
},
};
_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
@@ -250,10 +267,12 @@ size_t get_backing_src_pagesz(uint32_t i)
switch (i) {
case VM_MEM_SRC_ANONYMOUS:
+ case VM_MEM_SRC_SHMEM:
return getpagesize();
case VM_MEM_SRC_ANONYMOUS_THP:
return get_trans_hugepagesz();
case VM_MEM_SRC_ANONYMOUS_HUGETLB:
+ case VM_MEM_SRC_SHARED_HUGETLB:
return get_def_hugetlb_pagesz();
default:
return MAP_HUGE_PAGE_SIZE(flag);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
new file mode 100644
index 000000000000..7168e25c194e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/processor.c
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ apic_disable();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+ wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+ MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+ x2apic_write_reg(APIC_SPIV,
+ x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index a8906e60a108..28cb881f440d 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -17,13 +17,10 @@
#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10
-/* Minimum physical address used for virtual translation tables. */
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
vm_vaddr_t exception_handlers;
/* Virtual translation table structure declarations */
-struct pageMapL4Entry {
+struct pageUpperEntry {
uint64_t present:1;
uint64_t writable:1;
uint64_t user:1;
@@ -33,37 +30,7 @@ struct pageMapL4Entry {
uint64_t ignored_06:1;
uint64_t page_size:1;
uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryPointerEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
+ uint64_t pfn:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
@@ -79,7 +46,7 @@ struct pageTableEntry {
uint64_t reserved_07:1;
uint64_t global:1;
uint64_t ignored_11_09:3;
- uint64_t address:40;
+ uint64_t pfn:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
@@ -207,96 +174,211 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
}
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
+ vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
}
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
+ int level)
+{
+ uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
+ int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+
+ return &page_table[index];
+}
+
+static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t pt_pfn,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int level,
+ enum x86_page_size page_size)
+{
+ struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+ if (!pte->present) {
+ pte->writable = true;
+ pte->present = true;
+ pte->page_size = (level == page_size);
+ if (pte->page_size)
+ pte->pfn = paddr >> vm->page_shift;
+ else
+ pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(level != page_size,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+ page_size, vaddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+ level, vaddr);
+ }
+ return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ enum x86_page_size page_size)
+{
+ const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
+ struct pageTableEntry *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % pg_size) == 0,
+ "Virtual address not aligned,\n"
+ "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % pg_size) == 0,
+ "Physical address not aligned,\n"
+ " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ /*
+ * Allocate upper level page tables, if not already present. Return
+ * early if a hugepage was created.
+ */
+ pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
+ vaddr, paddr, 3, page_size);
+ if (pml4e->page_size)
+ return;
+
+ pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
+ if (pdpe->page_size)
+ return;
+
+ pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
+ if (pde->page_size)
+ return;
+
+ /* Fill in page table entry. */
+ pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
+ TEST_ASSERT(!pte->present,
+ "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
+ pte->pfn = paddr >> vm->page_shift;
+ pte->writable = true;
+ pte->present = 1;
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+}
+
+static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+ uint64_t vaddr)
{
uint16_t index[4];
- struct pageMapL4Entry *pml4e;
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
+ struct pageTableEntry *pte;
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_sregs sregs;
+ int max_phy_addr;
+ /* Set the bottom 52 bits. */
+ uint64_t rsvd_mask = 0x000fffffffffffff;
+
+ entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+ max_phy_addr = entry->eax & 0x000000ff;
+ /* Clear the bottom bits of the reserved mask. */
+ rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+
+ /*
+ * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
+ * with 4-Level Paging and 5-Level Paging".
+ * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
+ * the XD flag (bit 63) is reserved.
+ */
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ if ((sregs.efer & EFER_NX) == 0) {
+ rsvd_mask |= (1ull << 63);
+ }
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- vaddr, vm->page_size);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
+ /*
+ * Based on the mode check above there are 48 bits in the vaddr, so
+ * shift 16 to sign extend the last bit (bit-47),
+ */
+ TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+ "Canonical check failed. The virtual address is invalid.");
index[0] = (vaddr >> 12) & 0x1ffu;
index[1] = (vaddr >> 21) & 0x1ffu;
index[2] = (vaddr >> 30) & 0x1ffu;
index[3] = (vaddr >> 39) & 0x1ffu;
- /* Allocate page directory pointer table if not present. */
pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].present = true;
- }
+ TEST_ASSERT(pml4e[index[3]].present,
+ "Expected pml4e to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
+ (rsvd_mask | (1ull << 7))) == 0,
+ "Unexpected reserved bits set.");
+
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+ TEST_ASSERT(pdpe[index[2]].present,
+ "Expected pdpe to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT(pdpe[index[2]].page_size == 0,
+ "Expected pdpe to map a pde not a 1-GByte page.");
+ TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+ pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+ TEST_ASSERT(pde[index[1]].present,
+ "Expected pde to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT(pde[index[1]].page_size == 0,
+ "Expected pde to map a pte not a 2-MByte page.");
+ TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+ pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+ TEST_ASSERT(pte[index[0]].present,
+ "Expected pte to be present for gva: 0x%08lx", vaddr);
+
+ return &pte[index[0]];
+}
- /* Allocate page directory table if not present. */
- struct pageDirectoryPointerEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].present = true;
- }
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+{
+ struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
- /* Allocate page table if not present. */
- struct pageDirectoryEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].present = true;
- }
+ return *(uint64_t *)pte;
+}
- /* Fill in page table entry. */
- struct pageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].present = 1;
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ uint64_t pte)
+{
+ struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
+ vaddr);
+
+ *(uint64_t *)new_pte = pte;
}
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- struct pageMapL4Entry *pml4e, *pml4e_start;
- struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
- struct pageDirectoryEntry *pde, *pde_start;
+ struct pageUpperEntry *pml4e, *pml4e_start;
+ struct pageUpperEntry *pdpe, *pdpe_start;
+ struct pageUpperEntry *pde, *pde_start;
struct pageTableEntry *pte, *pte_start;
if (!vm->pgd_created)
@@ -307,8 +389,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
- vm->pgd);
+ pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
if (!pml4e->present)
@@ -317,11 +398,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
pml4e->writable, pml4e->execute_disable);
- pdpe_start = addr_gpa2hva(vm, pml4e->address
- * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
if (!pdpe->present)
@@ -331,11 +411,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->address, pdpe->writable,
+ (uint64_t) pdpe->pfn, pdpe->writable,
pdpe->execute_disable);
- pde_start = addr_gpa2hva(vm,
- pdpe->address * vm->page_size);
+ pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
if (!pde->present)
@@ -344,11 +423,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
"0x%-12lx 0x%-10lx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->address, pde->writable,
+ (uint64_t) pde->pfn, pde->writable,
pde->execute_disable);
- pte_start = addr_gpa2hva(vm,
- pde->address * vm->page_size);
+ pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
if (!pte->present)
@@ -359,7 +437,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->address,
+ (uint64_t) pte->pfn,
pte->writable,
pte->execute_disable,
pte->dirty,
@@ -480,9 +558,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
- struct pageMapL4Entry *pml4e;
- struct pageDirectoryPointerEntry *pdpe;
- struct pageDirectoryEntry *pde;
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
struct pageTableEntry *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -499,43 +575,39 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
if (!pml4e[index[3]].present)
goto unmapped_gva;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
if (!pdpe[index[2]].present)
goto unmapped_gva;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
if (!pde[index[1]].present)
goto unmapped_gva;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
if (!pte[index[0]].present)
goto unmapped_gva;
- return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+ return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
exit(EXIT_FAILURE);
}
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
- int pgd_memslot)
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
{
if (!vm->gdt)
- vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->gdt = vm_vaddr_alloc_page(vm);
dt->base = vm->gdt;
dt->limit = getpagesize();
}
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector, int gdt_memslot,
- int pgd_memslot)
+ int selector)
{
if (!vm->tss)
- vm->tss = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->tss = vm_vaddr_alloc_page(vm);
memset(segp, 0, sizeof(*segp));
segp->base = vm->tss;
@@ -546,7 +618,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
{
struct kvm_sregs sregs;
@@ -555,7 +627,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
sregs.idt.limit = 0;
- kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+ kvm_setup_gdt(vm, &sregs.gdt);
switch (vm->mode) {
case VM_MODE_PXXV48_4K:
@@ -567,7 +639,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+ kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
break;
default:
@@ -584,11 +656,11 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
struct kvm_regs regs;
vm_vaddr_t stack_vaddr;
stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_GUEST_STACK_VADDR_MIN);
/* Create VCPU */
vm_vcpu_add(vm, vcpuid);
- vcpu_setup(vm, vcpuid, 0, 0);
+ vcpu_setup(vm, vcpuid);
/* Setup guest general purpose registers */
vcpu_regs_get(vm, vcpuid, &regs);
@@ -600,6 +672,9 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
/* Setup the MP state */
mp_state.mp_state = 0;
vcpu_set_mp_state(vm, vcpuid, &mp_state);
+
+ /* Setup supported CPUIDs */
+ vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
/*
@@ -657,9 +732,7 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
return cpuid;
cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -691,9 +764,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
@@ -986,9 +1057,7 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
struct kvm_msr_list *list;
int nmsrs, r, kvm_fd;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
nmsrs = kvm_get_num_msrs_fd(kvm_fd);
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
@@ -1207,7 +1276,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
void kvm_exit_unexpected_vector(uint32_t value)
{
- outl(UNEXPECTED_VECTOR_PORT, value);
+ ucall(UCALL_UNHANDLED, 1, value);
}
void route_exception(struct ex_regs *regs)
@@ -1228,8 +1297,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
extern void *idt_handlers;
int i;
- vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
- vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+ vm->idt = vm_vaddr_alloc_page(vm);
+ vm->handlers = vm_vaddr_alloc_page(vm);
/* Handlers have the same address in both address spaces.*/
for (i = 0; i < NUM_INTERRUPTS; i++)
set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
@@ -1250,8 +1319,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
-void vm_handle_exception(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
{
vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
@@ -1260,16 +1329,13 @@ void vm_handle_exception(struct kvm_vm *vm, int vector,
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
- if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
- && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
- && vcpu_state(vm, vcpuid)->io.size == 4) {
- /* Grab pointer to io data */
- uint32_t *data = (void *)vcpu_state(vm, vcpuid)
- + vcpu_state(vm, vcpuid)->io.data_offset;
-
- TEST_ASSERT(false,
- "Unexpected vectored event in guest (vector:0x%x)",
- *data);
+ struct ucall uc;
+
+ if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) {
+ uint64_t vector = uc.args[0];
+
+ TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
+ vector);
}
}
@@ -1312,9 +1378,7 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
return cpuid;
cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 827fe6028dd4..2ac98d70d02b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -30,17 +30,14 @@ u64 rflags;
struct svm_test_data *
vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
{
- vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
- svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
- svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->save_area = (void *)vm_vaddr_alloc_page(vm);
svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 2448b30e8efa..d089d8b850b5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -77,50 +77,48 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
struct vmx_pages *
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
/* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
/* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
/* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr = (void *)vm_vaddr_alloc_page(vm);
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
memset(vmx->msr_hva, 0, getpagesize());
/* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
memset(vmx->vmread_hva, 0, getpagesize());
- vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
/* Setup of a region of guest memory for the VP Assist page. */
- vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm);
vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
/* Setup of a region of guest memory for the enlightened VMCS. */
- vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->enlightened_vmcs_hva =
addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
vmx->enlightened_vmcs_gpa =
@@ -395,7 +393,7 @@ void nested_vmx_check_supported(void)
}
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+ uint64_t nested_paddr, uint64_t paddr)
{
uint16_t index[4];
struct eptPageTableEntry *pml4e;
@@ -428,9 +426,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
/* Allocate page directory pointer table if not present. */
pml4e = vmx->eptp_hva;
if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pml4e[index[3]].writable = true;
pml4e[index[3]].readable = true;
pml4e[index[3]].executable = true;
@@ -440,9 +436,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pdpe;
pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pdpe[index[2]].writable = true;
pdpe[index[2]].readable = true;
pdpe[index[2]].executable = true;
@@ -452,9 +446,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pde;
pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pde[index[1]].writable = true;
pde[index[1]].readable = true;
pde[index[1]].executable = true;
@@ -494,8 +486,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* page range starting at nested_paddr to the page range starting at paddr.
*/
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot)
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
{
size_t page_size = vm->page_size;
size_t npages = size / page_size;
@@ -504,7 +495,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+ nested_pg_map(vmx, vm, nested_paddr, paddr);
nested_paddr += page_size;
paddr += page_size;
}
@@ -514,7 +505,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* physical pages in VM.
*/
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot)
+ uint32_t memslot)
{
sparsebit_idx_t i, last;
struct userspace_mem_region *region =
@@ -530,24 +521,21 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
nested_map(vmx, vm,
(uint64_t)i << vm->page_shift,
(uint64_t)i << vm->page_shift,
- 1 << vm->page_shift,
- eptp_memslot);
+ 1 << vm->page_shift);
}
}
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
- vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
- vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 6096bf0a5b34..98351ba0933c 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -71,14 +71,22 @@ struct memslot_antagonist_args {
};
static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
- uint64_t nr_modifications, uint64_t gpa)
+ uint64_t nr_modifications)
{
+ const uint64_t pages = 1;
+ uint64_t gpa;
int i;
+ /*
+ * Add the dummy memslot just below the perf_test_util memslot, which is
+ * at the top of the guest physical address space.
+ */
+ gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
+
for (i = 0; i < nr_modifications; i++) {
usleep(delay);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
- DUMMY_MEMSLOT_INDEX, 1, 0);
+ DUMMY_MEMSLOT_INDEX, pages, 0);
vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
}
@@ -120,11 +128,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("Started all vCPUs\n");
add_remove_memslot(vm, p->memslot_modification_delay,
- p->nr_memslot_modifications,
- guest_test_phys_mem +
- (guest_percpu_mem_size * nr_vcpus) +
- perf_test_args.host_page_size +
- perf_test_args.guest_page_size);
+ p->nr_memslot_modifications);
run_vcpus = false;
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
new file mode 100644
index 000000000000..d6e381e01db7
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A memslot-related performance benchmark.
+ *
+ * Copyright (C) 2021 Oracle and/or its affiliates.
+ *
+ * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
+ */
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MEM_SIZE ((512U << 20) + 4096)
+#define MEM_SIZE_PAGES (MEM_SIZE / 4096)
+#define MEM_GPA 0x10000000UL
+#define MEM_AUX_GPA MEM_GPA
+#define MEM_SYNC_GPA MEM_AUX_GPA
+#define MEM_TEST_GPA (MEM_AUX_GPA + 4096)
+#define MEM_TEST_SIZE (MEM_SIZE - 4096)
+static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
+static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
+
+/*
+ * 32 MiB is max size that gets well over 100 iterations on 509 slots.
+ * Considering that each slot needs to have at least one page up to
+ * 8194 slots in use can then be tested (although with slightly
+ * limited resolution).
+ */
+#define MEM_SIZE_MAP ((32U << 20) + 4096)
+#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096)
+#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096)
+#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
+static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");
+
+/*
+ * 128 MiB is min size that fills 32k slots with at least one page in each
+ * while at the same time gets 100+ iterations in such test
+ */
+#define MEM_TEST_UNMAP_SIZE (128U << 20)
+#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096)
+/* 2 MiB chunk size like a typical huge page */
+#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12))
+static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
+ "invalid unmap test region size");
+static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
+ "invalid unmap test region size");
+static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
+ (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
+ "invalid unmap test region size");
+
+/*
+ * For the move active test the middle of the test area is placed on
+ * a memslot boundary: half lies in the memslot being moved, half in
+ * other memslot(s).
+ *
+ * When running this test with 32k memslots (32764, really) each memslot
+ * contains 4 pages.
+ * The last one additionally contains the remaining 21 pages of memory,
+ * for the total size of 25 pages.
+ * Hence, the maximum size here is 50 pages.
+ */
+#define MEM_TEST_MOVE_SIZE_PAGES (50)
+#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096)
+#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
+static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
+ "invalid move test region size");
+
+#define MEM_TEST_VAL_1 0x1122334455667788
+#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
+
+struct vm_data {
+ struct kvm_vm *vm;
+ pthread_t vcpu_thread;
+ uint32_t nslots;
+ uint64_t npages;
+ uint64_t pages_per_slot;
+ void **hva_slots;
+ bool mmio_ok;
+ uint64_t mmio_gpa_min;
+ uint64_t mmio_gpa_max;
+};
+
+struct sync_area {
+ atomic_bool start_flag;
+ atomic_bool exit_flag;
+ atomic_bool sync_flag;
+ void *move_area_ptr;
+};
+
+/*
+ * Technically, we need also for the atomic bool to be address-free, which
+ * is recommended, but not strictly required, by C11 for lockless
+ * implementations.
+ * However, in practice both GCC and Clang fulfill this requirement on
+ * all KVM-supported platforms.
+ */
+static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
+
+static sem_t vcpu_ready;
+
+static bool map_unmap_verify;
+
+static bool verbose;
+#define pr_info_v(...) \
+ do { \
+ if (verbose) \
+ pr_info(__VA_ARGS__); \
+ } while (0)
+
+static void *vcpu_worker(void *data)
+{
+ struct vm_data *vm = data;
+ struct kvm_run *run;
+ struct ucall uc;
+ uint64_t cmd;
+
+ run = vcpu_state(vm->vm, VCPU_ID);
+ while (1) {
+ vcpu_run(vm->vm, VCPU_ID);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ cmd = get_ucall(vm->vm, VCPU_ID, &uc);
+ if (cmd != UCALL_SYNC)
+ break;
+
+ sem_post(&vcpu_ready);
+ continue;
+ }
+
+ if (run->exit_reason != KVM_EXIT_MMIO)
+ break;
+
+ TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
+ TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
+ TEST_ASSERT(run->mmio.len == 8,
+ "Unexpected exit mmio size = %u", run->mmio.len);
+ TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
+ run->mmio.phys_addr <= vm->mmio_gpa_max,
+ "Unexpected exit mmio address = 0x%llx",
+ run->mmio.phys_addr);
+ }
+
+ if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+ TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2]);
+
+ return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+ struct timespec ts;
+
+ TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+ "clock_gettime() failed: %d\n", errno);
+
+ ts.tv_sec += 2;
+ TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+ "sem_timedwait() failed: %d\n", errno);
+}
+
+static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
+{
+ uint64_t gpage, pgoffs;
+ uint32_t slot, slotoffs;
+ void *base;
+
+ TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
+ TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
+ "Too high gpa to translate");
+ gpa -= MEM_GPA;
+
+ gpage = gpa / 4096;
+ pgoffs = gpa % 4096;
+ slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
+ slotoffs = gpage - (slot * data->pages_per_slot);
+
+ if (rempages) {
+ uint64_t slotpages;
+
+ if (slot == data->nslots - 1)
+ slotpages = data->npages - slot * data->pages_per_slot;
+ else
+ slotpages = data->pages_per_slot;
+
+ TEST_ASSERT(!pgoffs,
+ "Asking for remaining pages in slot but gpa not page aligned");
+ *rempages = slotpages - slotoffs;
+ }
+
+ base = data->hva_slots[slot];
+ return (uint8_t *)base + slotoffs * 4096 + pgoffs;
+}
+
+static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
+{
+ TEST_ASSERT(slot < data->nslots, "Too high slot number");
+
+ return MEM_GPA + slot * data->pages_per_slot * 4096;
+}
+
+static struct vm_data *alloc_vm(void)
+{
+ struct vm_data *data;
+
+ data = malloc(sizeof(*data));
+ TEST_ASSERT(data, "malloc(vmdata) failed");
+
+ data->vm = NULL;
+ data->hva_slots = NULL;
+
+ return data;
+}
+
+static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
+ void *guest_code, uint64_t mempages,
+ struct timespec *slot_runtime)
+{
+ uint32_t max_mem_slots;
+ uint64_t rempages;
+ uint64_t guest_addr;
+ uint32_t slot;
+ struct timespec tstart;
+ struct sync_area *sync;
+
+ max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ TEST_ASSERT(max_mem_slots > 1,
+ "KVM_CAP_NR_MEMSLOTS should be greater than 1");
+ TEST_ASSERT(nslots > 1 || nslots == -1,
+ "Slot count cap should be greater than 1");
+ if (nslots != -1)
+ max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
+ pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);
+
+ TEST_ASSERT(mempages > 1,
+ "Can't test without any memory");
+
+ data->npages = mempages;
+ data->nslots = max_mem_slots - 1;
+ data->pages_per_slot = mempages / data->nslots;
+ if (!data->pages_per_slot) {
+ *maxslots = mempages + 1;
+ return false;
+ }
+
+ rempages = mempages % data->nslots;
+ data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
+ TEST_ASSERT(data->hva_slots, "malloc() fail");
+
+ data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
+
+ pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
+ max_mem_slots - 1, data->pages_per_slot, rempages);
+
+ clock_gettime(CLOCK_MONOTONIC, &tstart);
+ for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
+ uint64_t npages;
+
+ npages = data->pages_per_slot;
+ if (slot == max_mem_slots - 1)
+ npages += rempages;
+
+ vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
+ guest_addr, slot, npages,
+ 0);
+ guest_addr += npages * 4096;
+ }
+ *slot_runtime = timespec_elapsed(tstart);
+
+ for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
+ uint64_t npages;
+ uint64_t gpa;
+
+ npages = data->pages_per_slot;
+ if (slot == max_mem_slots - 2)
+ npages += rempages;
+
+ gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
+ slot + 1);
+ TEST_ASSERT(gpa == guest_addr,
+ "vm_phy_pages_alloc() failed\n");
+
+ data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
+ memset(data->hva_slots[slot], 0, npages * 4096);
+
+ guest_addr += npages * 4096;
+ }
+
+ virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);
+
+ sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+ atomic_init(&sync->start_flag, false);
+ atomic_init(&sync->exit_flag, false);
+ atomic_init(&sync->sync_flag, false);
+
+ data->mmio_ok = false;
+
+ return true;
+}
+
+static void launch_vm(struct vm_data *data)
+{
+ pr_info_v("Launching the test VM\n");
+
+ pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
+
+ /* Ensure the guest thread is spun up. */
+ wait_for_vcpu();
+}
+
+static void free_vm(struct vm_data *data)
+{
+ kvm_vm_free(data->vm);
+ free(data->hva_slots);
+ free(data);
+}
+
+static void wait_guest_exit(struct vm_data *data)
+{
+ pthread_join(data->vcpu_thread, NULL);
+}
+
+static void let_guest_run(struct sync_area *sync)
+{
+ atomic_store_explicit(&sync->start_flag, true, memory_order_release);
+}
+
+static void guest_spin_until_start(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
+ ;
+}
+
+static void make_guest_exit(struct sync_area *sync)
+{
+ atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
+}
+
+static bool _guest_should_exit(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
+}
+
+#define guest_should_exit() unlikely(_guest_should_exit())
+
+/*
+ * noinline so we can easily see how much time the host spends waiting
+ * for the guest.
+ * For the same reason use alarm() instead of polling clock_gettime()
+ * to implement a wait timeout.
+ */
+static noinline void host_perform_sync(struct sync_area *sync)
+{
+ alarm(2);
+
+ atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
+ while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
+ ;
+
+ alarm(0);
+}
+
+static bool guest_perform_sync(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ bool expected;
+
+ do {
+ if (guest_should_exit())
+ return false;
+
+ expected = true;
+ } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
+ &expected, false,
+ memory_order_acq_rel,
+ memory_order_relaxed));
+
+ return true;
+}
+
+static void guest_code_test_memslot_move(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (!guest_should_exit()) {
+ uintptr_t ptr;
+
+ for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
+ ptr += 4096)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ /*
+ * No host sync here since the MMIO exits are so expensive
+ * that the host would spend most of its time waiting for
+ * the guest and so instead of measuring memslot move
+ * performance we would measure the performance and
+ * likelihood of MMIO exits
+ */
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_map(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr;
+
+ for (ptr = MEM_TEST_GPA;
+ ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+ ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
+ *(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_unmap(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr = MEM_TEST_GPA;
+
+ /*
+ * We can afford to access (map) just a small number of pages
+ * per host sync as otherwise the host will spend
+ * a significant amount of its time waiting for the guest
+ * (instead of doing unmap operations), so this will
+ * effectively turn this test into a map performance test.
+ *
+ * Just access a single page to be on the safe side.
+ */
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ ptr += MEM_TEST_UNMAP_SIZE / 2;
+ *(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_rw(void)
+{
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr;
+
+ for (ptr = MEM_TEST_GPA;
+ ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ for (ptr = MEM_TEST_GPA + 4096 / 2;
+ ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
+ uint64_t val = *(uint64_t *)ptr;
+
+ GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
+ *(uint64_t *)ptr = 0;
+ }
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static bool test_memslot_move_prepare(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots, bool isactive)
+{
+ uint64_t movesrcgpa, movetestgpa;
+
+ movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+
+ if (isactive) {
+ uint64_t lastpages;
+
+ vm_gpa2hva(data, movesrcgpa, &lastpages);
+ if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
+ *maxslots = 0;
+ return false;
+ }
+ }
+
+ movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
+ sync->move_area_ptr = (void *)movetestgpa;
+
+ if (isactive) {
+ data->mmio_ok = true;
+ data->mmio_gpa_min = movesrcgpa;
+ data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
+ }
+
+ return true;
+}
+
+static bool test_memslot_move_prepare_active(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots)
+{
+ return test_memslot_move_prepare(data, sync, maxslots, true);
+}
+
+static bool test_memslot_move_prepare_inactive(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots)
+{
+ return test_memslot_move_prepare(data, sync, maxslots, false);
+}
+
+static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint64_t movesrcgpa;
+
+ movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+ vm_mem_region_move(data->vm, data->nslots - 1 + 1,
+ MEM_TEST_MOVE_GPA_DEST);
+ vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
+}
+
+static void test_memslot_do_unmap(struct vm_data *data,
+ uint64_t offsp, uint64_t count)
+{
+ uint64_t gpa, ctr;
+
+ for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
+ uint64_t npages;
+ void *hva;
+ int ret;
+
+ hva = vm_gpa2hva(data, gpa, &npages);
+ TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
+ npages = min(npages, count - ctr);
+ ret = madvise(hva, npages * 4096, MADV_DONTNEED);
+ TEST_ASSERT(!ret,
+ "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
+ hva, gpa);
+ ctr += npages;
+ gpa += npages * 4096;
+ }
+ TEST_ASSERT(ctr == count,
+ "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
+}
+
+static void test_memslot_map_unmap_check(struct vm_data *data,
+ uint64_t offsp, uint64_t valexp)
+{
+ uint64_t gpa;
+ uint64_t *val;
+
+ if (!map_unmap_verify)
+ return;
+
+ gpa = MEM_TEST_GPA + offsp * 4096;
+ val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
+ TEST_ASSERT(*val == valexp,
+ "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
+ *val, valexp, gpa);
+ *val = 0;
+}
+
+static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
+{
+ /*
+ * Unmap the second half of the test area while guest writes to (maps)
+ * the first half.
+ */
+ test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
+ MEM_TEST_MAP_SIZE_PAGES / 2);
+
+ /*
+ * Wait for the guest to finish writing the first half of the test
+ * area, verify the written value on the first and the last page of
+ * this area and then unmap it.
+ * Meanwhile, the guest is writing to (mapping) the second half of
+ * the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+ test_memslot_map_unmap_check(data,
+ MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
+ MEM_TEST_VAL_1);
+ test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);
+
+
+ /*
+ * Wait for the guest to finish writing the second half of the test
+ * area and verify the written value on the first and the last page
+ * of this area.
+ * The area will be unmapped at the beginning of the next loop
+ * iteration.
+ * Meanwhile, the guest is writing to (mapping) the first half of
+ * the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
+ MEM_TEST_VAL_2);
+ test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
+ MEM_TEST_VAL_2);
+}
+
+static void test_memslot_unmap_loop_common(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t chunk)
+{
+ uint64_t ctr;
+
+ /*
+ * Wait for the guest to finish mapping page(s) in the first half
+ * of the test area, verify the written value and then perform unmap
+ * of this area.
+ * Meanwhile, the guest is writing to (mapping) page(s) in the second
+ * half of the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+ for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
+ test_memslot_do_unmap(data, ctr, chunk);
+
+ /* Likewise, but for the opposite host / guest areas */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
+ MEM_TEST_VAL_2);
+ for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
+ ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
+ test_memslot_do_unmap(data, ctr, chunk);
+}
+
+static void test_memslot_unmap_loop(struct vm_data *data,
+ struct sync_area *sync)
+{
+ test_memslot_unmap_loop_common(data, sync, 1);
+}
+
+static void test_memslot_unmap_loop_chunked(struct vm_data *data,
+ struct sync_area *sync)
+{
+ test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
+}
+
+static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint64_t gptr;
+
+ for (gptr = MEM_TEST_GPA + 4096 / 2;
+ gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
+ *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
+
+ host_perform_sync(sync);
+
+ for (gptr = MEM_TEST_GPA;
+ gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
+ uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
+ uint64_t val = *vptr;
+
+ TEST_ASSERT(val == MEM_TEST_VAL_1,
+ "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
+ val, gptr);
+ *vptr = 0;
+ }
+
+ host_perform_sync(sync);
+}
+
+struct test_data {
+ const char *name;
+ uint64_t mem_size;
+ void (*guest_code)(void);
+ bool (*prepare)(struct vm_data *data, struct sync_area *sync,
+ uint64_t *maxslots);
+ void (*loop)(struct vm_data *data, struct sync_area *sync);
+};
+
+static bool test_execute(int nslots, uint64_t *maxslots,
+ unsigned int maxtime,
+ const struct test_data *tdata,
+ uint64_t *nloops,
+ struct timespec *slot_runtime,
+ struct timespec *guest_runtime)
+{
+ uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
+ struct vm_data *data;
+ struct sync_area *sync;
+ struct timespec tstart;
+ bool ret = true;
+
+ data = alloc_vm();
+ if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
+ mem_size, slot_runtime)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+
+ if (tdata->prepare &&
+ !tdata->prepare(data, sync, maxslots)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ launch_vm(data);
+
+ clock_gettime(CLOCK_MONOTONIC, &tstart);
+ let_guest_run(sync);
+
+ while (1) {
+ *guest_runtime = timespec_elapsed(tstart);
+ if (guest_runtime->tv_sec >= maxtime)
+ break;
+
+ tdata->loop(data, sync);
+
+ (*nloops)++;
+ }
+
+ make_guest_exit(sync);
+ wait_guest_exit(data);
+
+exit_free:
+ free_vm(data);
+
+ return ret;
+}
+
+static const struct test_data tests[] = {
+ {
+ .name = "map",
+ .mem_size = MEM_SIZE_MAP_PAGES,
+ .guest_code = guest_code_test_memslot_map,
+ .loop = test_memslot_map_loop,
+ },
+ {
+ .name = "unmap",
+ .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop,
+ },
+ {
+ .name = "unmap chunked",
+ .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop_chunked,
+ },
+ {
+ .name = "move active area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_active,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "move inactive area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_inactive,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "RW",
+ .guest_code = guest_code_test_memslot_rw,
+ .loop = test_memslot_rw_loop
+ },
+};
+
+#define NTESTS ARRAY_SIZE(tests)
+
+struct test_args {
+ int tfirst;
+ int tlast;
+ int nslots;
+ int seconds;
+ int runs;
+};
+
+static void help(char *name, struct test_args *targs)
+{
+ int ctr;
+
+ pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
+ name);
+ pr_info(" -h: print this help screen.\n");
+ pr_info(" -v: enable verbose mode (not for benchmarking).\n");
+ pr_info(" -d: enable extra debug checks.\n");
+ pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
+ targs->nslots);
+ pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
+ targs->tfirst, NTESTS - 1);
+ pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
+ targs->tlast, NTESTS - 1);
+ pr_info(" -l: specify the test length in seconds (currently: %i)\n",
+ targs->seconds);
+ pr_info(" -r: specify the number of runs per test (currently: %i)\n",
+ targs->runs);
+
+ pr_info("\nAvailable tests:\n");
+ for (ctr = 0; ctr < NTESTS; ctr++)
+ pr_info("%d: %s\n", ctr, tests[ctr].name);
+}
+
+static bool parse_args(int argc, char *argv[],
+ struct test_args *targs)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
+ switch (opt) {
+ case 'h':
+ default:
+ help(argv[0], targs);
+ return false;
+ case 'v':
+ verbose = true;
+ break;
+ case 'd':
+ map_unmap_verify = true;
+ break;
+ case 's':
+ targs->nslots = atoi(optarg);
+ if (targs->nslots <= 0 && targs->nslots != -1) {
+ pr_info("Slot count cap has to be positive or -1 for no cap\n");
+ return false;
+ }
+ break;
+ case 'f':
+ targs->tfirst = atoi(optarg);
+ if (targs->tfirst < 0) {
+ pr_info("First test to run has to be non-negative\n");
+ return false;
+ }
+ break;
+ case 'e':
+ targs->tlast = atoi(optarg);
+ if (targs->tlast < 0 || targs->tlast >= NTESTS) {
+ pr_info("Last test to run has to be non-negative and less than %zu\n",
+ NTESTS);
+ return false;
+ }
+ break;
+ case 'l':
+ targs->seconds = atoi(optarg);
+ if (targs->seconds < 0) {
+ pr_info("Test length in seconds has to be non-negative\n");
+ return false;
+ }
+ break;
+ case 'r':
+ targs->runs = atoi(optarg);
+ if (targs->runs <= 0) {
+ pr_info("Runs per test has to be positive\n");
+ return false;
+ }
+ break;
+ }
+ }
+
+ if (optind < argc) {
+ help(argv[0], targs);
+ return false;
+ }
+
+ if (targs->tfirst > targs->tlast) {
+ pr_info("First test to run cannot be greater than the last test to run\n");
+ return false;
+ }
+
+ return true;
+}
+
+struct test_result {
+ struct timespec slot_runtime, guest_runtime, iter_runtime;
+ int64_t slottimens, runtimens;
+ uint64_t nloops;
+};
+
+static bool test_loop(const struct test_data *data,
+ const struct test_args *targs,
+ struct test_result *rbestslottime,
+ struct test_result *rbestruntime)
+{
+ uint64_t maxslots;
+ struct test_result result;
+
+ result.nloops = 0;
+ if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
+ &result.nloops,
+ &result.slot_runtime, &result.guest_runtime)) {
+ if (maxslots)
+ pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
+ maxslots);
+ else
+ pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
+
+ return false;
+ }
+
+ pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
+ result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
+ result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
+ if (!result.nloops) {
+ pr_info("No full loops done - too short test time or system too loaded?\n");
+ return true;
+ }
+
+ result.iter_runtime = timespec_div(result.guest_runtime,
+ result.nloops);
+ pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
+ result.nloops,
+ result.iter_runtime.tv_sec,
+ result.iter_runtime.tv_nsec);
+ result.slottimens = timespec_to_ns(result.slot_runtime);
+ result.runtimens = timespec_to_ns(result.iter_runtime);
+
+ /*
+ * Only rank the slot setup time for tests using the whole test memory
+ * area so they are comparable
+ */
+ if (!data->mem_size &&
+ (!rbestslottime->slottimens ||
+ result.slottimens < rbestslottime->slottimens))
+ *rbestslottime = result;
+ if (!rbestruntime->runtimens ||
+ result.runtimens < rbestruntime->runtimens)
+ *rbestruntime = result;
+
+ return true;
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_args targs = {
+ .tfirst = 0,
+ .tlast = NTESTS - 1,
+ .nslots = -1,
+ .seconds = 5,
+ .runs = 1,
+ };
+ struct test_result rbestslottime;
+ int tctr;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ if (!parse_args(argc, argv, &targs))
+ return -1;
+
+ rbestslottime.slottimens = 0;
+ for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
+ const struct test_data *data = &tests[tctr];
+ unsigned int runctr;
+ struct test_result rbestruntime;
+
+ if (tctr > targs.tfirst)
+ pr_info("\n");
+
+ pr_info("Testing %s performance with %i runs, %d seconds each\n",
+ data->name, targs.runs, targs.seconds);
+
+ rbestruntime.runtimens = 0;
+ for (runctr = 0; runctr < targs.runs; runctr++)
+ if (!test_loop(data, &targs,
+ &rbestslottime, &rbestruntime))
+ break;
+
+ if (rbestruntime.runtimens)
+ pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
+ rbestruntime.iter_runtime.tv_sec,
+ rbestruntime.iter_runtime.tv_nsec,
+ rbestruntime.nloops);
+ }
+
+ if (rbestslottime.slottimens)
+ pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
+ rbestslottime.slot_runtime.tv_sec,
+ rbestslottime.slot_runtime.tv_nsec);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 978f5b5f4dc0..85b18bb8f762 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -132,7 +132,7 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
- virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);
/* Ditto for the host mapping so that both pages can be zeroed. */
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
@@ -376,7 +376,7 @@ static void test_add_max_memory_regions(void)
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment,
+ mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
@@ -401,7 +401,7 @@ static void test_add_max_memory_regions(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment);
+ munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
munmap(mem_extra, MEM_REGION_SIZE);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index fcc840088c91..b0031f2d38fd 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -73,8 +73,6 @@ static void steal_time_init(struct kvm_vm *vm)
for (i = 0; i < NR_VCPUS; ++i) {
int ret;
- vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vm, st_gva[i]);
@@ -295,7 +293,7 @@ int main(int ac, char **av)
vm = vm_create_default(0, 0, guest_code);
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
- virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
ucall_init(vm, NULL);
/* Add the rest of the VCPUs */
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
new file mode 100644
index 000000000000..f070ff0224fa
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID 1
+#define PAGE_SIZE 4096
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT 10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(void)
+{
+ __asm__ __volatile__("flds (%[addr])"
+ :: [addr]"r"(MEM_REGION_GVA));
+
+ GUEST_DONE();
+}
+
+static void run_guest(struct kvm_vm *vm)
+{
+ int rc;
+
+ rc = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+}
+
+/*
+ * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2,
+ * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)".
+ */
+#define GET_RM(insn_byte) (insn_byte & 0x7)
+#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3)
+#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6)
+
+/* Ensure we are dealing with a simple 2-byte flds instruction. */
+static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size)
+{
+ return insn_size >= 2 &&
+ insn_bytes[0] == 0xd9 &&
+ GET_REG(insn_bytes[1]) == 0x0 &&
+ GET_MOD(insn_bytes[1]) == 0x0 &&
+ /* Ensure there is no SIB byte. */
+ GET_RM(insn_bytes[1]) != 0x4 &&
+ /* Ensure there is no displacement byte. */
+ GET_RM(insn_bytes[1]) != 0x5;
+}
+
+static void process_exit_on_emulation_error(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_regs regs;
+ uint8_t *insn_bytes;
+ uint8_t insn_size;
+ uint64_t flags;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Unexpected suberror: %u",
+ run->emulation_failure.suberror);
+
+ if (run->emulation_failure.ndata >= 1) {
+ flags = run->emulation_failure.flags;
+ if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) &&
+ run->emulation_failure.ndata >= 3) {
+ insn_size = run->emulation_failure.insn_size;
+ insn_bytes = run->emulation_failure.insn_bytes;
+
+ TEST_ASSERT(insn_size <= 15 && insn_size > 0,
+ "Unexpected instruction size: %u",
+ insn_size);
+
+ TEST_ASSERT(is_flds(insn_bytes, insn_size),
+ "Unexpected instruction. Expected 'flds' (0xd9 /0)");
+
+ /*
+ * If is_flds() succeeded then the instruction bytes
+ * contained an flds instruction that is 2-bytes in
+ * length (ie: no prefix, no SIB, no displacement).
+ */
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ regs.rip += 2;
+ vcpu_regs_set(vm, VCPU_ID, &regs);
+ }
+ }
+}
+
+static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc)
+{
+ TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], __FILE__,
+ uc->args[1]);
+}
+
+static void check_for_guest_assert(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ if (run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
+ do_guest_assert(vm, &uc);
+ }
+}
+
+static void process_ucall_done(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ check_for_guest_assert(vm);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+ "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+ uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ do_guest_assert(vm, &uc);
+ break;
+ case UCALL_DONE:
+ process_ucall_done(vm);
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected ucall");
+ }
+
+ return uc.cmd;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_enable_cap emul_failure_cap = {
+ .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE,
+ .args[0] = 1,
+ };
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_vm *vm;
+ uint64_t gpa, pte;
+ uint64_t *hva;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) {
+ printf("module parameter 'allow_smaller_maxphyaddr' is not set. Skipping test.\n");
+ return 0;
+ }
+
+ cpuid = kvm_get_supported_cpuid();
+
+ entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+ entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR;
+ set_cpuid(cpuid, entry);
+
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+
+ rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+ TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+ vm_enable_cap(vm, &emul_failure_cap);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / PAGE_SIZE, 0);
+ gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+ MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+ virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, PAGE_SIZE);
+ pte = vm_get_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA);
+ vm_set_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA, pte | (1ull << 36));
+
+ run_guest(vm);
+ process_exit_on_emulation_error(vm);
+ run_guest(vm);
+
+ TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE");
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 63096cea26c6..2b46dcca86a8 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -22,15 +22,6 @@
static int ud_count;
-void enable_x2apic(void)
-{
- uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
-
- wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
- MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
- wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
-}
-
static void guest_ud_handler(struct ex_regs *regs)
{
ud_count++;
@@ -59,7 +50,7 @@ void guest_code(struct vmx_pages *vmx_pages)
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- enable_x2apic();
+ x2apic_enable();
GUEST_SYNC(1);
GUEST_SYNC(2);
@@ -121,14 +112,38 @@ void inject_nmi(struct kvm_vm *vm)
vcpu_events_set(vm, VCPU_ID, &events);
}
+static void save_restore_vm(struct kvm_vm *vm)
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_x86_state *state;
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+ vcpu_enable_evmcs(vm, VCPU_ID);
+ vcpu_load_state(vm, VCPU_ID, state);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva = 0;
- struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
struct kvm_run *run;
- struct kvm_x86_state *state;
struct ucall uc;
int stage;
@@ -145,21 +160,18 @@ int main(int argc, char *argv[])
vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
- vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
pr_info("Running L1 which uses EVMCS to run L2\n");
for (stage = 1;; stage++) {
+ run = vcpu_state(vm, VCPU_ID);
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
@@ -184,32 +196,23 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- state = vcpu_save_state(vm, VCPU_ID);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- vcpu_enable_evmcs(vm, VCPU_ID);
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
+ save_restore_vm(vm);
/* Force immediate L2->L1 exit before resuming */
if (stage == 8) {
pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
inject_nmi(vm);
}
+
+ /*
+ * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+ * restored VM (before the first KVM_RUN) to check that
+ * KVM_STATE_NESTED_EVMCS is not lost.
+ */
+ if (stage == 9) {
+ pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+ save_restore_vm(vm);
+ }
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
index 9b78e8889638..a711f83749ea 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -19,7 +19,12 @@ struct {
u32 function;
u32 index;
} mangled_cpuids[] = {
+ /*
+ * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR,
+ * which are not controlled for by this test.
+ */
{.function = 0xd, .index = 0},
+ {.function = 0xd, .index = 1},
};
static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
@@ -140,8 +145,7 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
{
int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
- vm_vaddr_t gva = vm_vaddr_alloc(vm, size,
- getpagesize(), 0, 0);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
memcpy(guest_cpuids, cpuid, size);
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
index cb953df4d7d0..8aed0db1331d 100644
--- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -37,9 +37,7 @@ static void test_get_msr_index(void)
int old_res, res, kvm_fd, r;
struct kvm_msr_list *list;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
old_res = kvm_num_index_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
@@ -101,9 +99,7 @@ static void test_get_msr_feature(void)
int res, old_res, i, kvm_fd;
struct kvm_msr_list *feature_list;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_fd = open_kvm_dev_path_or_exit();
old_res = kvm_num_feature_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index 7f1d2765572c..bab10ae787b6 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -7,6 +7,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "hyperv.h"
struct ms_hyperv_tsc_page {
volatile u32 tsc_sequence;
@@ -15,13 +16,6 @@ struct ms_hyperv_tsc_page {
volatile s64 tsc_offset;
} __packed;
-#define HV_X64_MSR_GUEST_OS_ID 0x40000000
-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
-#define HV_X64_MSR_REFERENCE_TSC 0x40000021
-#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
-
/* Simplified mul_u64_u64_shr() */
static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
{
@@ -220,7 +214,7 @@ int main(void)
vcpu_set_hv_cpuid(vm, VCPU_ID);
- tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ tsc_page_gva = vm_vaddr_alloc_page(vm);
memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned\n");
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
new file mode 100644
index 000000000000..42bd658f52a8
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define VCPU_ID 0
+#define LINUX_OS_ID ((u64)0x8100 << 48)
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+static u64 do_rdmsr(u32 idx)
+{
+ u32 lo, hi;
+
+ asm volatile("rdmsr_start: rdmsr;"
+ "rdmsr_end:"
+ : "=a"(lo), "=c"(hi)
+ : "c"(idx));
+
+ return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+static void do_wrmsr(u32 idx, u64 val)
+{
+ u32 lo, hi;
+
+ lo = val;
+ hi = val >> 32;
+
+ asm volatile("wrmsr_start: wrmsr;"
+ "wrmsr_end:"
+ : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+static int nr_gp;
+
+static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address)
+{
+ u64 hv_status;
+
+ asm volatile("mov %3, %%r8\n"
+ "vmcall"
+ : "=a" (hv_status),
+ "+c" (control), "+d" (input_address)
+ : "r" (output_address)
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+
+ return hv_status;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ unsigned char *rip = (unsigned char *)regs->rip;
+ bool r, w;
+
+ r = rip == &rdmsr_start;
+ w = rip == &wrmsr_start;
+ GUEST_ASSERT(r || w);
+
+ nr_gp++;
+
+ if (r)
+ regs->rip = (uint64_t)&rdmsr_end;
+ else
+ regs->rip = (uint64_t)&wrmsr_end;
+}
+
+struct msr_data {
+ uint32_t idx;
+ bool available;
+ bool write;
+ u64 write_val;
+};
+
+struct hcall_data {
+ uint64_t control;
+ uint64_t expect;
+};
+
+static void guest_msr(struct msr_data *msr)
+{
+ int i = 0;
+
+ while (msr->idx) {
+ WRITE_ONCE(nr_gp, 0);
+ if (!msr->write)
+ do_rdmsr(msr->idx);
+ else
+ do_wrmsr(msr->idx, msr->write_val);
+
+ if (msr->available)
+ GUEST_ASSERT(READ_ONCE(nr_gp) == 0);
+ else
+ GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+
+ GUEST_SYNC(i++);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+ int i = 0;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+ while (hcall->control) {
+ GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
+ pgs_gpa + 4096) == hcall->expect);
+ GUEST_SYNC(i++);
+ }
+
+ GUEST_DONE();
+}
+
+static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 *feat,
+ struct kvm_cpuid_entry2 *recomm,
+ struct kvm_cpuid_entry2 *dbg)
+{
+ TEST_ASSERT(set_cpuid(cpuid, feat),
+ "failed to set KVM_CPUID_FEATURES leaf");
+ TEST_ASSERT(set_cpuid(cpuid, recomm),
+ "failed to set HYPERV_CPUID_ENLIGHTMENT_INFO leaf");
+ TEST_ASSERT(set_cpuid(cpuid, dbg),
+ "failed to set HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES leaf");
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+}
+
+static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
+ struct kvm_cpuid2 *best)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int stage = 0, r;
+ struct kvm_cpuid_entry2 feat = {
+ .function = HYPERV_CPUID_FEATURES
+ };
+ struct kvm_cpuid_entry2 recomm = {
+ .function = HYPERV_CPUID_ENLIGHTMENT_INFO
+ };
+ struct kvm_cpuid_entry2 dbg = {
+ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+ };
+ struct kvm_enable_cap cap = {0};
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (true) {
+ switch (stage) {
+ case 0:
+ /*
+ * Only available when Hyper-V identification is set
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 1:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 2:
+ feat.eax |= HV_MSR_HYPERCALL_AVAILABLE;
+ /*
+ * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+ * HV_X64_MSR_HYPERCALL available.
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 1;
+ msr->write_val = LINUX_OS_ID;
+ msr->available = 1;
+ break;
+ case 3:
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 4:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+
+ case 5:
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 6:
+ feat.eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 7:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 8:
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 9:
+ feat.eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 10:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 11:
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 12:
+ feat.eax |= HV_MSR_VP_INDEX_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 13:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 14:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 15:
+ feat.eax |= HV_MSR_RESET_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 16:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 17:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 18:
+ feat.eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 19:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 20:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 21:
+ /*
+ * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+ * capability enabled and guest visible CPUID bit unset.
+ */
+ cap.cap = KVM_CAP_HYPERV_SYNIC2;
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+ break;
+ case 22:
+ feat.eax |= HV_MSR_SYNIC_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 23:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 24:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 25:
+ feat.eax |= HV_MSR_SYNTIMER_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 26:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+ case 27:
+ /* Direct mode test */
+ msr->write = 1;
+ msr->write_val = 1 << 12;
+ msr->available = 0;
+ break;
+ case 28:
+ feat.edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+ msr->available = 1;
+ break;
+
+ case 29:
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 30:
+ feat.eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+
+ case 31:
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 32:
+ feat.eax |= HV_ACCESS_FREQUENCY_MSRS;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 33:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 34:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 35:
+ feat.eax |= HV_ACCESS_REENLIGHTENMENT;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 36:
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+ case 37:
+ /* Can only write '0' */
+ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 38:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 39:
+ feat.edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 40:
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 42:
+ feat.edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+ dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 43:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 44:
+ /* END */
+ msr->idx = 0;
+ break;
+ }
+
+ hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+ if (msr->idx)
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
+ else
+ pr_debug("Stage %d: finish\n", stage);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)\n",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+
+ stage++;
+ }
+}
+
+static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
+ void *input, void *output, struct kvm_cpuid2 *best)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int stage = 0, r;
+ struct kvm_cpuid_entry2 feat = {
+ .function = HYPERV_CPUID_FEATURES,
+ .eax = HV_MSR_HYPERCALL_AVAILABLE
+ };
+ struct kvm_cpuid_entry2 recomm = {
+ .function = HYPERV_CPUID_ENLIGHTMENT_INFO
+ };
+ struct kvm_cpuid_entry2 dbg = {
+ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+ };
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (true) {
+ switch (stage) {
+ case 0:
+ hcall->control = 0xdeadbeef;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+
+ case 1:
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 2:
+ feat.ebx |= HV_POST_MESSAGES;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 3:
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 4:
+ feat.ebx |= HV_SIGNAL_EVENTS;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 5:
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ case 6:
+ dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 7:
+ feat.ebx |= HV_DEBUGGING;
+ hcall->expect = HV_STATUS_OPERATION_DENIED;
+ break;
+
+ case 8:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 9:
+ recomm.eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 10:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 11:
+ recomm.eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 12:
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 13:
+ recomm.eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ case 14:
+ /* Nothing in 'sparse banks' -> success */
+ hcall->control = HVCALL_SEND_IPI_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 15:
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 16:
+ recomm.ebx = 0xfff;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 17:
+ /* END */
+ hcall->control = 0;
+ break;
+ }
+
+ hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+ if (hcall->control)
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage,
+ hcall->control);
+ else
+ pr_debug("Stage %d: finish\n", stage);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)\n",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+
+ stage++;
+ }
+}
+
+int main(void)
+{
+ struct kvm_cpuid2 *best;
+ struct kvm_vm *vm;
+ vm_vaddr_t msr_gva, hcall_page, hcall_params;
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+ .args = {1}
+ };
+
+ /* Test MSRs */
+ vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+ pr_info("Testing access to Hyper-V specific MSRs\n");
+ guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
+ best);
+ kvm_vm_free(vm);
+
+ /* Test hypercalls */
+ vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+ vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ pr_info("Testing access to Hyper-V hypercalls\n");
+ guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
+ addr_gva2hva(vm, hcall_page),
+ addr_gva2hva(vm, hcall_page) + getpagesize(),
+ best);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 732b244d6956..04ed975662c9 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -227,7 +227,7 @@ int main(void)
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
enter_guest(vm);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
new file mode 100644
index 000000000000..523371cf8e8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 1
+
+#define MMIO_GPA 0x100000000ull
+
+static void guest_code(void)
+{
+ (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+ (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+
+ GUEST_ASSERT(0);
+}
+
+static void guest_pf_handler(struct ex_regs *regs)
+{
+ /* PFEC == RSVD | PRESENT (read, kernel). */
+ GUEST_ASSERT(regs->error_code == 0x9);
+ GUEST_DONE();
+}
+
+static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
+{
+ u32 good_cpuid_val = *cpuid_reg;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t cmd;
+ int r;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Map 1gb page without a backing memlot. */
+ __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+
+ r = _vcpu_run(vm, VCPU_ID);
+
+ /* Guest access to the 1gb page should trigger MMIO. */
+ TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_MMIO,
+ "Unexpected exit reason: %u (%s), expected MMIO exit (1gb page w/o memslot)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len);
+
+ TEST_ASSERT(run->mmio.phys_addr == MMIO_GPA,
+ "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr);
+
+ /*
+ * Effect the CPUID change for the guest and re-enter the guest. Its
+ * access should now #PF due to the PAGE_SIZE bit being reserved or
+ * the resulting GPA being invalid. Note, kvm_get_supported_cpuid()
+ * returns the struct that contains the entry being modified. Eww.
+ */
+ *cpuid_reg = evil_cpuid_val;
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /*
+ * Add a dummy memslot to coerce KVM into bumping the MMIO generation.
+ * KVM does not "officially" support mucking with CPUID after KVM_RUN,
+ * and will incorrectly reuse MMIO SPTEs. Don't delete the memslot!
+ * KVM x86 zaps all shadow pages on memslot deletion.
+ */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MMIO_GPA << 1, 10, 1, 0);
+
+ /* Set up a #PF handler to eat the RSVD #PF and signal all done! */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_handle_exception(vm, PF_VECTOR, guest_pf_handler);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+
+ cmd = get_ucall(vm, VCPU_ID, NULL);
+ TEST_ASSERT(cmd == UCALL_DONE,
+ "Unexpected guest exit, exit_reason=%s, ucall.cmd = %lu\n",
+ exit_reason_str(run->exit_reason), cmd);
+
+ /*
+ * Restore the happy CPUID value for the next test. Yes, changes are
+ * indeed persistent across VM destruction.
+ */
+ *cpuid_reg = good_cpuid_val;
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid_entry2 *entry;
+ int opt;
+
+ /*
+ * All tests are opt-in because TDP doesn't play nice with reserved #PF
+ * in the GVA->GPA translation. The hardware page walker doesn't let
+ * software change GBPAGES or MAXPHYADDR, and KVM doesn't manually walk
+ * the GVA on fault for performance reasons.
+ */
+ bool do_gbpages = false;
+ bool do_maxphyaddr = false;
+
+ setbuf(stdout, NULL);
+
+ while ((opt = getopt(argc, argv, "gm")) != -1) {
+ switch (opt) {
+ case 'g':
+ do_gbpages = true;
+ break;
+ case 'm':
+ do_maxphyaddr = true;
+ break;
+ case 'h':
+ default:
+ printf("usage: %s [-g (GBPAGES)] [-m (MAXPHYADDR)]\n", argv[0]);
+ break;
+ }
+ }
+
+ if (!do_gbpages && !do_maxphyaddr) {
+ print_skip("No sub-tests selected");
+ return 0;
+ }
+
+ entry = kvm_get_supported_cpuid_entry(0x80000001);
+ if (!(entry->edx & CPUID_GBPAGES)) {
+ print_skip("1gb hugepages not supported");
+ return 0;
+ }
+
+ if (do_gbpages) {
+ pr_info("Test MMIO after toggling CPUID.GBPAGES\n\n");
+ mmu_role_test(&entry->edx, entry->edx & ~CPUID_GBPAGES);
+ }
+
+ if (do_maxphyaddr) {
+ pr_info("Test MMIO after changing CPUID.MAXPHYADDR\n\n");
+ entry = kvm_get_supported_cpuid_entry(0x80000008);
+ mmu_role_test(&entry->eax, (entry->eax & ~0xff) | 0x20);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index 12c558fc8074..ae76436af0cc 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -14,16 +14,12 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "apic.h"
#define N_VCPU 2
#define VCPU_ID0 0
#define VCPU_ID1 1
-static uint32_t get_bsp_flag(void)
-{
- return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
-}
-
static void guest_bsp_vcpu(void *arg)
{
GUEST_SYNC(1);
@@ -94,7 +90,7 @@ static struct kvm_vm *create_vm(void)
pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
vm_create_irqchip(vm);
return vm;
@@ -106,8 +102,6 @@ static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
else
vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
-
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
static void run_vm_bsp(uint32_t bsp_vcpu)
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 613c42c5a9b8..c1f831803ad2 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -55,8 +55,8 @@ static inline void sync_with_host(uint64_t phase)
void self_smi(void)
{
- wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
- APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
void guest_code(void *arg)
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index d672f0a473f8..fc03a150278d 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -24,6 +24,10 @@
#define UCALL_PIO_PORT ((uint16_t)0x1000)
+struct ucall uc_none = {
+ .cmd = UCALL_NONE,
+};
+
/*
* ucall is embedded here to protect against compiler reshuffling registers
* before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,7 +38,8 @@ void guest_code(void)
asm volatile("1: in %[port], %%al\n"
"add $0x1, %%rbx\n"
"jmp 1b"
- : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+ : "rax", "rbx");
}
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index e357d8e222d4..5a6a662f2e59 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -18,15 +18,6 @@
#define rounded_rdmsr(x) ROUND(rdmsr(x))
#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x))
-#define GUEST_ASSERT_EQ(a, b) do { \
- __typeof(a) _a = (a); \
- __typeof(b) _b = (b); \
- if (_a != _b) \
- ucall(UCALL_ABORT, 4, \
- "Failed guest assert: " \
- #a " == " #b, __LINE__, _a, _b); \
- } while(0)
-
static void guest_code(void)
{
u64 val = 0;
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index 72c0d0797522..e3e20e8848d0 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) {
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
/* Process guest code userspace exits. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
@@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) {
run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
- vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
run_guest(vm);
- vm_handle_exception(vm, UD_VECTOR, NULL);
+ vm_install_exception_handler(vm, UD_VECTOR, NULL);
if (process_ucall(vm) != UCALL_DONE) {
- vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
/* Process emulated rdmsr and wrmsr instructions. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index d14888b34adb..d438c4d3228a 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -96,7 +96,7 @@ int main(int argc, char *argv[])
}
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- prepare_virtualize_apic_accesses(vmx, vm, 0);
+ prepare_virtualize_apic_accesses(vmx, vm);
vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
while (!done) {
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 537de1068554..06a64980a5d2 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -97,7 +97,7 @@ int main(int argc, char *argv[])
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -107,9 +107,9 @@ int main(int argc, char *argv[])
* meaning after the last call to virt_map.
*/
prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
bmap = bitmap_alloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
new file mode 100644
index 000000000000..280c01fd2412
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+
+#define VCPU_ID 0
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+ uint64_t tolerance, thresh_low, thresh_high;
+
+ tolerance = expected / 100;
+ thresh_low = expected - tolerance;
+ thresh_high = expected + tolerance;
+
+ TEST_ASSERT(thresh_low < actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+ TEST_ASSERT(thresh_high > actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+ uint64_t tsc_start, tsc_end, tsc_freq;
+
+ /*
+ * Reading the TSC twice with about a second's difference should give
+ * us an approximation of the TSC frequency from the guest's
+ * perspective. Now, this won't be completely accurate, but it should
+ * be good enough for the purposes of this test.
+ */
+ tsc_start = rdmsr(MSR_IA32_TSC);
+ GUEST_SLEEP(1);
+ tsc_end = rdmsr(MSR_IA32_TSC);
+
+ tsc_freq = tsc_end - tsc_start;
+
+ GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+ check_tsc_freq(UCHECK_L2);
+
+ /* exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* prepare the VMCS for L2 execution */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC offsetting and TSC scaling for L2 */
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_TSC_SCALING;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+ vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+ vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+ vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+ /* launch L2 */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void tsc_scaling_check_supported(void)
+{
+ if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) {
+ print_skip("TSC scaling not supported by the HW");
+ exit(KSFT_SKIP);
+ }
+}
+
+static void stable_tsc_check_supported(void)
+{
+ FILE *fp;
+ char buf[4];
+
+ fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+ if (fp == NULL)
+ goto skip_test;
+
+ if (fgets(buf, sizeof(buf), fp) == NULL)
+ goto skip_test;
+
+ if (strncmp(buf, "tsc", sizeof(buf)))
+ goto skip_test;
+
+ return;
+skip_test:
+ print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
+ exit(KSFT_SKIP);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ vm_vaddr_t vmx_pages_gva;
+
+ uint64_t tsc_start, tsc_end;
+ uint64_t tsc_khz;
+ uint64_t l1_scale_factor;
+ uint64_t l0_tsc_freq = 0;
+ uint64_t l1_tsc_freq = 0;
+ uint64_t l2_tsc_freq = 0;
+
+ nested_vmx_check_supported();
+ tsc_scaling_check_supported();
+ stable_tsc_check_supported();
+
+ /*
+ * We set L1's scale factor to be a random number from 2 to 10.
+ * Ideally we would do the same for L2's factor but that one is
+ * referenced by both main() and l1_guest_code() and using a global
+ * variable does not work.
+ */
+ srand(time(NULL));
+ l1_scale_factor = (rand() % 9) + 2;
+ printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+ printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+ tsc_start = rdtsc();
+ sleep(1);
+ tsc_end = rdtsc();
+
+ l0_tsc_freq = tsc_end - tsc_start;
+ printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+ tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+ /* scale down L1's TSC frequency */
+ vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
+ (void *) (tsc_khz / l1_scale_factor));
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *) uc.args[0]);
+ case UCALL_SYNC:
+ switch (uc.args[0]) {
+ case USLEEP:
+ sleep(uc.args[1]);
+ break;
+ case UCHECK_L1:
+ l1_tsc_freq = uc.args[1];
+ printf("L1's TSC frequency is around: %"PRIu64
+ "\n", l1_tsc_freq);
+
+ compare_tsc_freq(l1_tsc_freq,
+ l0_tsc_freq / l1_scale_factor);
+ break;
+ case UCHECK_L2:
+ l2_tsc_freq = uc.args[1];
+ printf("L2's TSC frequency is around: %"PRIu64
+ "\n", l2_tsc_freq);
+
+ compare_tsc_freq(l2_tsc_freq,
+ l1_tsc_freq * L2_SCALE_FACTOR);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 2f964cdc273c..afbbc40df884 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -42,8 +42,6 @@
#define HALTER_VCPU_ID 0
#define SENDER_VCPU_ID 1
-volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
-
/*
* Vector for IPI from sender vCPU to halting vCPU.
* Value is arbitrary and was chosen for the alternating bit pattern. Any
@@ -86,45 +84,6 @@ struct thread_params {
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
-uint32_t read_apic_reg(uint reg)
-{
- return apic_base[reg >> 2];
-}
-
-void write_apic_reg(uint reg, uint32_t val)
-{
- apic_base[reg >> 2] = val;
-}
-
-void disable_apic(void)
-{
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) &
- ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void enable_xapic(void)
-{
- uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
- /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
- if (val & MSR_IA32_APICBASE_EXTD) {
- disable_apic();
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
- } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
- wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
- }
-
- /*
- * Per SDM: reset value of spurious interrupt vector register has the
- * APIC software enabled bit=0. It must be enabled in addition to the
- * enable bit in the MSR.
- */
- val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
- write_apic_reg(APIC_SPIV, val);
-}
-
void verify_apic_base_addr(void)
{
uint64_t msr = rdmsr(MSR_IA32_APICBASE);
@@ -136,10 +95,10 @@ void verify_apic_base_addr(void)
static void halter_guest_code(struct test_data_page *data)
{
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
- data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
- data->halter_lvr = read_apic_reg(APIC_LVR);
+ data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ data->halter_lvr = xapic_read_reg(APIC_LVR);
/*
* Loop forever HLTing and recording halts & wakes. Disable interrupts
@@ -150,8 +109,8 @@ static void halter_guest_code(struct test_data_page *data)
* TPR and PPR for diagnostic purposes in case the test fails.
*/
for (;;) {
- data->halter_tpr = read_apic_reg(APIC_TASKPRI);
- data->halter_ppr = read_apic_reg(APIC_PROCPRI);
+ data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+ data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
asm volatile("sti; hlt; cli");
data->wake_count++;
@@ -166,7 +125,7 @@ static void halter_guest_code(struct test_data_page *data)
static void guest_ipi_handler(struct ex_regs *regs)
{
ipis_rcvd++;
- write_apic_reg(APIC_EOI, 77);
+ xapic_write_reg(APIC_EOI, 77);
}
static void sender_guest_code(struct test_data_page *data)
@@ -179,7 +138,7 @@ static void sender_guest_code(struct test_data_page *data)
uint64_t tsc_start;
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
/*
* Init interrupt command register for sending IPIs
@@ -206,8 +165,8 @@ static void sender_guest_code(struct test_data_page *data)
* First IPI can be sent unconditionally because halter vCPU
* starts earlier.
*/
- write_apic_reg(APIC_ICR2, icr2_val);
- write_apic_reg(APIC_ICR, icr_val);
+ xapic_write_reg(APIC_ICR2, icr2_val);
+ xapic_write_reg(APIC_ICR, icr_val);
data->ipis_sent++;
/*
@@ -462,13 +421,13 @@ int main(int argc, char *argv[])
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
- vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
- test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
+ test_data_page_vaddr = vm_vaddr_alloc_page(vm);
data =
(struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 1f4a0599683c..117bf49a3d79 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -146,7 +146,7 @@ int main(int argc, char *argv[])
/* Map a region for the shared_info page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
- virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index 8389e0bfd711..adc94452b57c 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
/* Map a region for the hypercall pages */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
- virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
for (;;) {
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e..ee71fc99d5b5 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265..645839b50b0a 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,4 +1,5 @@
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_STRSCPY=m
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
new file mode 100755
index 000000000000..b59b8ba561c3
--- /dev/null
+++ b/tools/testing/selftests/lib/scanf.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Tests the scanf infrastructure using test_scanf kernel module.
+$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 4e94e566e040..f31205f04ee0 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -136,6 +136,10 @@ struct mount_attr {
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
+#endif
+
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
struct mount_attr *attr, size_t size)
{
@@ -235,6 +239,10 @@ static int prepare_unpriv_mountns(void)
return 0;
}
+#ifndef ST_NOSYMFOLLOW
+#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
+#endif
+
static int read_mnt_flags(const char *path)
{
int ret;
@@ -245,9 +253,9 @@ static int read_mnt_flags(const char *path)
if (ret != 0)
return -EINVAL;
- if (stat.f_flag &
- ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME |
- ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK))
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
+ ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
+ ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
return -EINVAL;
mnt_flags = 0;
@@ -269,6 +277,8 @@ static int read_mnt_flags(const char *path)
mnt_flags |= MS_SYNCHRONOUS;
if (stat.f_flag & ST_MANDLOCK)
mnt_flags |= ST_MANDLOCK;
+ if (stat.f_flag & ST_NOSYMFOLLOW)
+ mnt_flags |= ST_NOSYMFOLLOW;
return mnt_flags;
}
@@ -368,8 +378,13 @@ static bool mount_setattr_supported(void)
FIXTURE(mount_setattr) {
};
+#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
+#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
+
FIXTURE_SETUP(mount_setattr)
{
+ int fd = -EBADF;
+
if (!mount_setattr_supported())
SKIP(return, "mount_setattr syscall not supported");
@@ -412,6 +427,11 @@ FIXTURE_SETUP(mount_setattr)
ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
+ ASSERT_EQ(close(fd), 0);
}
FIXTURE_TEARDOWN(mount_setattr)
@@ -1421,4 +1441,66 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
}
+TEST_F(mount_setattr, mount_attr_nosymfollow)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= ST_NOSYMFOLLOW;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, ELOOP);
+
+ attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
+ attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~ST_NOSYMFOLLOW;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore
new file mode 100644
index 000000000000..448eeb4590fc
--- /dev/null
+++ b/tools/testing/selftests/nci/.gitignore
@@ -0,0 +1 @@
+/nci_dev
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 61ae899cfc17..19deb9cdf72f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -30,3 +30,4 @@ hwtstamp_config
rxtimestamp
timestamping
txtimestamp
+so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3915bb7bfc39..79c9eb0034d5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -30,7 +30,7 @@ TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 614d5477365a..6f905b53904f 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,4 +1,5 @@
CONFIG_USER_NS=y
+CONFIG_NET_NS=y
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 834066d465fc..2b5d6ff87373 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -18,6 +18,8 @@ import sys
#
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
Port = collections.namedtuple('Port', 'bus_info name')
@@ -239,7 +241,11 @@ def main(cmdline=None):
assert stderr == ""
devs = json.loads(stdout)['dev']
- dev = list(devs.keys())[0]
+ if devs:
+ dev = list(devs.keys())[0]
+ else:
+ print("no devlink device was found, test skipped")
+ sys.exit(KSFT_SKIP)
cmd = "devlink dev show %s" % dev
stdout, stderr = run_command(cmd)
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 49774a8a7736..0d293391e9a4 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -925,6 +925,14 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+ # route can not use prefsrc with nexthops
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1"
+ log_test $? 2 "IPv6 route can not use src routing with external nexthop"
+
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo"
+ log_test $? 2 "IPv6 route with invalid metric"
+
# rpfilter and default route
$IP nexthop flush >/dev/null 2>&1
run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
@@ -1366,6 +1374,10 @@ ipv4_fcnal_runtime()
run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
log_test $? 2 "Nexthop replace with invalid scope for existing route"
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo"
+ log_test $? 2 "IPv4 route with invalid metric"
+
#
# add route with nexthop and check traffic
#
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 76d9487fb03c..5abe92d55b69 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1384,12 +1384,37 @@ ipv4_rt_replace()
ipv4_rt_replace_mpath
}
+# checks that cached input route on VRF port is deleted
+# when VRF is deleted
+ipv4_local_rt_cache()
+{
+ run_cmd "ip addr add 10.0.0.1/32 dev lo"
+ run_cmd "ip netns add test-ns"
+ run_cmd "ip link add veth-outside type veth peer name veth-inside"
+ run_cmd "ip link add vrf-100 type vrf table 1100"
+ run_cmd "ip link set veth-outside master vrf-100"
+ run_cmd "ip link set veth-inside netns test-ns"
+ run_cmd "ip link set veth-outside up"
+ run_cmd "ip link set vrf-100 up"
+ run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
+ run_cmd "ip netns exec test-ns ip link set veth-inside up"
+ run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+ run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
+ run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
+ run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
+ run_cmd "ip link delete vrf-100"
+
+ # if we do not hang test is a success
+ log_test $? 0 "Cached route removed from VRF port device"
+}
+
ipv4_route_test()
{
route_setup
ipv4_rt_add
ipv4_rt_replace
+ ipv4_local_rt_cache
route_cleanup
}
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
new file mode 100755
index 000000000000..a15d21dc035a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -0,0 +1,364 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution between two paths when using custom hash policy.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------------+
+# | SW1 | |
+# | $rp1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | |
+# | $rp11 + + $rp12 |
+# | 192.0.2.1/28 | | 192.0.2.17/28 |
+# | 2001:db8:2::1/64 | | 2001:db8:3::1/64 |
+# +------------------|-------------|------------------+
+# | |
+# +------------------|-------------|------------------+
+# | SW2 | | |
+# | | | |
+# | $rp21 + + $rp22 |
+# | 192.0.2.2/28 192.0.2.18/28 |
+# | 2001:db8:2::2/64 2001:db8:3::2/64 |
+# | |
+# | |
+# | $rp2 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:4::1/64 | |
+# +-------------------------|-------------------------+
+# |
+# +-------------------------|------+
+# | H2 | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:4::{2-fd}/64 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64
+ __simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64
+
+ ip route add vrf v$rp1 203.0.113.0/24 \
+ nexthop via 192.0.2.2 dev $rp11 \
+ nexthop via 192.0.2.18 dev $rp12
+
+ ip -6 route add vrf v$rp1 2001:db8:4::/64 \
+ nexthop via 2001:db8:2::2 dev $rp11 \
+ nexthop via 2001:db8:3::2 dev $rp12
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$rp1 2001:db8:4::/64
+
+ ip route del vrf v$rp1 203.0.113.0/24
+
+ __simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64
+ __simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64
+ simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64
+ __simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64
+ __simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64
+
+ ip route add vrf v$rp2 198.51.100.0/24 \
+ nexthop via 192.0.2.1 dev $rp21 \
+ nexthop via 192.0.2.17 dev $rp22
+
+ ip -6 route add vrf v$rp2 2001:db8:1::/64 \
+ nexthop via 2001:db8:2::1 dev $rp21 \
+ nexthop via 2001:db8:3::1 dev $rp22
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$rp2 2001:db8:1::/64
+
+ ip route del vrf v$rp2 198.51.100.0/24
+
+ __simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64
+ __simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64
+ simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ rp1=${NETIFS[p2]}
+
+ rp11=${NETIFS[p3]}
+ rp21=${NETIFS[p4]}
+
+ rp12=${NETIFS[p5]}
+ rp22=${NETIFS[p6]}
+
+ rp2=${NETIFS[p7]}
+
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:4::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_rp11=$(link_stats_tx_packets_get $rp11)
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+
+ $send_flows
+
+ local t1_rp11=$(link_stats_tx_packets_get $rp11)
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+
+ local d_rp11=$((t1_rp11 - t0_rp11))
+ local d_rp12=$((t1_rp12 - t0_rp12))
+
+ local diff=$((d_rp12 - d_rp11))
+ local sum=$((d_rp11 + d_rp12))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 custom multipath hash tests"
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv4
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp4
+ custom_hash_test "Source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp4
+ custom_hash_test "Destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 custom multipath hash tests"
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv6
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008
+ custom_hash_test "Flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp6
+ custom_hash_test "Source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp6
+ custom_hash_test "Destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+ custom_hash_v4
+ custom_hash_v6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 9c12c4fd3afc..13d3d4428a32 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -18,6 +18,12 @@ if [[ ! -v DEVLINK_DEV ]]; then
DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
-n | cut -d" " -f3)
+elif [[ ! -z "$DEVLINK_DEV" ]]; then
+ devlink dev show $DEVLINK_DEV &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
+ exit 1
+ fi
fi
##############################################################################
@@ -318,6 +324,14 @@ devlink_trap_rx_bytes_get()
| jq '.[][][]["stats"]["rx"]["bytes"]'
}
+devlink_trap_drop_packets_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
devlink_trap_stats_idle_test()
{
local trap_name=$1; shift
@@ -339,6 +353,24 @@ devlink_trap_stats_idle_test()
fi
}
+devlink_trap_drop_stats_idle_test()
+{
+ local trap_name=$1; shift
+ local t0_packets t0_bytes
+
+ t0_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+ sleep 1
+
+ t1_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
devlink_traps_enable_all()
{
local trap_name
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..a73f52efcb6c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -0,0 +1,456 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv4 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + g1 (gre) |
+# | loc=192.0.2.1 |
+# | rem=192.0.2.2 --. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 192.0.2.17/28 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 192.0.2.18/28 | |
+# | | |
+# ! __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# | | | |
+# +--|----------------------|------------------+
+# | |
+# +--|----------------------|------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 192.0.2.34/28 | 192.0.2.50/28 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 192.0.2.65/28 | SW3 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | + $ul4 |
+# | ^ 192.0.2.66/28 |
+# | | |
+# | + g2 (gre) | |
+# | loc=192.0.2.2 | |
+# | rem=192.0.2.1 --' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 192.0.2.17/28
+
+ tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1
+ __simple_if_init g1 v$ol1 192.0.2.1/32
+ ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 192.0.2.2/32
+ __simple_if_fini g1 192.0.2.1/32
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 192.0.2.17/28
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 192.0.2.18/28
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 192.0.2.33/28
+ vlan_create $ul22 222 v$ul21 192.0.2.49/28
+
+ ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17
+ ip route add vrf v$ul21 192.0.2.2/32 \
+ nexthop via 192.0.2.34 \
+ nexthop via 192.0.2.50
+}
+
+sw2_destroy()
+{
+ ip route del vrf v$ul21 192.0.2.2/32
+ ip route del vrf v$ul21 192.0.2.1/32
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 192.0.2.18/28
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 192.0.2.65/28
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 192.0.2.34/28
+ vlan_create $ul32 222 v$ul31 192.0.2.50/28
+
+ ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66
+ ip route add vrf v$ul31 192.0.2.1/32 \
+ nexthop via 192.0.2.33 \
+ nexthop via 192.0.2.49
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip route del vrf v$ul31 192.0.2.1/32
+ ip route del vrf v$ul31 192.0.2.2/32
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 192.0.2.65/28
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 192.0.2.66/28
+
+ tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4
+ __simple_if_init g2 v$ol4 192.0.2.2/32
+ ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip route del vrf v$ol4 192.0.2.1/32
+ __simple_if_fini g2 192.0.2.2/32
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 192.0.2.66/28
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..8fea2c2e0b25
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -0,0 +1,458 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv6 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# |+ g1 (ip6gre) |
+# | loc=2001:db8:3::1 |
+# | rem=2001:db8:3::2 -. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 2001:db8:10::1/64 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 2001:db8:10::2/64 | |
+# | | |
+# ! __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 2001:db8:11::1/64 | 2001:db8:12::1/64 |
+# | | | |
+# +--|----------------------|-------------------+
+# | |
+# +--|----------------------|-------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 2001:db8:11::2/64 | 2001:db8:12::2/64 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 2001:db8:13::1/64 | SW3 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | + $ul4 |
+# | ^ 2001:db8:13::2/64 |
+# | | |
+# |+ g2 (ip6gre) | |
+# | loc=2001:db8:3::2 | |
+# | rem=2001:db8:3::1 -' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|-------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 2001:db8:10::1/64
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ dev v$ol1
+ __simple_if_init g1 v$ol1 2001:db8:3::1/128
+ ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 2001:db8:3::2/128
+ __simple_if_fini g1 2001:db8:3::1/128
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 2001:db8:10::1/64
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 2001:db8:10::2/64
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 2001:db8:11::1/64
+ vlan_create $ul22 222 v$ul21 2001:db8:12::1/64
+
+ ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1
+ ip -6 route add vrf v$ul21 2001:db8:3::2/128 \
+ nexthop via 2001:db8:11::2 \
+ nexthop via 2001:db8:12::2
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$ul21 2001:db8:3::2/128
+ ip -6 route del vrf v$ul21 2001:db8:3::1/128
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 2001:db8:10::2/64
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 2001:db8:13::1/64
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 2001:db8:11::2/64
+ vlan_create $ul32 222 v$ul31 2001:db8:12::2/64
+
+ ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2
+ ip -6 route add vrf v$ul31 2001:db8:3::1/128 \
+ nexthop via 2001:db8:11::1 \
+ nexthop via 2001:db8:12::1
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip -6 route del vrf v$ul31 2001:db8:3::1/128
+ ip -6 route del vrf v$ul31 2001:db8:3::2/128
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 2001:db8:13::1/64
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 2001:db8:13::2/64
+
+ tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+ dev v$ol4
+ __simple_if_init g2 v$ol4 2001:db8:3::2/128
+ ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip -6 route del vrf v$ol4 2001:db8:3::1/128
+ __simple_if_fini g2 2001:db8:3::2/128
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 2001:db8:13::2/64
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 55eeacf59241..64fbd211d907 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -75,7 +75,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
index 5f20d289ee43..10e594c55117 100755
--- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -71,7 +71,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
index e3bd8a6bb8b4..bde11dc27873 100755
--- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -72,7 +72,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
new file mode 100755
index 000000000000..e4b04cd1644a
--- /dev/null
+++ b/tools/testing/selftests/net/icmp.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for checking ICMP response with dummy address instead of 0.0.0.0.
+# Sets up two namespaces like:
+# +----------------------+ +--------------------+
+# | ns1 | v4-via-v6 routes: | ns2 |
+# | | ' | |
+# | +--------+ -> 172.16.1.0/24 -> +--------+ |
+# | | veth0 +--------------------------+ veth0 | |
+# | +--------+ <- 172.16.0.0/24 <- +--------+ |
+# | 172.16.0.1 | | 2001:db8:1::2/64 |
+# | 2001:db8:1::2/64 | | |
+# +----------------------+ +--------------------+
+#
+# And then tries to ping 172.16.1.1 from ns1. This results in a "net
+# unreachable" message being sent from ns2, but there is no IPv4 address set in
+# that address space, so the kernel should substitute the dummy address
+# 192.0.0.8 defined in RFC7600.
+
+NS1=ns1
+NS2=ns2
+H1_IP=172.16.0.1/32
+H1_IP6=2001:db8:1::1
+RT1=172.16.1.0/24
+PINGADDR=172.16.1.1
+RT2=172.16.0.0/24
+H2_IP6=2001:db8:1::2
+
+TMPFILE=$(mktemp)
+
+cleanup()
+{
+ rm -f "$TMPFILE"
+ ip netns del $NS1
+ ip netns del $NS2
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS1
+ip netns add $NS2
+
+# Connectivity
+ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
+ip -netns $NS1 link set dev veth0 up
+ip -netns $NS2 link set dev veth0 up
+ip -netns $NS1 addr add $H1_IP dev veth0
+ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad
+ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad
+ip -netns $NS1 route add $RT1 via inet6 $H2_IP6
+ip -netns $NS2 route add $RT2 via inet6 $H1_IP6
+
+# Make sure ns2 will respond with ICMP unreachable
+ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1
+
+# Run the test - a ping runs in the background, and we capture ICMP responses
+# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout
+# in case something goes wrong
+ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null &
+ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null
+
+# Parse response and check for dummy address
+# tcpdump output looks like:
+# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92
+RESP_IP=$(awk '{print $2}' < $TMPFILE)
+if [[ "$RESP_IP" != "192.0.0.8" ]]; then
+ echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index bf361f30d6ef..c19ecc6a8614 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -63,10 +63,14 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local xfail=$4
if [ ${rc} -eq ${expected} ]; then
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
+ elif [ ${rc} -eq ${xfail} ]; then
+ printf "TEST: %-60s [XFAIL]\n" "${msg}"
+ nxfail=$((nxfail+1))
else
ret=1
nfail=$((nfail+1))
@@ -322,7 +326,7 @@ check_exception()
ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -v "mtu" | grep -q "${R1_LLADDR}"
fi
- log_test $? 0 "IPv6: ${desc}"
+ log_test $? 0 "IPv6: ${desc}" 1
}
run_ping()
@@ -488,6 +492,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
ret=0
nsuccess=0
nfail=0
+nxfail=0
while getopts :pv o
do
@@ -532,5 +537,6 @@ fi
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+printf "Tests xfailed: %3d\n" ${nxfail}
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d88e1fdfb147..89c4753c2760 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -6,6 +6,7 @@
#include <limits.h>
#include <fcntl.h>
#include <string.h>
+#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -25,6 +26,7 @@
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <linux/time_types.h>
extern int optind;
@@ -66,6 +68,13 @@ static unsigned int cfg_do_w;
static int cfg_wait;
static uint32_t cfg_mark;
+struct cfg_cmsg_types {
+ unsigned int cmsg_enabled:1;
+ unsigned int timestampns:1;
+};
+
+static struct cfg_cmsg_types cfg_cmsg_types;
+
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
@@ -80,11 +89,22 @@ static void die_usage(void)
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
+ fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
exit(1);
}
+static void xerror(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
static void handle_signal(int nr)
{
quit = true;
@@ -338,6 +358,58 @@ static size_t do_write(const int fd, char *buf, const size_t len)
return offset;
}
+static void process_cmsg(struct msghdr *msgh)
+{
+ struct __kernel_timespec ts;
+ bool ts_found = false;
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
+ memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
+ ts_found = true;
+ continue;
+ }
+ }
+
+ if (cfg_cmsg_types.timestampns) {
+ if (!ts_found)
+ xerror("TIMESTAMPNS not present\n");
+ }
+}
+
+static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
+{
+ char msg_buf[8192];
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = msg_buf,
+ .msg_controllen = sizeof(msg_buf),
+ };
+ int flags = 0;
+ int ret = recvmsg(fd, &msg, flags);
+
+ if (ret <= 0)
+ return ret;
+
+ if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
+ xerror("got %lu bytes of cmsg data, expected 0\n",
+ (unsigned long)msg.msg_controllen);
+
+ if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
+ xerror("%s\n", "got no cmsg data");
+
+ if (msg.msg_controllen)
+ process_cmsg(&msg);
+
+ return ret;
+}
+
static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
{
int ret = 0;
@@ -357,6 +429,8 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
} else if (cfg_peek == CFG_AFTER_PEEK) {
ret = recv(fd, buf, cap, MSG_PEEK);
ret = (ret < 0) ? ret : read(fd, buf, cap);
+ } else if (cfg_cmsg_types.cmsg_enabled) {
+ ret = do_recvmsg_cmsg(fd, buf, cap);
} else {
ret = read(fd, buf, cap);
}
@@ -786,6 +860,48 @@ static void init_rng(void)
srand(foo);
}
+static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
+{
+ int err;
+
+ err = setsockopt(fd, level, optname, optval, optlen);
+ if (err) {
+ perror("setsockopt");
+ exit(1);
+ }
+}
+
+static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
+{
+ static const unsigned int on = 1;
+
+ if (cmsg->timestampns)
+ xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
+}
+
+static void parse_cmsg_types(const char *type)
+{
+ char *next = strchr(type, ',');
+ unsigned int len = 0;
+
+ cfg_cmsg_types.cmsg_enabled = 1;
+
+ if (next) {
+ parse_cmsg_types(next + 1);
+ len = next - type;
+ } else {
+ len = strlen(type);
+ }
+
+ if (strncmp(type, "TIMESTAMPNS", len) == 0) {
+ cfg_cmsg_types.timestampns = 1;
+ return;
+ }
+
+ fprintf(stderr, "Unrecognized cmsg option %s\n", type);
+ exit(1);
+}
+
int main_loop(void)
{
int fd;
@@ -801,6 +917,8 @@ int main_loop(void)
set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
return copyfd_io(0, fd, 1);
}
@@ -887,7 +1005,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) {
+ while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -943,6 +1061,9 @@ static void parse_opts(int argc, char **argv)
case 'P':
cfg_peek = parse_peek(optarg);
break;
+ case 'c':
+ parse_cmsg_types(optarg);
+ break;
}
}
@@ -976,6 +1097,8 @@ int main(int argc, char *argv[])
set_sndbuf(fd, cfg_sndbuf);
if (cfg_mark)
set_mark(fd, cfg_mark);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
return main_loop_s(fd);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 3c4cb72ed8a4..559173a8e387 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:f:t"
+optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
sin=""
sout=""
@@ -22,6 +22,7 @@ sndbuf=0
rcvbuf=0
options_log=true
do_tcp=0
+checksum=false
filesize=0
if [ $tc_loss -eq 100 ];then
@@ -47,6 +48,7 @@ usage() {
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
+ echo -e "\t-C: enable the MPTCP data checksum"
}
while getopts "$optstring" option;do
@@ -104,6 +106,9 @@ while getopts "$optstring" option;do
"t")
do_tcp=$((do_tcp+1))
;;
+ "C")
+ checksum=true
+ ;;
"?")
usage $0
exit 1
@@ -197,8 +202,11 @@ ip -net "$ns4" link set ns4eth3 up
ip -net "$ns4" route add default via 10.0.3.2
ip -net "$ns4" route add default via dead:beef:3::2
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+if $checksum; then
+ for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+ ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
+ done
+fi
set_ethtool_flags() {
local ns="$1"
@@ -501,6 +509,7 @@ do_transfer()
local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -518,10 +527,14 @@ do_transfer()
"${stat_synrx_now_l}" "${expect_synrx}" 1>&2
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
- printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
- "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
- rets=1
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ooo_now} -eq 0 ]; then
+ printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
+ "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+ rets=1
+ else
+ printf "[ Note ] fallback due to TCP OoO"
+ fi
fi
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
@@ -667,6 +680,25 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
+display_time()
+{
+ time_end=$(date +%s)
+ time_run=$((time_end-time_start))
+
+ echo "Time: ${time_run} seconds"
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ display_time
+ exit ${ret}
+ fi
+}
+
make_file "$cin" "client"
make_file "$sin" "server"
@@ -674,6 +706,8 @@ check_mptcp_disabled
check_mptcp_ulp_setsockopt
+stop_if_error "The kernel configuration is not valid for MPTCP"
+
echo "INFO: validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns1" $sender 10.0.1.1
@@ -693,6 +727,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
+stop_if_error "Could not even run ping tests"
+
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
@@ -720,18 +756,24 @@ echo "on ns3eth4"
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
+run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
+stop_if_error "Could not even run loopback test"
+
+run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
+stop_if_error "Could not even run loopback v6 test"
+
for sender in $ns1 $ns2 $ns3 $ns4;do
- run_tests_lo "$ns1" "$sender" 10.0.1.1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback test" 1>&2
- exit $ret
- fi
- run_tests_lo "$ns1" $sender dead:beef:1::1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback v6 test" 2>&1
- exit $ret
+ # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+ # mptcp syncookie support.
+ if [ $sender = $ns1 ]; then
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+ else
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
fi
+ run_tests "$ns1" $sender 10.0.1.1
+ run_tests "$ns1" $sender dead:beef:1::1
+
run_tests "$ns2" $sender 10.0.1.2
run_tests "$ns2" $sender dead:beef:1::2
run_tests "$ns2" $sender 10.0.2.1
@@ -744,14 +786,13 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests "$ns4" $sender 10.0.3.1
run_tests "$ns4" $sender dead:beef:3::1
+
+ stop_if_error "Tests with $sender as a sender have failed"
done
run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
+stop_if_error "Tests with peek mode have failed"
-time_end=$(date +%s)
-time_run=$((time_end-time_start))
-
-echo "Time: ${time_run} seconds"
-
+display_time
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fd99485cf2a4..9a191c1a5de8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -12,6 +12,7 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
capture=0
+checksum=0
do_all_tests=1
TEST_COUNT=0
@@ -49,6 +50,9 @@ init()
ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ if [ $checksum -eq 1 ]; then
+ ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
+ fi
done
# ns1 ns2
@@ -124,6 +128,28 @@ reset_with_add_addr_timeout()
-j DROP
}
+reset_with_checksum()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+}
+
+reset_with_allow_join_id0()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
+}
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -476,6 +502,45 @@ run_tests()
fi
}
+chk_csum_nr()
+{
+ local msg=${1:-""}
+ local count
+ local dump_stats
+
+ if [ ! -z "$msg" ]; then
+ printf "%02u" "$TEST_COUNT"
+ else
+ echo -n " "
+ fi
+ printf " %-36s %s" "$msg" "sum"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+ echo -n " - csum "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
chk_join_nr()
{
local msg="$1"
@@ -523,6 +588,9 @@ chk_join_nr()
echo Client ns stats
ip netns exec $ns2 nstat -as | grep MPTcp
fi
+ if [ $checksum -eq 1 ]; then
+ chk_csum_nr
+ fi
}
chk_add_nr()
@@ -1374,6 +1442,94 @@ syncookies_tests()
chk_add_nr 1 1
}
+checksum_tests()
+{
+ # checksum test 0 0
+ reset_with_checksum 0 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 0"
+
+ # checksum test 1 1
+ reset_with_checksum 1 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 1"
+
+ # checksum test 0 1
+ reset_with_checksum 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 1"
+
+ # checksum test 1 0
+ reset_with_checksum 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 0"
+}
+
+deny_join_id0_tests()
+{
+ # subflow allow join id0 ns1
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow allow join id0 ns1" 1 1 1
+
+ # subflow allow join id0 ns2
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow allow join id0 ns2" 0 0 0
+
+ # signal address allow join id0 ns1
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal address allow join id0 ns1" 1 1 1
+ chk_add_nr 1 1
+
+ # signal address allow join id0 ns2
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal address allow join id0 ns2" 1 1 1
+ chk_add_nr 1 1
+
+ # subflow and address allow join id0 ns1
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "subflow and address allow join id0 1" 2 2 2
+
+ # subflow and address allow join id0 ns2
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "subflow and address allow join id0 2" 1 1 1
+}
+
all_tests()
{
subflows_tests
@@ -1387,6 +1543,8 @@ all_tests()
backup_tests
add_addr_ports_tests
syncookies_tests
+ checksum_tests
+ deny_join_id0_tests
}
usage()
@@ -1403,7 +1561,10 @@ usage()
echo " -b backup_tests"
echo " -p add_addr_ports_tests"
echo " -k syncookies_tests"
+ echo " -S checksum_tests"
+ echo " -d deny_join_id0_tests"
echo " -c capture pcap files"
+ echo " -C enable data checksum"
echo " -h help"
}
@@ -1418,13 +1579,16 @@ make_file "$sin" "server" 1
trap cleanup EXIT
for arg in "$@"; do
- # check for "capture" arg before launching tests
+ # check for "capture/checksum" args before launching tests
if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
capture=1
fi
+ if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
+ checksum=1
+ fi
- # exception for the capture option, the rest means: a part of the tests
- if [ "${arg}" != "-c" ]; then
+ # exception for the capture/checksum options, the rest means: a part of the tests
+ if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
do_all_tests=0
fi
done
@@ -1434,7 +1598,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkch' opt; do
+while getopts 'fsltra64bpkdchCS' opt; do
case $opt in
f)
subflows_tests
@@ -1469,8 +1633,16 @@ while getopts 'fsltra64bpkch' opt; do
k)
syncookies_tests
;;
+ S)
+ checksum_tests
+ ;;
+ d)
+ deny_join_id0_tests
+ ;;
c)
;;
+ C)
+ ;;
h | *)
usage
;;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 2fa13946ac04..1579e471a5e7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -178,7 +178,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \
${local_addr} < "$sin" > "$sout" &
spid=$!
@@ -186,7 +186,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \
$connect_addr < "$cin" > "$cout" &
cpid=$!
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 3aeef3bcb101..fd63ebfe9a2b 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -60,6 +60,8 @@ setup()
for i in "$ns1" "$ns2" "$ns3";do
ip netns add $i || exit $ksft_skip
ip -net $i link set lo up
+ ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
done
ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
@@ -80,7 +82,6 @@ setup()
ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
- ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c
new file mode 100644
index 000000000000..b39e87e967cd
--- /dev/null
+++ b/tools/testing/selftests/net/so_netns_cookie.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+int main(int argc, char *argvp[])
+{
+ uint64_t cookie1, cookie2;
+ socklen_t vallen;
+ int sock1, sock2;
+
+ sock1 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock1 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie1);
+ if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie1)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (unshare(CLONE_NEWNET))
+ return pr_err("unshare");
+
+ sock2 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock2 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie2);
+ if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie2)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (cookie1 == cookie2)
+ return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns");
+
+ close(sock1);
+ close(sock2);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
new file mode 100755
index 000000000000..75ada17ac061
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -0,0 +1,573 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for
+# implementing IPv4/IPv6 L3 VPN use cases.
+#
+# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and
+# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and
+# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
+# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.
+# The SRv6 End.DT46 Behavior implementation is meant to support the
+# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
+# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies
+# the setup and operations of SRv6 VPNs.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with
+# each other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the
+# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected
+# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100
+# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach
+# hs-t200-3 and vice versa.
+#
+# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT46 Behavior and c) VRF.
+#
+# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly
+# consider an example where, within the same domain of tenant 100, the host
+# hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a arp/ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2,
+# the router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the
+# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6)
+# core network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the
+# host hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the roles of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6
+# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are
+# able to connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::3/64 | | | | cafe::4/64 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-t${tid}-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT46 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+
+ # set default routes to unreachable for both ipv4 and ipv6
+ ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+
+ ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_ipv6_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4/IPv6 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 426d07875a48..112d41d01b12 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -25,6 +25,47 @@
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
+struct tls_crypto_info_keys {
+ union {
+ struct tls12_crypto_info_aes_gcm_128 aes128;
+ struct tls12_crypto_info_chacha20_poly1305 chacha20;
+ };
+ size_t len;
+};
+
+static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
+ struct tls_crypto_info_keys *tls12)
+{
+ memset(tls12, 0, sizeof(*tls12));
+
+ switch (cipher_type) {
+ case TLS_CIPHER_CHACHA20_POLY1305:
+ tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305);
+ tls12->chacha20.info.version = tls_version;
+ tls12->chacha20.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_AES_GCM_128:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128);
+ tls12->aes128.info.version = tls_version;
+ tls12->aes128.info.cipher_type = cipher_type;
+ break;
+ default:
+ break;
+ }
+}
+
+static void memrnd(void *s, size_t n)
+{
+ int *dword = s;
+ char *byte;
+
+ for (; n >= 4; n -= 4)
+ *dword++ = rand();
+ byte = (void *)dword;
+ while (n--)
+ *byte++ = rand();
+}
+
FIXTURE(tls_basic)
{
int fd, cfd;
@@ -133,33 +174,16 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha)
FIXTURE_SETUP(tls)
{
- union {
- struct tls12_crypto_info_aes_gcm_128 aes128;
- struct tls12_crypto_info_chacha20_poly1305 chacha20;
- } tls12;
+ struct tls_crypto_info_keys tls12;
struct sockaddr_in addr;
socklen_t len;
int sfd, ret;
- size_t tls12_sz;
self->notls = false;
len = sizeof(addr);
- memset(&tls12, 0, sizeof(tls12));
- switch (variant->cipher_type) {
- case TLS_CIPHER_CHACHA20_POLY1305:
- tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305);
- tls12.chacha20.info.version = variant->tls_version;
- tls12.chacha20.info.cipher_type = variant->cipher_type;
- break;
- case TLS_CIPHER_AES_GCM_128:
- tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128);
- tls12.aes128.info.version = variant->tls_version;
- tls12.aes128.info.cipher_type = variant->cipher_type;
- break;
- default:
- tls12_sz = 0;
- }
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -187,7 +211,7 @@ FIXTURE_SETUP(tls)
if (!self->notls) {
ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
- tls12_sz);
+ tls12.len);
ASSERT_EQ(ret, 0);
}
@@ -200,7 +224,7 @@ FIXTURE_SETUP(tls)
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
- tls12_sz);
+ tls12.len);
ASSERT_EQ(ret, 0);
}
@@ -308,6 +332,8 @@ TEST_F(tls, recv_max)
char recv_mem[TLS_PAYLOAD_MAX_LEN];
char buf[TLS_PAYLOAD_MAX_LEN];
+ memrnd(buf, sizeof(buf));
+
EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
@@ -588,6 +614,8 @@ TEST_F(tls, recvmsg_single_max)
struct iovec vec;
struct msghdr hdr;
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
vec.iov_base = (char *)recv_mem;
vec.iov_len = TLS_PAYLOAD_MAX_LEN;
@@ -610,6 +638,8 @@ TEST_F(tls, recvmsg_multiple)
struct msghdr hdr;
int i;
+ memrnd(buf, sizeof(buf));
+
EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
for (i = 0; i < msg_iovlen; i++) {
iov_base[i] = (char *)malloc(iov_len);
@@ -634,6 +664,8 @@ TEST_F(tls, single_send_multiple_recv)
char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -834,18 +866,17 @@ TEST_F(tls, bidir)
int ret;
if (!self->notls) {
- struct tls12_crypto_info_aes_gcm_128 tls12;
+ struct tls_crypto_info_keys tls12;
- memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = variant->tls_version;
- tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
}
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index a8fa64136282..7f26591f236b 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
readonly BASE="ns-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index dbf0421986df..66354cdd5ce4 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -189,6 +189,15 @@ segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255
route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)"
route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)"
#
+# Test support for lowest address ending in .0
+segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)"
+#
+# Test support for lowest address not ending in .0
+segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)"
+#
+# Routing using lowest address as a gateway/endpoint
+route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address"
+#
# ==============================================
# ==== TESTS THAT CURRENTLY EXPECT FAILURE =====
# ==============================================
@@ -202,14 +211,6 @@ segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forb
# Currently Linux does not allow this, so this should fail too
segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)"
#
-# Test support for lowest address
-# Currently Linux does not allow this, so this should fail too
-segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)"
-#
-# Routing using lowest address as a gateway/endpoint
-# Currently Linux does not allow this, so this should fail too
-route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)"
-#
# Test support for unicast use of class D
# Currently Linux does not allow this, so this should fail too
segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)"
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 2fedc0781ce8..11d7cdb898c0 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -18,7 +18,8 @@ ret=0
cleanup() {
local ns
- local -r jobs="$(jobs -p)"
+ local jobs
+ readonly jobs="$(jobs -p)"
[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
rm -f $STATS
@@ -108,7 +109,7 @@ chk_gro() {
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
- exit -1
+ exit 1
fi
create_ns
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3171069a6b46..cd6430b39982 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755
index 000000000000..6caf6ac8c285
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+ ip netns del ${ns1}
+ ip netns del ${ns2}
+ ip netns del ${nsrouter}
+
+ [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ dmesg | grep -q ' nft_rpfilter: '
+ if [ $? -eq 0 ]; then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_counter() {
+ local want=$1
+ local ns=$2
+ local address=$3
+
+ line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
+ ret=$?
+
+ if [ $ret -ne 0 ];then
+ echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+ ip netns exec ${ns} nft list table inet filter
+ return 1
+ fi
+
+ if [ $want -gt 0 ]; then
+ echo "PASS: fib expression did drop packets for $address"
+ fi
+
+ return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+exit 0
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 381d874cce99..d7ec1e7da0d0 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -155,7 +155,7 @@ struct flag_test {
int err;
};
-#define NUM_OPENAT2_FLAG_TESTS 24
+#define NUM_OPENAT2_FLAG_TESTS 25
void test_openat2_flags(void)
{
@@ -229,6 +229,11 @@ void test_openat2_flags(void)
{ .name = "invalid how.resolve and O_PATH",
.how.flags = O_PATH,
.how.resolve = 0x1337, .err = -EINVAL },
+
+ /* currently unknown upper 32 bit rejected. */
+ { .name = "currently unknown bit (1 << 63)",
+ .how.flags = O_RDONLY | (1ULL << 63),
+ .how.resolve = 0, .err = -EINVAL },
};
BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS);
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
index 78ddf5e11625..8e83cf91513a 100644
--- a/tools/testing/selftests/perf_events/sigtrap_threads.c
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -43,7 +43,7 @@ static struct {
siginfo_t first_siginfo; /* First observed siginfo_t. */
} ctx;
-/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */
+/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */
#define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
@@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event)
EXPECT_EQ(ctx.signal_count, NUM_THREADS);
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
- EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
/* Check enabled for parent. */
ctx.iterate_on = 0;
@@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event)
EXPECT_EQ(ctx.signal_count, NUM_THREADS);
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
- EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
/* Check enabled for parent. */
ctx.iterate_on = 0;
@@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress)
EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on);
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
- EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index bed4b5318a86..8f3e72e626fa 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -10,6 +10,7 @@
/proc-self-map-files-002
/proc-self-syscall
/proc-self-wchan
+/proc-subset-pid
/proc-uptime-001
/proc-uptime-002
/read
diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore
new file mode 100644
index 000000000000..091021f255b3
--- /dev/null
+++ b/tools/testing/selftests/rlimits/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rlimits-per-userns
diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile
new file mode 100644
index 000000000000..03aadb406212
--- /dev/null
+++ b/tools/testing/selftests/rlimits/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g
+TEST_GEN_PROGS := rlimits-per-userns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/rlimits/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
new file mode 100644
index 000000000000..26dc949e93ea
--- /dev/null
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Alexey Gladkov <gladkov.alexey@gmail.com>
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <err.h>
+
+#define NR_CHILDS 2
+
+static char *service_prog;
+static uid_t user = 60000;
+static uid_t group = 60000;
+
+static void setrlimit_nproc(rlim_t n)
+{
+ pid_t pid = getpid();
+ struct rlimit limit = {
+ .rlim_cur = n,
+ .rlim_max = n
+ };
+
+ warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
+
+ if (setrlimit(RLIMIT_NPROC, &limit) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid = fork();
+
+ if (pid < 0)
+ err(EXIT_FAILURE, "fork");
+
+ if (pid > 0)
+ return pid;
+
+ pid = getpid();
+
+ warnx("(pid=%d): New process starting ...", pid);
+
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+ err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid);
+
+ signal(SIGUSR1, SIG_DFL);
+
+ warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group);
+
+ if (setgid(group) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setgid(%d)", pid, group);
+ if (setuid(user) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setuid(%d)", pid, user);
+
+ warnx("(pid=%d): Service running ...", pid);
+
+ warnx("(pid=%d): Unshare user namespace", pid);
+ if (unshare(CLONE_NEWUSER) < 0)
+ err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+
+ char *const argv[] = { "service", NULL };
+ char *const envp[] = { "I_AM_SERVICE=1", NULL };
+
+ warnx("(pid=%d): Executing real service ...", pid);
+
+ execve(service_prog, argv, envp);
+ err(EXIT_FAILURE, "(pid=%d): execve", pid);
+}
+
+int main(int argc, char **argv)
+{
+ size_t i;
+ pid_t child[NR_CHILDS];
+ int wstatus[NR_CHILDS];
+ int childs = NR_CHILDS;
+ pid_t pid;
+
+ if (getenv("I_AM_SERVICE")) {
+ pause();
+ exit(EXIT_SUCCESS);
+ }
+
+ service_prog = argv[0];
+ pid = getpid();
+
+ warnx("(pid=%d) Starting testcase", pid);
+
+ /*
+ * This rlimit is not a problem for root because it can be exceeded.
+ */
+ setrlimit_nproc(1);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ child[i] = fork_child();
+ wstatus[i] = 0;
+ usleep(250000);
+ }
+
+ while (1) {
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+
+ errno = 0;
+ pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG);
+
+ if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i])))
+ continue;
+
+ if (ret < 0 && errno != ECHILD)
+ warn("(pid=%d): waitpid(%d)", pid, child[i]);
+
+ child[i] *= -1;
+ childs -= 1;
+ }
+
+ if (!childs)
+ break;
+
+ usleep(250000);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+ kill(child[i], SIGUSR1);
+ }
+ }
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (WIFEXITED(wstatus[i]))
+ warnx("(pid=%d): pid %d exited, status=%d",
+ pid, -child[i], WEXITSTATUS(wstatus[i]));
+ else if (WIFSIGNALED(wstatus[i]))
+ warnx("(pid=%d): pid %d killed by signal %d",
+ pid, -child[i], WTERMSIG(wstatus[i]));
+
+ if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1)
+ continue;
+
+ warnx("(pid=%d): Test failed", pid);
+ exit(EXIT_FAILURE);
+ }
+
+ warnx("(pid=%d): Test passed", pid);
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore
new file mode 100644
index 000000000000..6996d4654d92
--- /dev/null
+++ b/tools/testing/selftests/sched/.gitignore
@@ -0,0 +1 @@
+cs_prctl_test
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
new file mode 100644
index 000000000000..10c72f14fea9
--- /dev/null
+++ b/tools/testing/selftests/sched/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+CLANG_FLAGS += -no-integrated-as
+endif
+
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -Wl,-rpath=./ \
+ $(CLANG_FLAGS)
+LDLIBS += -lpthread
+
+TEST_GEN_FILES := cs_prctl_test
+TEST_PROGS := cs_prctl_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
new file mode 100644
index 000000000000..e8b09aa7c0c4
--- /dev/null
+++ b/tools/testing/selftests/sched/config
@@ -0,0 +1 @@
+CONFIG_SCHED_DEBUG=y
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
new file mode 100644
index 000000000000..63fe6521c56d
--- /dev/null
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Use the core scheduling prctl() to test core scheduling cookies control.
+ *
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ * Author: Chris Hyser <chris.hyser@oracle.com>
+ *
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#define _GNU_SOURCE
+#include <sys/eventfd.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+#endif
+
+#define MAX_PROCESSES 128
+#define MAX_THREADS 128
+
+static const char USAGE[] = "cs_prctl_test [options]\n"
+" options:\n"
+" -P : number of processes to create.\n"
+" -T : number of threads per process to create.\n"
+" -d : delay time to keep tasks alive.\n"
+" -k : keep tasks alive until keypress.\n";
+
+enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID};
+
+const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES;
+
+static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ int res;
+
+ res = prctl(option, arg2, arg3, arg4, arg5);
+ printf("%d = prctl(%d, %ld, %ld, %ld, %lx)\n", res, option, (long)arg2, (long)arg3,
+ (long)arg4, arg5);
+ return res;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+#define handle_error(msg) __handle_error(__FILE__, __LINE__, msg)
+static void __handle_error(char *fn, int ln, char *msg)
+{
+ printf("(%s:%d) - ", fn, ln);
+ perror(msg);
+ exit(EXIT_FAILURE);
+}
+
+static void handle_usage(int rc, char *msg)
+{
+ puts(USAGE);
+ puts(msg);
+ putchar('\n');
+ exit(rc);
+}
+
+static unsigned long get_cs_cookie(int pid)
+{
+ unsigned long long cookie;
+ int ret;
+
+ ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, PIDTYPE_PID,
+ (unsigned long)&cookie);
+ if (ret) {
+ printf("Not a core sched system\n");
+ return -1UL;
+ }
+
+ return cookie;
+}
+
+struct child_args {
+ int num_threads;
+ int pfd[2];
+ int cpid;
+ int thr_tids[MAX_THREADS];
+};
+
+static int child_func_thread(void __attribute__((unused))*arg)
+{
+ while (1)
+ usleep(20000);
+ return 0;
+}
+
+static void create_threads(int num_threads, int thr_tids[])
+{
+ void *child_stack;
+ pid_t tid;
+ int i;
+
+ for (i = 0; i < num_threads; ++i) {
+ child_stack = malloc(STACK_SIZE);
+ if (!child_stack)
+ handle_error("child stack allocate");
+
+ tid = clone(child_func_thread, child_stack + STACK_SIZE, THREAD_CLONE_FLAGS, NULL);
+ if (tid == -1)
+ handle_error("clone thread");
+ thr_tids[i] = tid;
+ }
+}
+
+static int child_func_process(void *arg)
+{
+ struct child_args *ca = (struct child_args *)arg;
+
+ close(ca->pfd[0]);
+
+ create_threads(ca->num_threads, ca->thr_tids);
+
+ write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads);
+ close(ca->pfd[1]);
+
+ while (1)
+ usleep(20000);
+ return 0;
+}
+
+static unsigned char child_func_process_stack[STACK_SIZE];
+
+void create_processes(int num_processes, int num_threads, struct child_args proc[])
+{
+ pid_t cpid;
+ int i;
+
+ for (i = 0; i < num_processes; ++i) {
+ proc[i].num_threads = num_threads;
+
+ if (pipe(proc[i].pfd) == -1)
+ handle_error("pipe() failed");
+
+ cpid = clone(child_func_process, child_func_process_stack + STACK_SIZE,
+ SIGCHLD, &proc[i]);
+ proc[i].cpid = cpid;
+ close(proc[i].pfd[1]);
+ }
+
+ for (i = 0; i < num_processes; ++i) {
+ read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads);
+ close(proc[i].pfd[0]);
+ }
+}
+
+void disp_processes(int num_processes, struct child_args proc[])
+{
+ int i, j;
+
+ printf("tid=%d, / tgid=%d / pgid=%d: %lx\n", gettid(), getpid(), getpgid(0),
+ get_cs_cookie(getpid()));
+
+ for (i = 0; i < num_processes; ++i) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].cpid, proc[i].cpid,
+ getpgid(proc[i].cpid), get_cs_cookie(proc[i].cpid));
+ for (j = 0; j < proc[i].num_threads; ++j) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].thr_tids[j],
+ proc[i].cpid, getpgid(0), get_cs_cookie(proc[i].thr_tids[j]));
+ }
+ }
+ puts("\n");
+}
+
+static int errors;
+
+#define validate(v) _validate(__LINE__, v, #v)
+void _validate(int line, int val, char *msg)
+{
+ if (!val) {
+ ++errors;
+ printf("(%d) FAILED: %s\n", line, msg);
+ } else {
+ printf("(%d) PASSED: %s\n", line, msg);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct child_args procs[MAX_PROCESSES];
+
+ int keypress = 0;
+ int num_processes = 2;
+ int num_threads = 3;
+ int delay = 0;
+ int res = 0;
+ int pidx;
+ int pid;
+ int opt;
+
+ while ((opt = getopt(argc, argv, ":hkT:P:d:")) != -1) {
+ switch (opt) {
+ case 'P':
+ num_processes = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'T':
+ num_threads = (int)strtoul(optarg, NULL, 10);
+ break;
+ case 'd':
+ delay = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'k':
+ keypress = 1;
+ break;
+ case 'h':
+ printf(USAGE);
+ exit(EXIT_SUCCESS);
+ default:
+ handle_usage(20, "unknown option");
+ }
+ }
+
+ if (num_processes < 1 || num_processes > MAX_PROCESSES)
+ handle_usage(1, "Bad processes value");
+
+ if (num_threads < 1 || num_threads > MAX_THREADS)
+ handle_usage(2, "Bad thread value");
+
+ if (keypress)
+ delay = -1;
+
+ srand(time(NULL));
+
+ /* put into separate process group */
+ if (setpgid(0, 0) != 0)
+ handle_error("process group");
+
+ printf("\n## Create a thread/process/process group hiearchy\n");
+ create_processes(num_processes, num_threads, procs);
+ disp_processes(num_processes, procs);
+ validate(get_cs_cookie(0) == 0);
+
+ printf("\n## Set a cookie on entire process group\n");
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched create failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != 0);
+
+ /* get a random process pid */
+ pidx = rand() % num_processes;
+ pid = procs[pidx].cpid;
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Set a new cookie on entire process/TGID [%d]\n", pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, PIDTYPE_TGID, 0) < 0)
+ handle_error("core_sched create failed -- TGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy the cookie of current/PGID[%d], to pid [%d] as PIDTYPE_PID\n",
+ getpid(), pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid, PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share to itself failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from a thread [%d] to current/PGID [%d] as PIDTYPE_PID\n",
+ procs[pidx].thr_tids[0], getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, procs[pidx].thr_tids[0],
+ PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share from thread failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from current [%d] to current as pidtype PGID\n", getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched share to self failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ if (errors) {
+ printf("TESTS FAILED. errors: %d\n", errors);
+ res = 10;
+ } else {
+ printf("SUCCESS !!!\n");
+ }
+
+ if (keypress)
+ getchar();
+ else
+ sleep(delay);
+
+ for (pidx = 0; pidx < num_processes; ++pidx)
+ kill(procs[pidx].cpid, 15);
+
+ return res;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index fcc806585266..6e5102a7d7c9 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -143,9 +143,15 @@ int main(int argc, char *argv[])
unsigned long long native, filter1, filter2, bitmap1, bitmap2;
unsigned long long entry, per_filter1, per_filter2;
+ setbuf(stdout, NULL);
+
+ printf("Running on:\n");
+ system("uname -a");
+
printf("Current BPF sysctl settings:\n");
- system("sysctl net.core.bpf_jit_enable");
- system("sysctl net.core.bpf_jit_harden");
+ /* Avoid using "sysctl" which may not be installed. */
+ system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+ system("grep -H . /proc/sys/net/core/bpf_jit_harden");
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 98c3b647f54d..1d64891e6492 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -235,6 +235,10 @@ struct seccomp_notif_addfd {
};
#endif
+#ifndef SECCOMP_ADDFD_FLAG_SEND
+#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
+#endif
+
struct seccomp_notif_addfd_small {
__u64 id;
char weird[4];
@@ -1753,16 +1757,25 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# define SYSCALL_RET_SET(_regs, _val) \
do { \
typeof(_val) _result = (_val); \
- /* \
- * A syscall error is signaled by CR0 SO bit \
- * and the code is stored as a positive value. \
- */ \
- if (_result < 0) { \
- SYSCALL_RET(_regs) = -_result; \
- (_regs).ccr |= 0x10000000; \
- } else { \
+ if ((_regs.trap & 0xfff0) == 0x3000) { \
+ /* \
+ * scv 0 system call uses -ve result \
+ * for error, so no need to adjust. \
+ */ \
SYSCALL_RET(_regs) = _result; \
- (_regs).ccr &= ~0x10000000; \
+ } else { \
+ /* \
+ * A syscall error is signaled by the \
+ * CR0 SO bit and the code is stored as \
+ * a positive value. \
+ */ \
+ if (_result < 0) { \
+ SYSCALL_RET(_regs) = -_result; \
+ (_regs).ccr |= 0x10000000; \
+ } else { \
+ SYSCALL_RET(_regs) = _result; \
+ (_regs).ccr &= ~0x10000000; \
+ } \
} \
} while (0)
# define SYSCALL_RET_SET_ON_PTRACE_EXIT
@@ -3950,7 +3963,7 @@ TEST(user_notification_addfd)
{
pid_t pid;
long ret;
- int status, listener, memfd, fd;
+ int status, listener, memfd, fd, nextfd;
struct seccomp_notif_addfd addfd = {};
struct seccomp_notif_addfd_small small = {};
struct seccomp_notif_addfd_big big = {};
@@ -3959,25 +3972,34 @@ TEST(user_notification_addfd)
/* 100 ms */
struct timespec delay = { .tv_nsec = 100000000 };
+ /* There may be arbitrary already-open fds at test start. */
memfd = memfd_create("test", 0);
ASSERT_GE(memfd, 0);
+ nextfd = memfd + 1;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ /* fd: 4 */
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
- ASSERT_GE(listener, 0);
+ ASSERT_EQ(listener, nextfd++);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
+ /* fds will be added and this value is expected */
if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
exit(1);
+
+ /* Atomic addfd+send is received here. Check it is a valid fd */
+ if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
+ exit(1);
+
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
}
@@ -4019,14 +4041,14 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
@@ -4056,6 +4078,32 @@ TEST(user_notification_addfd)
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
ASSERT_EQ(addfd.id, req.id);
+ /* Verify we can do an atomic addfd and send */
+ addfd.newfd = 0;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ /*
+ * Child has earlier "low" fds and now 42, so we expect the next
+ * lowest available fd to be assigned here.
+ */
+ EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /*
+ * This sets the ID of the ADD FD to the last request plus 1. The
+ * notification ID increments 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+ errno != -EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
@@ -4116,6 +4164,10 @@ TEST(user_notification_addfd_rlimit)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EMFILE);
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
addfd.newfd = 100;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
index 229ee185b27e..254136e3da5a 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
@@ -29,22 +29,26 @@ class SubPlugin(TdcPlugin):
return
# Check for required fields
- scapyinfo = self.args.caseinfo['scapy']
- scapy_keys = ['iface', 'count', 'packet']
- missing_keys = []
- keyfail = False
- for k in scapy_keys:
- if k not in scapyinfo:
- keyfail = True
- missing_keys.add(k)
- if keyfail:
- print('{}: Scapy block present in the test, but is missing info:'
- .format(self.sub_class))
- print('{}'.format(missing_keys))
-
- pkt = eval(scapyinfo['packet'])
- if '$' in scapyinfo['iface']:
- tpl = Template(scapyinfo['iface'])
- scapyinfo['iface'] = tpl.safe_substitute(NAMES)
- for count in range(scapyinfo['count']):
- sendp(pkt, iface=scapyinfo['iface'])
+ lscapyinfo = self.args.caseinfo['scapy']
+ if type(lscapyinfo) != list:
+ lscapyinfo = [ lscapyinfo, ]
+
+ for scapyinfo in lscapyinfo:
+ scapy_keys = ['iface', 'count', 'packet']
+ missing_keys = []
+ keyfail = False
+ for k in scapy_keys:
+ if k not in scapyinfo:
+ keyfail = True
+ missing_keys.append(k)
+ if keyfail:
+ print('{}: Scapy block present in the test, but is missing info:'
+ .format(self.sub_class))
+ print('{}'.format(missing_keys))
+
+ pkt = eval(scapyinfo['packet'])
+ if '$' in scapyinfo['iface']:
+ tpl = Template(scapyinfo['iface'])
+ scapyinfo['iface'] = tpl.safe_substitute(NAMES)
+ for count in range(scapyinfo['count']):
+ sendp(pkt, iface=scapyinfo['iface'])
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 4202e95e27b9..bd843ab00a58 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -406,5 +406,50 @@
"teardown": [
"$TC actions flush action ct"
]
+ },
+ {
+ "id": "3992",
+ "name": "Add ct action triggering DNAT tuple conflict",
+ "category": [
+ "actions",
+ "ct",
+ "scapy"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ [
+ "$TC qdisc del dev $DEV1 ingress",
+ 0,
+ 1,
+ 2,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 flower ct_state -trk action ct commit nat dst addr 20.0.0.1 port 10 pipe action drop",
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.10')/TCP(sport=5000,dport=10)"
+ },
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "expExitCode": "0",
+ "verifyCmd": "cat /proc/net/nf_conntrack",
+ "matchPattern": "dst=10.0.0.20",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 41d783254b08..2aad4caa8581 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -446,6 +446,30 @@
"teardown": []
},
{
+ "id": "ba5b",
+ "name": "Add vlan modify action for protocol 802.1Q setting priority 0",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 priority 0 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 priority 0 protocol 802.1Q pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
"id": "6812",
"name": "Add vlan modify action for protocol 802.1Q",
"category": [
@@ -463,7 +487,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 100",
- "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q pipe.*index 100 ref",
"matchCount": "0",
"teardown": [
"$TC actions flush action vlan"
@@ -487,7 +511,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 12",
- "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad reclassify.*index 12 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action vlan"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index 1cda2e11b3ad..773c5027553d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -9,11 +9,11 @@
"setup": [
"$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536",
- "expExitCode": "2",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536",
+ "expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
- "matchPattern": "qdisc",
- "matchCount": "0",
+ "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536",
+ "matchCount": "1",
"teardown": [
"$IP link del dev $DUMMY"
]
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 1e662d59c502..fe043f67798b 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -6,6 +6,8 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <pthread.h>
+#include <assert.h>
#include "../../../../mm/gup_test.h"
#define MB (1UL << 20)
@@ -15,6 +17,12 @@
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
+static unsigned long cmd = GUP_FAST_BENCHMARK;
+static int gup_fd, repeats = 1;
+static unsigned long size = 128 * MB;
+/* Serialize prints */
+static pthread_mutex_t print_mutex = PTHREAD_MUTEX_INITIALIZER;
+
static char *cmd_to_str(unsigned long cmd)
{
switch (cmd) {
@@ -34,17 +42,55 @@ static char *cmd_to_str(unsigned long cmd)
return "Unknown command";
}
+void *gup_thread(void *data)
+{
+ struct gup_test gup = *(struct gup_test *)data;
+ int i;
+
+ /* Only report timing information on the *_BENCHMARK commands: */
+ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+ (cmd == PIN_LONGTERM_BENCHMARK)) {
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup))
+ perror("ioctl"), exit(1);
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: Time: get:%lld put:%lld us",
+ cmd_to_str(cmd), gup.get_delta_usec,
+ gup.put_delta_usec);
+ if (gup.size != size)
+ printf(", truncated (size: %lld)", gup.size);
+ printf("\n");
+ pthread_mutex_unlock(&print_mutex);
+ }
+ } else {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl");
+ exit(1);
+ }
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: done\n", cmd_to_str(cmd));
+ if (gup.size != size)
+ printf("Truncated (size: %lld)\n", gup.size);
+ pthread_mutex_unlock(&print_mutex);
+ }
+
+ return NULL;
+}
+
int main(int argc, char **argv)
{
struct gup_test gup = { 0 };
- unsigned long size = 128 * MB;
- int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1;
- unsigned long cmd = GUP_FAST_BENCHMARK;
+ int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
int flags = MAP_PRIVATE, touch = 0;
char *file = "/dev/zero";
+ pthread_t *tid;
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
switch (opt) {
case 'a':
cmd = PIN_FAST_BENCHMARK;
@@ -74,6 +120,9 @@ int main(int argc, char **argv)
/* strtol, so you can pass flags in hex form */
gup.gup_flags = strtol(optarg, 0, 0);
break;
+ case 'j':
+ nthreads = atoi(optarg);
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -154,8 +203,8 @@ int main(int argc, char **argv)
if (write)
gup.gup_flags |= FOLL_WRITE;
- fd = open("/sys/kernel/debug/gup_test", O_RDWR);
- if (fd == -1) {
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1) {
perror("open");
exit(1);
}
@@ -185,32 +234,17 @@ int main(int argc, char **argv)
p[0] = 0;
}
- /* Only report timing information on the *_BENCHMARK commands: */
- if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
- (cmd == PIN_LONGTERM_BENCHMARK)) {
- for (i = 0; i < repeats; i++) {
- gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
-
- printf("%s: Time: get:%lld put:%lld us",
- cmd_to_str(cmd), gup.get_delta_usec,
- gup.put_delta_usec);
- if (gup.size != size)
- printf(", truncated (size: %lld)", gup.size);
- printf("\n");
- }
- } else {
- gup.size = size;
- if (ioctl(fd, cmd, &gup)) {
- perror("ioctl");
- exit(1);
- }
-
- printf("%s: done\n", cmd_to_str(cmd));
- if (gup.size != size)
- printf("Truncated (size: %lld)\n", gup.size);
+ tid = malloc(sizeof(pthread_t) * nthreads);
+ assert(tid);
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_create(&tid[i], NULL, gup_thread, &gup);
+ assert(ret == 0);
+ }
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(tid[i], NULL);
+ assert(ret == 0);
}
+ free(tid);
return 0;
}
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 7ed7cd95e58f..ebc4ee0fe179 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_prefixlength 0
ip1 -4 route add default dev wg0 table 51820
ip1 -4 rule add not fwmark 51820 table 51820
ip1 -4 rule add table main suppress_prefixlength 0
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter'
# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
n1 ping -W 1 -c 100 -f 192.168.99.7
n1 ping -W 1 -c 100 -f abab::1111
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 4eecb432a66c..74db83a0aedd 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
-CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_NAT_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index d6b09cb1aa2c..991591718bb0 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * syscall_numbering.c - test calling the x86-64 kernel with various
+ * valid and invalid system call numbers.
+ *
* Copyright (c) 2018 Andrew Lutomirski
*/
@@ -11,79 +13,470 @@
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
-#include <syscall.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <sysexits.h>
-static int nerrs;
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
-#define X32_BIT 0x40000000UL
+#include <linux/ptrace.h>
-static void check_enosys(unsigned long nr, bool *ok)
+/* Common system call numbers */
+#define SYS_READ 0
+#define SYS_WRITE 1
+#define SYS_GETPID 39
+/* x64-only system call numbers */
+#define X64_IOCTL 16
+#define X64_READV 19
+#define X64_WRITEV 20
+/* x32-only system call numbers (without X32_BIT) */
+#define X32_IOCTL 514
+#define X32_READV 515
+#define X32_WRITEV 516
+
+#define X32_BIT 0x40000000
+
+static int nullfd = -1; /* File descriptor for /dev/null */
+static bool with_x32; /* x32 supported on this kernel? */
+
+enum ptrace_pass {
+ PTP_NOTHING,
+ PTP_GETREGS,
+ PTP_WRITEBACK,
+ PTP_FUZZRET,
+ PTP_FUZZHIGH,
+ PTP_INTNUM,
+ PTP_DONE
+};
+
+static const char * const ptrace_pass_name[] =
{
- /* If this fails, a segfault is reasonably likely. */
- fflush(stdout);
+ [PTP_NOTHING] = "just stop, no data read",
+ [PTP_GETREGS] = "only getregs",
+ [PTP_WRITEBACK] = "getregs, unmodified setregs",
+ [PTP_FUZZRET] = "modifying the default return",
+ [PTP_FUZZHIGH] = "clobbering the top 32 bits",
+ [PTP_INTNUM] = "sign-extending the syscall number",
+};
- long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
- if (ret == 0) {
- printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
- *ok = false;
- } else if (errno != ENOSYS) {
- printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
- *ok = false;
- }
+/*
+ * Shared memory block between tracer and test
+ */
+struct shared {
+ unsigned int nerr; /* Total error count */
+ unsigned int indent; /* Message indentation level */
+ enum ptrace_pass ptrace_pass;
+ bool probing_syscall; /* In probe_syscall() */
+};
+static volatile struct shared *sh;
+
+static inline unsigned int offset(void)
+{
+ unsigned int level = sh ? sh->indent : 0;
+
+ return 8 + level * 4;
}
-static void test_x32_without_x32_bit(void)
+#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
+ ## __VA_ARGS__)
+
+#define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__)
+#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
+#define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__)
+
+#define fail(fmt, ...) \
+ do { \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ sh->nerr++; \
+ } while (0)
+
+#define crit(fmt, ...) \
+ do { \
+ sh->indent = 0; \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ msg(SKIP, "Unable to run test\n"); \
+ exit(EX_OSERR); \
+ } while (0)
+
+/* Sentinel for ptrace-modified return value */
+#define MODIFIED_BY_PTRACE -9999
+
+/*
+ * Directly invokes the given syscall with nullfd as the first argument
+ * and the rest zero. Avoids involving glibc wrappers in case they ever
+ * end up intercepting some system calls for some reason, or modify
+ * the system call number itself.
+ */
+static long long probe_syscall(int msb, int lsb)
{
- bool ok = true;
+ register long long arg1 asm("rdi") = nullfd;
+ register long long arg2 asm("rsi") = 0;
+ register long long arg3 asm("rdx") = 0;
+ register long long arg4 asm("r10") = 0;
+ register long long arg5 asm("r8") = 0;
+ register long long arg6 asm("r9") = 0;
+ long long nr = ((long long)msb << 32) | (unsigned int)lsb;
+ long long ret;
/*
- * Syscalls 512-547 are "x32" syscalls. They are intended to be
- * called with the x32 (0x40000000) bit set. Calling them without
- * the x32 bit set is nonsense and should not work.
+ * We pass in an extra copy of the extended system call number
+ * in %rbx, so we can examine it from the ptrace handler without
+ * worrying about it being possibly modified. This is to test
+ * the validity of struct user regs.orig_rax a.k.a.
+ * struct pt_regs.orig_ax.
*/
- printf("[RUN]\tChecking syscalls 512-547\n");
- for (int i = 512; i <= 547; i++)
- check_enosys(i, &ok);
+ sh->probing_syscall = true;
+ asm volatile("syscall"
+ : "=a" (ret)
+ : "a" (nr), "b" (nr),
+ "r" (arg1), "r" (arg2), "r" (arg3),
+ "r" (arg4), "r" (arg5), "r" (arg6)
+ : "rcx", "r11", "memory", "cc");
+ sh->probing_syscall = false;
+
+ return ret;
+}
+
+static const char *syscall_str(int msb, int start, int end)
+{
+ static char buf[64];
+ const char * const type = (start & X32_BIT) ? "x32" : "x64";
+ int lsb = start;
/*
- * Check that a handful of 64-bit-only syscalls are rejected if the x32
- * bit is set.
+ * Improve readability by stripping the x32 bit, but round
+ * toward zero so we don't display -1 as -1073741825.
*/
- printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
- check_enosys(16 | X32_BIT, &ok); /* ioctl */
- check_enosys(19 | X32_BIT, &ok); /* readv */
- check_enosys(20 | X32_BIT, &ok); /* writev */
+ if (lsb < 0)
+ lsb |= X32_BIT;
+ else
+ lsb &= ~X32_BIT;
+
+ if (start == end)
+ snprintf(buf, sizeof buf, "%s syscall %d:%d",
+ type, msb, lsb);
+ else
+ snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
+ type, msb, lsb, lsb + (end-start));
+
+ return buf;
+}
+
+static unsigned int _check_for(int msb, int start, int end, long long expect,
+ const char *expect_str)
+{
+ unsigned int err = 0;
+
+ sh->indent++;
+ if (start != end)
+ sh->indent++;
+
+ for (int nr = start; nr <= end; nr++) {
+ long long ret = probe_syscall(msb, nr);
+
+ if (ret != expect) {
+ fail("%s returned %lld, but it should have returned %s\n",
+ syscall_str(msb, nr, nr),
+ ret, expect_str);
+ err++;
+ }
+ }
+
+ if (start != end)
+ sh->indent--;
+
+ if (err) {
+ if (start != end)
+ fail("%s had %u failure%s\n",
+ syscall_str(msb, start, end),
+ err, err == 1 ? "s" : "");
+ } else {
+ ok("%s returned %s as expected\n",
+ syscall_str(msb, start, end), expect_str);
+ }
+
+ sh->indent--;
+
+ return err;
+}
+
+#define check_for(msb,start,end,expect) \
+ _check_for(msb,start,end,expect,#expect)
+
+static bool check_zero(int msb, int nr)
+{
+ return check_for(msb, nr, nr, 0);
+}
+
+static bool check_enosys(int msb, int nr)
+{
+ return check_for(msb, nr, nr, -ENOSYS);
+}
+
+/*
+ * Anyone diagnosing a failure will want to know whether the kernel
+ * supports x32. Tell them. This can also be used to conditionalize
+ * tests based on existence or nonexistence of x32.
+ */
+static bool test_x32(void)
+{
+ long long ret;
+ pid_t mypid = getpid();
+
+ run("Checking for x32 by calling x32 getpid()\n");
+ ret = probe_syscall(0, SYS_GETPID | X32_BIT);
+
+ sh->indent++;
+ if (ret == mypid) {
+ info("x32 is supported\n");
+ with_x32 = true;
+ } else if (ret == -ENOSYS) {
+ info("x32 is not supported\n");
+ with_x32 = false;
+ } else {
+ fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
+ with_x32 = false;
+ }
+ sh->indent--;
+ return with_x32;
+}
+
+static void test_syscalls_common(int msb)
+{
+ enum ptrace_pass pass = sh->ptrace_pass;
+
+ run("Checking some common syscalls as 64 bit\n");
+ check_zero(msb, SYS_READ);
+ check_zero(msb, SYS_WRITE);
+
+ run("Checking some 64-bit only syscalls as 64 bit\n");
+ check_zero(msb, X64_READV);
+ check_zero(msb, X64_WRITEV);
+
+ run("Checking out of range system calls\n");
+ check_for(msb, -64, -2, -ENOSYS);
+ if (pass >= PTP_FUZZRET)
+ check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
+ else
+ check_for(msb, -1, -1, -ENOSYS);
+ check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
+ check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
+ check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
+}
+static void test_syscalls_with_x32(int msb)
+{
/*
- * Check some syscalls with high bits set.
+ * Syscalls 512-547 are "x32" syscalls. They are
+ * intended to be called with the x32 (0x40000000) bit
+ * set. Calling them without the x32 bit set is
+ * nonsense and should not work.
*/
- printf("[RUN]\tChecking numbers above 2^32-1\n");
- check_enosys((1UL << 32), &ok);
- check_enosys(X32_BIT | (1UL << 32), &ok);
+ run("Checking x32 syscalls as 64 bit\n");
+ check_for(msb, 512, 547, -ENOSYS);
- if (!ok)
- nerrs++;
- else
- printf("[OK]\tThey all returned -ENOSYS\n");
+ run("Checking some common syscalls as x32\n");
+ check_zero(msb, SYS_READ | X32_BIT);
+ check_zero(msb, SYS_WRITE | X32_BIT);
+
+ run("Checking some x32 syscalls as x32\n");
+ check_zero(msb, X32_READV | X32_BIT);
+ check_zero(msb, X32_WRITEV | X32_BIT);
+
+ run("Checking some 64-bit syscalls as x32\n");
+ check_enosys(msb, X64_IOCTL | X32_BIT);
+ check_enosys(msb, X64_READV | X32_BIT);
+ check_enosys(msb, X64_WRITEV | X32_BIT);
}
-int main()
+static void test_syscalls_without_x32(int msb)
{
+ run("Checking for absence of x32 system calls\n");
+ check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
+}
+
+static void test_syscall_numbering(void)
+{
+ static const int msbs[] = {
+ 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
+ INT_MIN, INT_MIN+1
+ };
+
+ sh->indent++;
+
/*
- * Anyone diagnosing a failure will want to know whether the kernel
- * supports x32. Tell them.
+ * The MSB is supposed to be ignored, so we loop over a few
+ * to test that out.
*/
- printf("\tChecking for x32...");
- fflush(stdout);
- if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
- printf(" supported\n");
- } else if (errno == ENOSYS) {
- printf(" not supported\n");
+ for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+ int msb = msbs[i];
+ run("Checking system calls with msb = %d (0x%x)\n",
+ msb, msb);
+
+ sh->indent++;
+
+ test_syscalls_common(msb);
+ if (with_x32)
+ test_syscalls_with_x32(msb);
+ else
+ test_syscalls_without_x32(msb);
+
+ sh->indent--;
+ }
+
+ sh->indent--;
+}
+
+static void syscall_numbering_tracee(void)
+{
+ enum ptrace_pass pass;
+
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ crit("Failed to request tracing\n");
+ return;
+ }
+ raise(SIGSTOP);
+
+ for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
+ sh->ptrace_pass = ++pass) {
+ run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
+ test_syscall_numbering();
+ }
+}
+
+static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
+{
+ struct user_regs_struct regs;
+
+ sh->probing_syscall = false; /* Do this on entry only */
+
+ /* For these, don't even getregs */
+ if (pass == PTP_NOTHING || pass == PTP_DONE)
+ return;
+
+ ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
+
+ if (regs.orig_rax != regs.rbx) {
+ fail("orig_rax %#llx doesn't match syscall number %#llx\n",
+ (unsigned long long)regs.orig_rax,
+ (unsigned long long)regs.rbx);
+ }
+
+ switch (pass) {
+ case PTP_GETREGS:
+ /* Just read, no writeback */
+ return;
+ case PTP_WRITEBACK:
+ /* Write back the same register state verbatim */
+ break;
+ case PTP_FUZZRET:
+ regs.rax = MODIFIED_BY_PTRACE;
+ break;
+ case PTP_FUZZHIGH:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
+ break;
+ case PTP_INTNUM:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = (int)regs.orig_rax;
+ break;
+ default:
+ crit("invalid ptrace_pass\n");
+ break;
+ }
+
+ ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
+}
+
+static void syscall_numbering_tracer(pid_t testpid)
+{
+ int wstatus;
+
+ do {
+ pid_t wpid = waitpid(testpid, &wstatus, 0);
+ if (wpid < 0 && errno != EINTR)
+ break;
+ if (wpid != testpid)
+ continue;
+ if (!WIFSTOPPED(wstatus))
+ break; /* Thread exited? */
+
+ if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
+ mess_with_syscall(testpid, sh->ptrace_pass);
+ } while (sh->ptrace_pass != PTP_DONE &&
+ !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
+
+ ptrace(PTRACE_DETACH, testpid, NULL, NULL);
+
+ /* Wait for the child process to terminate */
+ while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
+ /* wait some more */;
+}
+
+static void test_traced_syscall_numbering(void)
+{
+ pid_t testpid;
+
+ /* Launch the test thread; this thread continues as the tracer thread */
+ testpid = fork();
+
+ if (testpid < 0) {
+ crit("Unable to launch tracer process\n");
+ } else if (testpid == 0) {
+ syscall_numbering_tracee();
+ _exit(0);
} else {
- printf(" confused\n");
+ syscall_numbering_tracer(testpid);
}
+}
- test_x32_without_x32_bit();
+int main(void)
+{
+ unsigned int nerr;
- return nerrs ? 1 : 0;
+ /*
+ * It is quite likely to get a segfault on a failure, so make
+ * sure the message gets out by setting stdout to nonbuffered.
+ */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /*
+ * Harmless file descriptor to work on...
+ */
+ nullfd = open("/dev/null", O_RDWR);
+ if (nullfd < 0) {
+ crit("Unable to open /dev/null: %s\n", strerror(errno));
+ }
+
+ /*
+ * Set up a block of shared memory...
+ */
+ sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_SHARED, 0, 0);
+ if (sh == MAP_FAILED) {
+ crit("Unable to allocated shared memory block: %s\n",
+ strerror(errno));
+ }
+
+ with_x32 = test_x32();
+
+ run("Running tests without ptrace...\n");
+ test_syscall_numbering();
+
+ test_traced_syscall_numbering();
+
+ nerr = sh->nerr;
+ if (!nerr) {
+ ok("All system calls succeeded or failed as expected\n");
+ return 0;
+ } else {
+ fail("A total of %u system call%s had incorrect behavior\n",
+ nerr, nerr != 1 ? "s" : "");
+ return 1;
+ }
}
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 93cbd6f603f9..2acbb7703c6a 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -84,7 +84,7 @@ void vsock_wait_remote_close(int fd)
}
/* Connect to <cid, port> and return the file descriptor. */
-int vsock_stream_connect(unsigned int cid, unsigned int port)
+static int vsock_connect(unsigned int cid, unsigned int port, int type)
{
union {
struct sockaddr sa;
@@ -101,7 +101,7 @@ int vsock_stream_connect(unsigned int cid, unsigned int port)
control_expectln("LISTENING");
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ fd = socket(AF_VSOCK, type, 0);
timeout_begin(TIMEOUT);
do {
@@ -120,11 +120,21 @@ int vsock_stream_connect(unsigned int cid, unsigned int port)
return fd;
}
+int vsock_stream_connect(unsigned int cid, unsigned int port)
+{
+ return vsock_connect(cid, port, SOCK_STREAM);
+}
+
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
+{
+ return vsock_connect(cid, port, SOCK_SEQPACKET);
+}
+
/* Listen on <cid, port> and return the first incoming connection. The remote
* address is stored to clientaddrp. clientaddrp may be NULL.
*/
-int vsock_stream_accept(unsigned int cid, unsigned int port,
- struct sockaddr_vm *clientaddrp)
+static int vsock_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp, int type)
{
union {
struct sockaddr sa;
@@ -145,7 +155,7 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
int client_fd;
int old_errno;
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ fd = socket(AF_VSOCK, type, 0);
if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
perror("bind");
@@ -189,6 +199,18 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
return client_fd;
}
+int vsock_stream_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp)
+{
+ return vsock_accept(cid, port, clientaddrp, SOCK_STREAM);
+}
+
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp)
+{
+ return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET);
+}
+
/* Transmit one byte and check the return value.
*
* expected_ret:
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e53dd09d26d9..a3375ad2fb7f 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -36,8 +36,11 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
int vsock_stream_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp);
void vsock_wait_remote_close(int fd);
void send_byte(int fd, int expected_ret, int flags);
void recv_byte(int fd, int expected_ret, int flags);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 5a4fb80fa832..67766bfe176f 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -14,6 +14,8 @@
#include <errno.h>
#include <unistd.h>
#include <linux/kernel.h>
+#include <sys/types.h>
+#include <sys/socket.h>
#include "timeout.h"
#include "control.h"
@@ -279,6 +281,110 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
close(fd);
}
+#define MESSAGES_CNT 7
+static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Send several messages, one with MSG_EOR flag */
+ for (int i = 0; i < MESSAGES_CNT; i++)
+ send_byte(fd, 1, 0);
+
+ control_writeln("SENDDONE");
+ close(fd);
+}
+
+static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
+{
+ int fd;
+ char buf[16];
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SENDDONE");
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ for (int i = 0; i < MESSAGES_CNT; i++) {
+ if (recvmsg(fd, &msg, 0) != 1) {
+ perror("message bound violated");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ close(fd);
+}
+
+#define MESSAGE_TRUNC_SZ 32
+static void test_seqpacket_msg_trunc_client(const struct test_opts *opts)
+{
+ int fd;
+ char buf[MESSAGE_TRUNC_SZ];
+
+ fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (send(fd, buf, sizeof(buf), 0) != sizeof(buf)) {
+ perror("send failed");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("SENDDONE");
+ close(fd);
+}
+
+static void test_seqpacket_msg_trunc_server(const struct test_opts *opts)
+{
+ int fd;
+ char buf[MESSAGE_TRUNC_SZ / 2];
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SENDDONE");
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ssize_t ret = recvmsg(fd, &msg, MSG_TRUNC);
+
+ if (ret != MESSAGE_TRUNC_SZ) {
+ printf("%zi\n", ret);
+ perror("MSG_TRUNC doesn't work");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!(msg.msg_flags & MSG_TRUNC)) {
+ fprintf(stderr, "MSG_TRUNC expected\n");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -309,6 +415,16 @@ static struct test_case test_cases[] = {
.run_client = test_stream_msg_peek_client,
.run_server = test_stream_msg_peek_server,
},
+ {
+ .name = "SOCK_SEQPACKET msg bounds",
+ .run_client = test_seqpacket_msg_bounds_client,
+ .run_server = test_seqpacket_msg_bounds_server,
+ },
+ {
+ .name = "SOCK_SEQPACKET MSG_TRUNC flag",
+ .run_client = test_seqpacket_msg_trunc_client,
+ .run_server = test_seqpacket_msg_trunc_server,
+ },
{},
};
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index 85eb65ea16d3..0e75f22c9475 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -132,6 +132,10 @@ int main(int argc, char **argv)
qsort(list, list_size, sizeof(list[0]), compare_txt);
list2 = malloc(sizeof(*list) * list_size);
+ if (!list2) {
+ printf("Out of memory\n");
+ exit(1);
+ }
printf("culling\n");