summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-12 17:44:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-12 17:44:08 -0700
commit9187210eee7d87eea37b45ea93454a88681894a4 (patch)
tree31b4610e62cdd5e1dfb700014aa619e41145d7d3 /tools
parent1f440397665f4241346e4cc6d93f8b73880815d1 (diff)
parented1f164038b50c5864aa85389f3ffd456f050cca (diff)
Merge tag 'net-next-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core & protocols: - Large effort by Eric to lower rtnl_lock pressure and remove locks: - Make commonly used parts of rtnetlink (address, route dumps etc) lockless, protected by RCU instead of rtnl_lock. - Add a netns exit callback which already holds rtnl_lock, allowing netns exit to take rtnl_lock once in the core instead of once for each driver / callback. - Remove locks / serialization in the socket diag interface. - Remove 6 calls to synchronize_rcu() while holding rtnl_lock. - Remove the dev_base_lock, depend on RCU where necessary. - Support busy polling on a per-epoll context basis. Poll length and budget parameters can be set independently of system defaults. - Introduce struct net_hotdata, to make sure read-mostly global config variables fit in as few cache lines as possible. - Add optional per-nexthop statistics to ease monitoring / debug of ECMP imbalance problems. - Support TCP_NOTSENT_LOWAT in MPTCP. - Ensure that IPv6 temporary addresses' preferred lifetimes are long enough, compared to other configured lifetimes, and at least 2 sec. - Support forwarding of ICMP Error messages in IPSec, per RFC 4301. - Add support for the independent control state machine for bonding per IEEE 802.1AX-2008 5.4.15 in addition to the existing coupled control state machine. - Add "network ID" to MCTP socket APIs to support hosts with multiple disjoint MCTP networks. - Re-use the mono_delivery_time skbuff bit for packets which user space wants to be sent at a specified time. Maintain the timing information while traversing veth links, bridge etc. - Take advantage of MSG_SPLICE_PAGES for RxRPC DATA and ACK packets. - Simplify many places iterating over netdevs by using an xarray instead of a hash table walk (hash table remains in place, for use on fastpaths). - Speed up scanning for expired routes by keeping a dedicated list. - Speed up "generic" XDP by trying harder to avoid large allocations. - Support attaching arbitrary metadata to netconsole messages. Things we sprinkled into general kernel code: - Enforce VM_IOREMAP flag and range in ioremap_page_range and introduce VM_SPARSE kind and vm_area_[un]map_pages (used by bpf_arena). - Rework selftest harness to enable the use of the full range of ksft exit code (pass, fail, skip, xfail, xpass). Netfilter: - Allow userspace to define a table that is exclusively owned by a daemon (via netlink socket aliveness) without auto-removing this table when the userspace program exits. Such table gets marked as orphaned and a restarting management daemon can re-attach/regain ownership. - Speed up element insertions to nftables' concatenated-ranges set type. Compact a few related data structures. BPF: - Add BPF token support for delegating a subset of BPF subsystem functionality from privileged system-wide daemons such as systemd through special mount options for userns-bound BPF fs to a trusted & unprivileged application. - Introduce bpf_arena which is sparse shared memory region between BPF program and user space where structures inside the arena can have pointers to other areas of the arena, and pointers work seamlessly for both user-space programs and BPF programs. - Introduce may_goto instruction that is a contract between the verifier and the program. The verifier allows the program to loop assuming it's behaving well, but reserves the right to terminate it. - Extend the BPF verifier to enable static subprog calls in spin lock critical sections. - Support registration of struct_ops types from modules which helps projects like fuse-bpf that seeks to implement a new struct_ops type. - Add support for retrieval of cookies for perf/kprobe multi links. - Support arbitrary TCP SYN cookie generation / validation in the TC layer with BPF to allow creating SYN flood handling in BPF firewalls. - Add code generation to inline the bpf_kptr_xchg() helper which improves performance when stashing/popping the allocated BPF objects. Wireless: - Add SPP (signaling and payload protected) AMSDU support. - Support wider bandwidth OFDMA, as required for EHT operation. Driver API: - Major overhaul of the Energy Efficient Ethernet internals to support new link modes (2.5GE, 5GE), share more code between drivers (especially those using phylib), and encourage more uniform behavior. Convert and clean up drivers. - Define an API for querying per netdev queue statistics from drivers. - IPSec: account in global stats for fully offloaded sessions. - Create a concept of Ethernet PHY Packages at the Device Tree level, to allow parameterizing the existing PHY package code. - Enable Rx hashing (RSS) on GTP protocol fields. Misc: - Improvements and refactoring all over networking selftests. - Create uniform module aliases for TC classifiers, actions, and packet schedulers to simplify creating modprobe policies. - Address all missing MODULE_DESCRIPTION() warnings in networking. - Extend the Netlink descriptions in YAML to cover message encapsulation or "Netlink polymorphism", where interpretation of nested attributes depends on link type, classifier type or some other "class type". Drivers: - Ethernet high-speed NICs: - Add a new driver for Marvell's Octeon PCI Endpoint NIC VF. - Intel (100G, ice, idpf): - support E825-C devices - nVidia/Mellanox: - support devices with one port and multiple PCIe links - Broadcom (bnxt): - support n-tuple filters - support configuring the RSS key - Wangxun (ngbe/txgbe): - implement irq_domain for TXGBE's sub-interrupts - Pensando/AMD: - support XDP - optimize queue submission and wakeup handling (+17% bps) - optimize struct layout, saving 28% of memory on queues - Ethernet NICs embedded and virtual: - Google cloud vNIC: - refactor driver to perform memory allocations for new queue config before stopping and freeing the old queue memory - Synopsys (stmmac): - obey queueMaxSDU and implement counters required by 802.1Qbv - Renesas (ravb): - support packet checksum offload - suspend to RAM and runtime PM support - Ethernet switches: - nVidia/Mellanox: - support for nexthop group statistics - Microchip: - ksz8: implement PHY loopback - add support for KSZ8567, a 7-port 10/100Mbps switch - PTP: - New driver for RENESAS FemtoClock3 Wireless clock generator. - Support OCP PTP cards designed and built by Adva. - CAN: - Support recvmsg() flags for own, local and remote traffic on CAN BCM sockets. - Support for esd GmbH PCIe/402 CAN device family. - m_can: - Rx/Tx submission coalescing - wake on frame Rx - WiFi: - Intel (iwlwifi): - enable signaling and payload protected A-MSDUs - support wider-bandwidth OFDMA - support for new devices - bump FW API to 89 for AX devices; 90 for BZ/SC devices - MediaTek (mt76): - mt7915: newer ADIE version support - mt7925: radio temperature sensor support - Qualcomm (ath11k): - support 6 GHz station power modes: Low Power Indoor (LPI), Standard Power) SP and Very Low Power (VLP) - QCA6390 & WCN6855: support 2 concurrent station interfaces - QCA2066 support - Qualcomm (ath12k): - refactoring in preparation for Multi-Link Operation (MLO) support - 1024 Block Ack window size support - firmware-2.bin support - support having multiple identical PCI devices (firmware needs to have ATH12K_FW_FEATURE_MULTI_QRTR_ID) - QCN9274: support split-PHY devices - WCN7850: enable Power Save Mode in station mode - WCN7850: P2P support - RealTek: - rtw88: support for more rtw8811cu and rtw8821cu devices - rtw89: support SCAN_RANDOM_SN and SET_SCAN_DWELL - rtlwifi: speed up USB firmware initialization - rtwl8xxxu: - RTL8188F: concurrent interface support - Channel Switch Announcement (CSA) support in AP mode - Broadcom (brcmfmac): - per-vendor feature support - per-vendor SAE password setup - DMI nvram filename quirk for ACEPC W5 Pro" * tag 'net-next-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2255 commits) nexthop: Fix splat with CONFIG_DEBUG_PREEMPT=y nexthop: Fix out-of-bounds access during attribute validation nexthop: Only parse NHA_OP_FLAGS for dump messages that require it nexthop: Only parse NHA_OP_FLAGS for get messages that require it bpf: move sleepable flag from bpf_prog_aux to bpf_prog bpf: hardcode BPF_PROG_PACK_SIZE to 2MB * num_possible_nodes() selftests/bpf: Add kprobe multi triggering benchmarks ptp: Move from simple ida to xarray vxlan: Remove generic .ndo_get_stats64 vxlan: Do not alloc tstats manually devlink: Add comments to use netlink gen tool nfp: flower: handle acti_netdevs allocation failure net/packet: Add getsockopt support for PACKET_COPY_THRESH net/netlink: Add getsockopt support for NETLINK_LISTEN_ALL_NSID selftests/bpf: Add bpf_arena_htab test. selftests/bpf: Add bpf_arena_list test. selftests/bpf: Add unit tests for bpf_arena_alloc/free_pages bpf: Add helper macro bpf_addr_space_cast() libbpf: Recognize __arena global variables. bpftool: Recognize arena map type ...
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst58
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst2
-rw-r--r--tools/bpf/bpftool/gen.c277
-rw-r--r--tools/bpf/bpftool/link.c94
-rw-r--r--tools/bpf/bpftool/map.c2
-rw-r--r--tools/bpf/bpftool/prog.c2
-rw-r--r--tools/bpf/resolve_btfids/main.c70
-rw-r--r--tools/include/linux/btf_ids.h9
-rw-r--r--tools/include/uapi/linux/bpf.h123
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/include/uapi/linux/netdev.h20
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.c42
-rw-r--r--tools/lib/bpf/bpf.h79
-rw-r--r--tools/lib/bpf/bpf_core_read.h60
-rw-r--r--tools/lib/bpf/bpf_helpers.h4
-rw-r--r--tools/lib/bpf/btf.c43
-rw-r--r--tools/lib/bpf/elf.c2
-rw-r--r--tools/lib/bpf/features.c583
-rw-r--r--tools/lib/bpf/libbpf.c1158
-rw-r--r--tools/lib/bpf/libbpf.h23
-rw-r--r--tools/lib/bpf/libbpf.map6
-rw-r--r--tools/lib/bpf/libbpf_internal.h68
-rw-r--r--tools/lib/bpf/libbpf_probes.c19
-rw-r--r--tools/lib/bpf/linker.c2
-rw-r--r--tools/lib/bpf/netlink.c4
-rw-r--r--tools/lib/bpf/str_error.h3
-rw-r--r--tools/net/ynl/Makefile4
-rw-r--r--tools/net/ynl/Makefile.deps5
-rwxr-xr-xtools/net/ynl/cli.py43
-rw-r--r--tools/net/ynl/generated/Makefile9
-rw-r--r--tools/net/ynl/lib/Makefile5
-rw-r--r--tools/net/ynl/lib/__init__.py4
-rw-r--r--tools/net/ynl/lib/nlspec.py11
-rw-r--r--tools/net/ynl/lib/ynl-priv.h359
-rw-r--r--tools/net/ynl/lib/ynl.c399
-rw-r--r--tools/net/ynl/lib/ynl.h5
-rw-r--r--tools/net/ynl/lib/ynl.py311
-rw-r--r--tools/net/ynl/samples/.gitignore1
-rw-r--r--tools/net/ynl/samples/Makefile6
-rw-r--r--tools/net/ynl/samples/ovs.c60
-rw-r--r--tools/net/ynl/samples/page-pool.c2
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py124
-rwxr-xr-xtools/net/ynl/ynl-gen-rst.py9
-rw-r--r--tools/testing/kunit/configs/all_tests.config6
-rw-r--r--tools/testing/selftests/Makefile7
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c4
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch643
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x3
-rw-r--r--tools/testing/selftests/bpf/Makefile51
-rw-r--r--tools/testing/selftests/bpf/README.rst32
-rw-r--r--tools/testing/selftests/bpf/bench.c40
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c182
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh9
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_alloc.h67
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h70
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_htab.h100
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h92
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h76
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h30
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c84
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c129
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h65
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_htab.c88
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/decap_sanity.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_probes.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_fixup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_redirect.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_reroute.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_destroy.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c159
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c90
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c1052
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdpwall.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c48
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab_asm.c5
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c87
-rw-r--r--tools/testing/selftests/bpf/progs/async_stack_depth.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops2.c14
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_compiler.h33
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h16
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c26
-rw-r--r--tools/testing/selftests/bpf/progs/connect_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h57
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c9
-rw-r--r--tools/testing/selftests/bpf/progs/kptr_xchg_inline.c48
-rw-r--r--tools/testing/selftests/bpf/progs/loop4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/priv_map.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h17
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h7
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c120
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c3
-rw-r--r--tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c4
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h18
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate.c52
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c32
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c29
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c24
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_module.c56
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c102
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_fill_link_info.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func1.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_seg6local.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_siphash.h64
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c44
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c595
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h140
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c5
-rw-r--r--tools/testing/selftests/bpf/progs/token_lsm.c32
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_failure.c20
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c28
-rw-r--r--tools/testing/selftests/bpf/progs/type_cast.c13
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c146
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c182
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_subprogs.c29
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c103
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c24
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c553
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spin_lock.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c6
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c3
-rw-r--r--tools/testing/selftests/bpf/test_loader.c13
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c18
-rw-r--r--tools/testing/selftests/bpf/test_maps.c6
-rw-r--r--tools/testing/selftests/bpf/test_progs.c77
-rw-r--r--tools/testing/selftests/bpf/test_progs.h10
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c3
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c60
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c96
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h10
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_loop_inline.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c6
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile7
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh19
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh21
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh38
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/lag_lib.sh7
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh2
l---------tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh1
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile18
l---------tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_mld.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh2
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh2
l---------tools/testing/selftests/drivers/net/dsa/lib.sh1
l---------tools/testing/selftests/drivers/net/dsa/local_termination.sh2
l---------tools/testing/selftests/drivers/net/dsa/no_forwarding.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh9
l---------tools/testing/selftests/drivers/net/dsa/tc_actions.sh2
l---------tools/testing/selftests/drivers/net/dsa/tc_common.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh2
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile18
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh143
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh40
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile7
-rwxr-xr-xtools/testing/selftests/drivers/net/team/dev_addr_lists.sh4
l---------tools/testing/selftests/drivers/net/team/lag_lib.sh1
l---------tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh1
-rw-r--r--tools/testing/selftests/kselftest.h45
-rw-r--r--tools/testing/selftests/kselftest_harness.h198
-rw-r--r--tools/testing/selftests/landlock/base_test.c2
-rw-r--r--tools/testing/selftests/landlock/common.h58
-rw-r--r--tools/testing/selftests/landlock/fs_test.c26
-rw-r--r--tools/testing/selftests/landlock/net_test.c4
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c7
-rw-r--r--tools/testing/selftests/lib.mk19
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c4
-rw-r--r--tools/testing/selftests/net/Makefile2
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh34
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh6
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh148
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile4
-rw-r--r--tools/testing/selftests/net/forwarding/config35
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh16
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh.sh41
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh42
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/ip6gre_lib.sh4
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh65
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh52
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh129
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh17
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh10
-rwxr-xr-xtools/testing/selftests/net/fq_band_pktlimit.sh14
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c6
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh56
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh248
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh293
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh213
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh104
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh65
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c39
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh60
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh173
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh62
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh2
-rw-r--r--tools/testing/selftests/net/so_txtime.c7
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh36
-rw-r--r--tools/testing/selftests/net/tls.c2
-rw-r--r--tools/testing/selftests/net/txtimestamp.c3
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh12
-rw-r--r--tools/testing/selftests/net/udpgso.c134
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh49
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c9
-rw-r--r--tools/testing/selftests/tc-testing/config1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json403
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json2
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py2
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh3
-rw-r--r--tools/testing/vsock/util.c17
-rw-r--r--tools/testing/vsock/util.h4
-rw-r--r--tools/testing/vsock/vsock_diag_test.c23
-rw-r--r--tools/testing/vsock/vsock_test.c102
-rw-r--r--tools/testing/vsock/vsock_test_zerocopy.c12
-rw-r--r--tools/testing/vsock/vsock_uring_test.c17
-rw-r--r--tools/virtio/.gitignore1
-rw-r--r--tools/virtio/Makefile8
-rw-r--r--tools/virtio/linux/virtio_config.h4
-rw-r--r--tools/virtio/vhost_net_test.c532
299 files changed, 12167 insertions, 2853 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 5006e724d1bc..5e60825818dd 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -257,18 +257,48 @@ EXAMPLES
return 0;
}
-This is example BPF application with two BPF programs and a mix of BPF maps
-and global variables. Source code is split across two source code files.
+**$ cat example3.bpf.c**
+
+::
+
+ #include <linux/ptrace.h>
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+ /* This header file is provided by the bpf_testmod module. */
+ #include "bpf_testmod.h"
+
+ int test_2_result = 0;
+
+ /* bpf_Testmod.ko calls this function, passing a "4"
+ * and testmod_map->data.
+ */
+ SEC("struct_ops/test_2")
+ void BPF_PROG(test_2, int a, int b)
+ {
+ test_2_result = a + b;
+ }
+
+ SEC(".struct_ops")
+ struct bpf_testmod_ops testmod_map = {
+ .test_2 = (void *)test_2,
+ .data = 0x1,
+ };
+
+This is example BPF application with three BPF programs and a mix of BPF
+maps and global variables. Source code is split across three source code
+files.
**$ clang --target=bpf -g example1.bpf.c -o example1.bpf.o**
**$ clang --target=bpf -g example2.bpf.c -o example2.bpf.o**
-**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o**
+**$ clang --target=bpf -g example3.bpf.c -o example3.bpf.o**
+
+**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o example3.bpf.o**
-This set of commands compiles *example1.bpf.c* and *example2.bpf.c*
-individually and then statically links respective object files into the final
-BPF ELF object file *example.bpf.o*.
+This set of commands compiles *example1.bpf.c*, *example2.bpf.c* and
+*example3.bpf.c* individually and then statically links respective object
+files into the final BPF ELF object file *example.bpf.o*.
**$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h**
@@ -291,7 +321,15 @@ BPF ELF object file *example.bpf.o*.
struct bpf_map *data;
struct bpf_map *bss;
struct bpf_map *my_map;
+ struct bpf_map *testmod_map;
} maps;
+ struct {
+ struct example__testmod_map__bpf_testmod_ops {
+ const struct bpf_program *test_1;
+ const struct bpf_program *test_2;
+ int data;
+ } *testmod_map;
+ } struct_ops;
struct {
struct bpf_program *handle_sys_enter;
struct bpf_program *handle_sys_exit;
@@ -304,6 +342,7 @@ BPF ELF object file *example.bpf.o*.
struct {
int x;
} data;
+ int test_2_result;
} *bss;
struct example__data {
_Bool global_flag;
@@ -342,10 +381,16 @@ BPF ELF object file *example.bpf.o*.
skel->rodata->param1 = 128;
+ /* Change the value through the pointer of shadow type */
+ skel->struct_ops.testmod_map->data = 13;
+
err = example__load(skel);
if (err)
goto cleanup;
+ /* The result of the function test_2() */
+ printf("test_2_result: %d\n", skel->bss->test_2_result);
+
err = example__attach(skel);
if (err)
goto cleanup;
@@ -372,6 +417,7 @@ BPF ELF object file *example.bpf.o*.
::
+ test_2_result: 17
my_map name: my_map
sys_enter prog FD: 8
my_static_var: 7
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 3b7ba037af95..9d6a314dfd7a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -55,7 +55,7 @@ MAP COMMANDS
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
-| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** }
+| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** }
DESCRIPTION
===========
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index ee3ce2b8000d..4fa4ade1ce74 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -7,6 +7,7 @@
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
+#include <libgen.h>
#include <linux/err.h>
#include <stdbool.h>
#include <stdio.h>
@@ -54,11 +55,27 @@ static bool str_has_suffix(const char *str, const char *suffix)
return true;
}
+static const struct btf_type *
+resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
+{
+ const struct btf_type *t;
+
+ t = skip_mods_and_typedefs(btf, id, NULL);
+ if (!btf_is_ptr(t))
+ return NULL;
+
+ t = skip_mods_and_typedefs(btf, t->type, res_id);
+
+ return btf_is_func_proto(t) ? t : NULL;
+}
+
static void get_obj_name(char *name, const char *file)
{
- /* Using basename() GNU version which doesn't modify arg. */
- strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1);
- name[MAX_OBJ_NAME_LEN - 1] = '\0';
+ char file_copy[PATH_MAX];
+
+ /* Using basename() POSIX version to be more portable. */
+ strncpy(file_copy, file, PATH_MAX - 1)[PATH_MAX - 1] = '\0';
+ strncpy(name, basename(file_copy), MAX_OBJ_NAME_LEN - 1)[MAX_OBJ_NAME_LEN - 1] = '\0';
if (str_has_suffix(name, ".o"))
name[strlen(name) - 2] = '\0';
sanitize_identifier(name);
@@ -103,6 +120,12 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
int i, n;
+ /* recognize hard coded LLVM section name */
+ if (strcmp(sec_name, ".arena.1") == 0) {
+ /* this is the name to use in skeleton */
+ snprintf(buf, buf_sz, "arena");
+ return true;
+ }
for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) {
const char *pfx = pfxs[i];
@@ -231,8 +254,15 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map
return NULL;
}
-static bool is_internal_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
+static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
{
+ size_t tmp_sz;
+
+ if (bpf_map__type(map) == BPF_MAP_TYPE_ARENA && bpf_map__initial_value(map, &tmp_sz)) {
+ snprintf(buf, sz, "arena");
+ return true;
+ }
+
if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
return false;
@@ -257,7 +287,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
bpf_object__for_each_map(map, obj) {
/* only generate definitions for memory-mapped internal maps */
- if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident)))
+ if (!is_mmapable_map(map, map_ident, sizeof(map_ident)))
continue;
sec = find_type_for_map(btf, map_ident);
@@ -310,7 +340,7 @@ static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name
bpf_object__for_each_map(map, obj) {
/* only generate definitions for memory-mapped internal maps */
- if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident)))
+ if (!is_mmapable_map(map, map_ident, sizeof(map_ident)))
continue;
sec = find_type_for_map(btf, map_ident);
@@ -487,7 +517,7 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name)
", obj_name);
bpf_object__for_each_map(map, obj) {
- if (!is_internal_mmapable_map(map, map_ident, sizeof(map_ident)))
+ if (!is_mmapable_map(map, map_ident, sizeof(map_ident)))
continue;
sec = find_type_for_map(btf, map_ident);
@@ -703,7 +733,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
const void *mmap_data = NULL;
size_t mmap_size = 0;
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
codegen("\
@@ -765,7 +795,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
bpf_object__for_each_map(map, obj) {
const char *mmap_flags;
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
@@ -854,7 +884,7 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped)
",
i, bpf_map__name(map), i, ident);
/* memory-mapped internal maps */
- if (mmaped && is_internal_mmapable_map(map, ident, sizeof(ident))) {
+ if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
i, ident);
}
@@ -906,6 +936,207 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li
}
}
+static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident,
+ const struct btf_type *map_type, __u32 map_type_id)
+{
+ LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, .indent_level = 3);
+ const struct btf_type *member_type;
+ __u32 offset, next_offset = 0;
+ const struct btf_member *m;
+ struct btf_dump *d = NULL;
+ const char *member_name;
+ __u32 member_type_id;
+ int i, err = 0, n;
+ int size;
+
+ d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL);
+ if (!d)
+ return -errno;
+
+ n = btf_vlen(map_type);
+ for (i = 0, m = btf_members(map_type); i < n; i++, m++) {
+ member_type = skip_mods_and_typedefs(btf, m->type, &member_type_id);
+ member_name = btf__name_by_offset(btf, m->name_off);
+
+ offset = m->offset / 8;
+ if (next_offset < offset)
+ printf("\t\t\tchar __padding_%d[%d];\n", i, offset - next_offset);
+
+ switch (btf_kind(member_type)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ /* scalar type */
+ printf("\t\t\t");
+ opts.field_name = member_name;
+ err = btf_dump__emit_type_decl(d, member_type_id, &opts);
+ if (err) {
+ p_err("Failed to emit type declaration for %s: %d", member_name, err);
+ goto out;
+ }
+ printf(";\n");
+
+ size = btf__resolve_size(btf, member_type_id);
+ if (size < 0) {
+ p_err("Failed to resolve size of %s: %d\n", member_name, size);
+ err = size;
+ goto out;
+ }
+
+ next_offset = offset + size;
+ break;
+
+ case BTF_KIND_PTR:
+ if (resolve_func_ptr(btf, m->type, NULL)) {
+ /* Function pointer */
+ printf("\t\t\tstruct bpf_program *%s;\n", member_name);
+
+ next_offset = offset + sizeof(void *);
+ break;
+ }
+ /* All pointer types are unsupported except for
+ * function pointers.
+ */
+ fallthrough;
+
+ default:
+ /* Unsupported types
+ *
+ * Types other than scalar types and function
+ * pointers are currently not supported in order to
+ * prevent conflicts in the generated code caused
+ * by multiple definitions. For instance, if the
+ * struct type FOO is used in a struct_ops map,
+ * bpftool has to generate definitions for FOO,
+ * which may result in conflicts if FOO is defined
+ * in different skeleton files.
+ */
+ size = btf__resolve_size(btf, member_type_id);
+ if (size < 0) {
+ p_err("Failed to resolve size of %s: %d\n", member_name, size);
+ err = size;
+ goto out;
+ }
+ printf("\t\t\tchar __unsupported_%d[%d];\n", i, size);
+
+ next_offset = offset + size;
+ break;
+ }
+ }
+
+ /* Cannot fail since it must be a struct type */
+ size = btf__resolve_size(btf, map_type_id);
+ if (next_offset < (__u32)size)
+ printf("\t\t\tchar __padding_end[%d];\n", size - next_offset);
+
+out:
+ btf_dump__free(d);
+
+ return err;
+}
+
+/* Generate the pointer of the shadow type for a struct_ops map.
+ *
+ * This function adds a pointer of the shadow type for a struct_ops map.
+ * The members of a struct_ops map can be exported through a pointer to a
+ * shadow type. The user can access these members through the pointer.
+ *
+ * A shadow type includes not all members, only members of some types.
+ * They are scalar types and function pointers. The function pointers are
+ * translated to the pointer of the struct bpf_program. The scalar types
+ * are translated to the original type without any modifiers.
+ *
+ * Unsupported types will be translated to a char array to occupy the same
+ * space as the original field, being renamed as __unsupported_*. The user
+ * should treat these fields as opaque data.
+ */
+static int gen_st_ops_shadow_type(const char *obj_name, struct btf *btf, const char *ident,
+ const struct bpf_map *map)
+{
+ const struct btf_type *map_type;
+ const char *type_name;
+ __u32 map_type_id;
+ int err;
+
+ map_type_id = bpf_map__btf_value_type_id(map);
+ if (map_type_id == 0)
+ return -EINVAL;
+ map_type = btf__type_by_id(btf, map_type_id);
+ if (!map_type)
+ return -EINVAL;
+
+ type_name = btf__name_by_offset(btf, map_type->name_off);
+
+ printf("\t\tstruct %s__%s__%s {\n", obj_name, ident, type_name);
+
+ err = walk_st_ops_shadow_vars(btf, ident, map_type, map_type_id);
+ if (err)
+ return err;
+
+ printf("\t\t} *%s;\n", ident);
+
+ return 0;
+}
+
+static int gen_st_ops_shadow(const char *obj_name, struct btf *btf, struct bpf_object *obj)
+{
+ int err, st_ops_cnt = 0;
+ struct bpf_map *map;
+ char ident[256];
+
+ if (!btf)
+ return 0;
+
+ /* Generate the pointers to shadow types of
+ * struct_ops maps.
+ */
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
+ continue;
+ if (!get_map_ident(map, ident, sizeof(ident)))
+ continue;
+
+ if (st_ops_cnt == 0) /* first struct_ops map */
+ printf("\tstruct {\n");
+ st_ops_cnt++;
+
+ err = gen_st_ops_shadow_type(obj_name, btf, ident, map);
+ if (err)
+ return err;
+ }
+
+ if (st_ops_cnt)
+ printf("\t} struct_ops;\n");
+
+ return 0;
+}
+
+/* Generate the code to initialize the pointers of shadow types. */
+static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ char ident[256];
+
+ if (!btf)
+ return;
+
+ /* Initialize the pointers to_ops shadow types of
+ * struct_ops maps.
+ */
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
+ continue;
+ if (!get_map_ident(map, ident, sizeof(ident)))
+ continue;
+ codegen("\
+ \n\
+ obj->struct_ops.%1$s = bpf_map__initial_value(obj->maps.%1$s, NULL);\n\
+ \n\
+ ", ident);
+ }
+}
+
static int do_skeleton(int argc, char **argv)
{
char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
@@ -1049,6 +1280,11 @@ static int do_skeleton(int argc, char **argv)
printf("\t} maps;\n");
}
+ btf = bpf_object__btf(obj);
+ err = gen_st_ops_shadow(obj_name, btf, obj);
+ if (err)
+ goto out;
+
if (prog_cnt) {
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
@@ -1072,7 +1308,6 @@ static int do_skeleton(int argc, char **argv)
printf("\t} links;\n");
}
- btf = bpf_object__btf(obj);
if (btf) {
err = codegen_datasecs(obj, obj_name);
if (err)
@@ -1130,6 +1365,12 @@ static int do_skeleton(int argc, char **argv)
if (err) \n\
goto err_out; \n\
\n\
+ ", obj_name);
+
+ gen_st_ops_shadow_init(btf, obj);
+
+ codegen("\
+ \n\
return obj; \n\
err_out: \n\
%1$s__destroy(obj); \n\
@@ -1389,7 +1630,7 @@ static int do_subskeleton(int argc, char **argv)
/* Also count all maps that have a name */
map_cnt++;
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
map_type_id = bpf_map__btf_value_type_id(map);
@@ -1439,6 +1680,10 @@ static int do_subskeleton(int argc, char **argv)
printf("\t} maps;\n");
}
+ err = gen_st_ops_shadow(obj_name, btf, obj);
+ if (err)
+ goto out;
+
if (prog_cnt) {
printf("\tstruct {\n");
bpf_object__for_each_program(prog, obj) {
@@ -1507,7 +1752,7 @@ static int do_subskeleton(int argc, char **argv)
/* walk through each symbol and emit the runtime representation */
bpf_object__for_each_map(map, obj) {
- if (!is_internal_mmapable_map(map, ident, sizeof(ident)))
+ if (!is_mmapable_map(map, ident, sizeof(ident)))
continue;
map_type_id = bpf_map__btf_value_type_id(map);
@@ -1550,6 +1795,12 @@ static int do_subskeleton(int argc, char **argv)
if (err) \n\
goto err; \n\
\n\
+ ");
+
+ gen_st_ops_shadow_init(btf, obj);
+
+ codegen("\
+ \n\
return obj; \n\
err: \n\
%1$s__destroy(obj); \n\
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index cb46667a6b2e..afde9d0c2ea1 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -249,18 +249,44 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info)
return err;
}
-static int cmp_u64(const void *A, const void *B)
+struct addr_cookie {
+ __u64 addr;
+ __u64 cookie;
+};
+
+static int cmp_addr_cookie(const void *A, const void *B)
{
- const __u64 *a = A, *b = B;
+ const struct addr_cookie *a = A, *b = B;
- return *a - *b;
+ if (a->addr == b->addr)
+ return 0;
+ return a->addr < b->addr ? -1 : 1;
+}
+
+static struct addr_cookie *
+get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count)
+{
+ struct addr_cookie *data;
+ __u32 i;
+
+ data = calloc(count, sizeof(data[0]));
+ if (!data) {
+ p_err("mem alloc failed");
+ return NULL;
+ }
+ for (i = 0; i < count; i++) {
+ data[i].addr = addrs[i];
+ data[i].cookie = cookies[i];
+ }
+ qsort(data, count, sizeof(data[0]), cmp_addr_cookie);
+ return data;
}
static void
show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
{
+ struct addr_cookie *data;
__u32 i, j = 0;
- __u64 *addrs;
jsonw_bool_field(json_wtr, "retprobe",
info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
@@ -268,14 +294,20 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed);
jsonw_name(json_wtr, "funcs");
jsonw_start_array(json_wtr);
- addrs = u64_to_ptr(info->kprobe_multi.addrs);
- qsort(addrs, info->kprobe_multi.count, sizeof(addrs[0]), cmp_u64);
+ data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs),
+ u64_to_ptr(info->kprobe_multi.cookies),
+ info->kprobe_multi.count);
+ if (!data)
+ return;
/* Load it once for all. */
if (!dd.sym_count)
kernel_syms_load(&dd);
+ if (!dd.sym_count)
+ goto error;
+
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != addrs[j])
+ if (dd.sym_mapping[i].address != data[j].addr)
continue;
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
@@ -287,11 +319,14 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
} else {
jsonw_string_field(json_wtr, "module", dd.sym_mapping[i].module);
}
+ jsonw_uint_field(json_wtr, "cookie", data[j].cookie);
jsonw_end_object(json_wtr);
if (j++ == info->kprobe_multi.count)
break;
}
jsonw_end_array(json_wtr);
+error:
+ free(data);
}
static __u64 *u64_to_arr(__u64 val)
@@ -334,6 +369,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
u64_to_ptr(info->perf_event.kprobe.func_name));
jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset);
jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed);
+ jsonw_uint_field(wtr, "cookie", info->perf_event.kprobe.cookie);
}
static void
@@ -343,6 +379,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
jsonw_string_field(wtr, "file",
u64_to_ptr(info->perf_event.uprobe.file_name));
jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset);
+ jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie);
}
static void
@@ -350,6 +387,7 @@ show_perf_event_tracepoint_json(struct bpf_link_info *info, json_writer_t *wtr)
{
jsonw_string_field(wtr, "tracepoint",
u64_to_ptr(info->perf_event.tracepoint.tp_name));
+ jsonw_uint_field(wtr, "cookie", info->perf_event.tracepoint.cookie);
}
static char *perf_config_hw_cache_str(__u64 config)
@@ -426,6 +464,8 @@ show_perf_event_event_json(struct bpf_link_info *info, json_writer_t *wtr)
else
jsonw_uint_field(wtr, "event_config", config);
+ jsonw_uint_field(wtr, "cookie", info->perf_event.event.cookie);
+
if (type == PERF_TYPE_HW_CACHE && perf_config)
free((void *)perf_config);
}
@@ -670,8 +710,8 @@ void netfilter_dump_plain(const struct bpf_link_info *info)
static void show_kprobe_multi_plain(struct bpf_link_info *info)
{
+ struct addr_cookie *data;
__u32 i, j = 0;
- __u64 *addrs;
if (!info->kprobe_multi.count)
return;
@@ -683,21 +723,24 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
printf("func_cnt %u ", info->kprobe_multi.count);
if (info->kprobe_multi.missed)
printf("missed %llu ", info->kprobe_multi.missed);
- addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs);
- qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64);
+ data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs),
+ u64_to_ptr(info->kprobe_multi.cookies),
+ info->kprobe_multi.count);
+ if (!data)
+ return;
/* Load it once for all. */
if (!dd.sym_count)
kernel_syms_load(&dd);
if (!dd.sym_count)
- return;
+ goto error;
- printf("\n\t%-16s %s", "addr", "func [module]");
+ printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]");
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != addrs[j])
+ if (dd.sym_mapping[i].address != data[j].addr)
continue;
- printf("\n\t%016lx %s",
- dd.sym_mapping[i].address, dd.sym_mapping[i].name);
+ printf("\n\t%016lx %-16llx %s",
+ dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name);
if (dd.sym_mapping[i].module[0] != '\0')
printf(" [%s] ", dd.sym_mapping[i].module);
else
@@ -706,6 +749,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
if (j++ == info->kprobe_multi.count)
break;
}
+error:
+ free(data);
}
static void show_uprobe_multi_plain(struct bpf_link_info *info)
@@ -754,6 +799,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info)
printf("+%#x", info->perf_event.kprobe.offset);
if (info->perf_event.kprobe.missed)
printf(" missed %llu", info->perf_event.kprobe.missed);
+ if (info->perf_event.kprobe.cookie)
+ printf(" cookie %llu", info->perf_event.kprobe.cookie);
printf(" ");
}
@@ -770,6 +817,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info)
else
printf("\n\tuprobe ");
printf("%s+%#x ", buf, info->perf_event.uprobe.offset);
+ if (info->perf_event.uprobe.cookie)
+ printf("cookie %llu ", info->perf_event.uprobe.cookie);
}
static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
@@ -781,6 +830,8 @@ static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
return;
printf("\n\ttracepoint %s ", buf);
+ if (info->perf_event.tracepoint.cookie)
+ printf("cookie %llu ", info->perf_event.tracepoint.cookie);
}
static void show_perf_event_event_plain(struct bpf_link_info *info)
@@ -802,6 +853,9 @@ static void show_perf_event_event_plain(struct bpf_link_info *info)
else
printf("%llu ", config);
+ if (info->perf_event.event.cookie)
+ printf("cookie %llu ", info->perf_event.event.cookie);
+
if (type == PERF_TYPE_HW_CACHE && perf_config)
free((void *)perf_config);
}
@@ -952,6 +1006,14 @@ again:
return -ENOMEM;
}
info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ cookies = calloc(count, sizeof(__u64));
+ if (!cookies) {
+ p_err("mem alloc failed");
+ free(addrs);
+ close(fd);
+ return -ENOMEM;
+ }
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
goto again;
}
}
@@ -977,7 +1039,7 @@ again:
cookies = calloc(count, sizeof(__u64));
if (!cookies) {
p_err("mem alloc failed");
- free(cookies);
+ free(ref_ctr_offsets);
free(offsets);
close(fd);
return -ENOMEM;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f98f7bbea2b1..b89bd792c1d5 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1463,7 +1463,7 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n"
+ " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-n|--nomount} }\n"
"",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index feb8e305804f..9cb42a3366c0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2298,7 +2298,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
int map_fd;
profile_perf_events = calloc(
- sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
+ obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int));
if (!profile_perf_events) {
p_err("failed to allocate memory for perf_event array: %s",
strerror(errno));
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 27a23196d58e..d9520cb826b3 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -70,6 +70,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
+#include <linux/btf_ids.h>
#include <linux/rbtree.h>
#include <linux/zalloc.h>
#include <linux/err.h>
@@ -78,7 +79,7 @@
#include <subcmd/parse-options.h>
#define BTF_IDS_SECTION ".BTF_ids"
-#define BTF_ID "__BTF_ID__"
+#define BTF_ID_PREFIX "__BTF_ID__"
#define BTF_STRUCT "struct"
#define BTF_UNION "union"
@@ -89,6 +90,14 @@
#define ADDR_CNT 100
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define ELFDATANATIVE ELFDATA2LSB
+#elif __BYTE_ORDER == __BIG_ENDIAN
+# define ELFDATANATIVE ELFDATA2MSB
+#else
+# error "Unknown machine endianness!"
+#endif
+
struct btf_id {
struct rb_node rb_node;
char *name;
@@ -116,6 +125,7 @@ struct object {
int idlist_shndx;
size_t strtabidx;
unsigned long idlist_addr;
+ int encoding;
} efile;
struct rb_root sets;
@@ -161,7 +171,7 @@ static int eprintf(int level, int var, const char *fmt, ...)
static bool is_btf_id(const char *name)
{
- return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1);
+ return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1);
}
static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
@@ -319,6 +329,7 @@ static int elf_collect(struct object *obj)
{
Elf_Scn *scn = NULL;
size_t shdrstrndx;
+ GElf_Ehdr ehdr;
int idx = 0;
Elf *elf;
int fd;
@@ -350,6 +361,13 @@ static int elf_collect(struct object *obj)
return -1;
}
+ if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) {
+ pr_err("FAILED cannot get ELF header: %s\n",
+ elf_errmsg(-1));
+ return -1;
+ }
+ obj->efile.encoding = ehdr.e_ident[EI_DATA];
+
/*
* Scan all the elf sections and look for save data
* from .BTF_ids section and symbols.
@@ -441,7 +459,7 @@ static int symbols_collect(struct object *obj)
* __BTF_ID__TYPE__vfs_truncate__0
* prefix = ^
*/
- prefix = name + sizeof(BTF_ID) - 1;
+ prefix = name + sizeof(BTF_ID_PREFIX) - 1;
/* struct */
if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) {
@@ -649,19 +667,18 @@ static int cmp_id(const void *pa, const void *pb)
static int sets_patch(struct object *obj)
{
Elf_Data *data = obj->efile.idlist;
- int *ptr = data->d_buf;
struct rb_node *next;
next = rb_first(&obj->sets);
while (next) {
- unsigned long addr, idx;
+ struct btf_id_set8 *set8;
+ struct btf_id_set *set;
+ unsigned long addr, off;
struct btf_id *id;
- int *base;
- int cnt;
id = rb_entry(next, struct btf_id, rb_node);
addr = id->addr[0];
- idx = addr - obj->efile.idlist_addr;
+ off = addr - obj->efile.idlist_addr;
/* sets are unique */
if (id->addr_cnt != 1) {
@@ -670,14 +687,39 @@ static int sets_patch(struct object *obj)
return -1;
}
- idx = idx / sizeof(int);
- base = &ptr[idx] + (id->is_set8 ? 2 : 1);
- cnt = ptr[idx];
+ if (id->is_set) {
+ set = data->d_buf + off;
+ qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id);
+ } else {
+ set8 = data->d_buf + off;
+ /*
+ * Make sure id is at the beginning of the pairs
+ * struct, otherwise the below qsort would not work.
+ */
+ BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id);
+ qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id);
- pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
- (idx + 1) * sizeof(int), cnt, id->name);
+ /*
+ * When ELF endianness does not match endianness of the
+ * host, libelf will do the translation when updating
+ * the ELF. This, however, corrupts SET8 flags which are
+ * already in the target endianness. So, let's bswap
+ * them to the host endianness and libelf will then
+ * correctly translate everything.
+ */
+ if (obj->efile.encoding != ELFDATANATIVE) {
+ int i;
+
+ set8->flags = bswap_32(set8->flags);
+ for (i = 0; i < set8->cnt; i++) {
+ set8->pairs[i].flags =
+ bswap_32(set8->pairs[i].flags);
+ }
+ }
+ }
- qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id);
+ pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
+ off, id->is_set ? set->cnt : set8->cnt, id->name);
next = rb_next(next);
}
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 2f882d5cb30f..72535f00572f 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -8,6 +8,15 @@ struct btf_id_set {
u32 ids[];
};
+struct btf_id_set8 {
+ u32 cnt;
+ u32 flags;
+ struct {
+ u32 id;
+ u32 flags;
+ } pairs[];
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7f24d898efbb..3c42b9f1bada 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -42,6 +42,7 @@
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
@@ -50,6 +51,10 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+enum bpf_cond_pseudo_jmp {
+ BPF_MAY_GOTO = 0,
+};
+
/* Register numbers */
enum {
BPF_REG_0 = 0,
@@ -77,12 +82,29 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */
};
-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
+ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
+ * the trailing flexible array member) instead.
+ */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
};
+/* Header for bpf_lpm_trie_key structs */
+struct bpf_lpm_trie_key_hdr {
+ __u32 prefixlen;
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
+struct bpf_lpm_trie_key_u8 {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ __u8 data[]; /* Arbitrary size */
+};
+
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
@@ -617,7 +639,11 @@ union bpf_iter_link_info {
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
- * continue iteration from the current point.
+ * continue iteration from the current point. Both *in_batch* and
+ * *out_batch* must point to memory large enough to hold a key,
+ * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters
+ * must be at least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
@@ -847,6 +873,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +957,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -951,6 +1009,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ BPF_MAP_TYPE_ARENA,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -995,6 +1055,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1278,6 +1339,10 @@ enum {
*/
#define BPF_PSEUDO_KFUNC_CALL 2
+enum bpf_addr_space_cast {
+ BPF_ADDR_SPACE_CAST = 1,
+};
+
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
BPF_ANY = 0, /* create new element or update existing */
@@ -1330,6 +1395,18 @@ enum {
/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
BPF_F_PATH_FD = (1U << 14),
+
+/* Flag for value_type_btf_obj_fd, the fd is available */
+ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15),
+
+/* BPF token FD is passed in a corresponding command's token_fd field */
+ BPF_F_TOKEN_FD = (1U << 16),
+
+/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */
+ BPF_F_SEGV_ON_FAULT = (1U << 17),
+
+/* Do not translate kernel bpf_arena pointers to user pointers */
+ BPF_F_NO_USER_CONV = (1U << 18),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1401,8 +1478,20 @@ union bpf_attr {
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
* number of hash functions (if 0, the bloom filter will default
* to using 5 hash functions).
+ *
+ * BPF_MAP_TYPE_ARENA - contains the address where user space
+ * is going to mmap() the arena. It has to be page aligned.
*/
__u64 map_extra;
+
+ __s32 value_type_btf_obj_fd; /* fd pointing to a BTF
+ * type data for
+ * btf_vmlinux_value_type_id.
+ */
+ /* BPF token FD to use with BPF_MAP_CREATE operation.
+ * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 map_token_fd;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1472,6 +1561,10 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 log_true_size;
+ /* BPF token FD to use with BPF_PROG_LOAD operation.
+ * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 prog_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1584,6 +1677,11 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 btf_log_true_size;
+ __u32 btf_flags;
+ /* BPF token FD to use with BPF_BTF_LOAD operation.
+ * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 btf_token_fd;
};
struct {
@@ -1714,6 +1812,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -4839,9 +4942,9 @@ union bpf_attr {
* going through the CPU's backlog queue.
*
* The *flags* argument is reserved and must be 0. The helper is
- * currently only supported for tc BPF program types at the ingress
- * hook and for veth device types. The peer device must reside in a
- * different network namespace.
+ * currently only supported for tc BPF program types at the
+ * ingress hook and for veth and netkit target device types. The
+ * peer device must reside in a different network namespace.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
@@ -6487,7 +6590,7 @@ struct bpf_map_info {
__u32 btf_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
- __u32 :32; /* alignment pad */
+ __u32 btf_vmlinux_id;
__u64 map_extra;
} __attribute__((aligned(8)));
@@ -6563,6 +6666,7 @@ struct bpf_link_info {
__u32 count; /* in/out: kprobe_multi function count */
__u32 flags;
__u64 missed;
+ __aligned_u64 cookies;
} kprobe_multi;
struct {
__aligned_u64 path;
@@ -6582,6 +6686,7 @@ struct bpf_link_info {
__aligned_u64 file_name; /* in/out */
__u32 name_len;
__u32 offset; /* offset from file_name */
+ __u64 cookie;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
@@ -6589,14 +6694,19 @@ struct bpf_link_info {
__u32 offset; /* offset from func_name */
__u64 addr;
__u64 missed;
+ __u64 cookie;
} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
struct {
__aligned_u64 tp_name; /* in/out */
__u32 name_len;
+ __u32 :32;
+ __u64 cookie;
} tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
struct {
__u64 config;
__u32 type;
+ __u32 :32;
+ __u64 cookie;
} event; /* BPF_PERF_EVENT_EVENT */
};
} perf_event;
@@ -6904,6 +7014,7 @@ enum {
BPF_TCP_LISTEN,
BPF_TCP_CLOSING, /* Now a valid state */
BPF_TCP_NEW_SYN_RECV,
+ BPF_TCP_BOUND_INACTIVE,
BPF_TCP_MAX_STATES /* Leave at the end! */
};
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index a0aa05a28cf2..f0d71b2a3f1e 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -974,6 +974,7 @@ enum {
IFLA_BOND_AD_LACP_ACTIVE,
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
+ IFLA_BOND_COUPLED_CONTROL,
__IFLA_BOND_MAX,
};
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 93cb411adf72..bb65ee840cda 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -70,6 +70,10 @@ enum netdev_queue_type {
NETDEV_QUEUE_TYPE_TX,
};
+enum netdev_qstats_scope {
+ NETDEV_QSTATS_SCOPE_QUEUE = 1,
+};
+
enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
@@ -133,6 +137,21 @@ enum {
};
enum {
+ NETDEV_A_QSTATS_IFINDEX = 1,
+ NETDEV_A_QSTATS_QUEUE_TYPE,
+ NETDEV_A_QSTATS_QUEUE_ID,
+ NETDEV_A_QSTATS_SCOPE,
+ NETDEV_A_QSTATS_RX_PACKETS = 8,
+ NETDEV_A_QSTATS_RX_BYTES,
+ NETDEV_A_QSTATS_TX_PACKETS,
+ NETDEV_A_QSTATS_TX_BYTES,
+ NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+
+ __NETDEV_A_QSTATS_MAX,
+ NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -144,6 +163,7 @@ enum {
NETDEV_CMD_PAGE_POOL_STATS_GET,
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
+ NETDEV_CMD_QSTATS_GET,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 2d0c282c8588..b6619199a706 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
- usdt.o zip.o elf.o
+ usdt.o zip.o elf.o features.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9dc9625651dc..97ec005c3c47 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
* [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
* [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
*/
-int probe_memcg_account(void)
+int probe_memcg_account(int token_fd)
{
const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
struct bpf_insn insns[] = {
@@ -120,6 +120,9 @@ int probe_memcg_account(void)
attr.insns = ptr_to_u64(insns);
attr.insn_cnt = insn_cnt;
attr.license = ptr_to_u64("GPL");
+ attr.prog_token_fd = token_fd;
+ if (token_fd)
+ attr.prog_flags |= BPF_F_TOKEN_FD;
prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
if (prog_fd >= 0) {
@@ -146,7 +149,7 @@ int bump_rlimit_memlock(void)
struct rlimit rlim;
/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
- if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
+ if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
return 0;
memlock_bumped = true;
@@ -169,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
__u32 max_entries,
const struct bpf_map_create_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
union bpf_attr attr;
int fd;
@@ -181,7 +184,7 @@ int bpf_map_create(enum bpf_map_type map_type,
return libbpf_err(-EINVAL);
attr.map_type = map_type;
- if (map_name && kernel_supports(NULL, FEAT_PROG_NAME))
+ if (map_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -191,6 +194,7 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);
+ attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, 0);
attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
attr.map_flags = OPTS_GET(opts, map_flags, 0);
@@ -198,6 +202,8 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.numa_node = OPTS_GET(opts, numa_node, 0);
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+ attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
}
@@ -232,7 +238,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, log_true_size);
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -261,8 +267,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
attr.kern_version = OPTS_GET(opts, kern_version, 0);
+ attr.prog_token_fd = OPTS_GET(opts, token_fd, 0);
- if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME))
+ if (prog_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
attr.license = ptr_to_u64(license);
@@ -1182,7 +1189,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size);
+ const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd);
union bpf_attr attr;
char *log_buf;
size_t log_size;
@@ -1207,6 +1214,10 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts
attr.btf = ptr_to_u64(btf_data);
attr.btf_size = btf_size;
+
+ attr.btf_flags = OPTS_GET(opts, btf_flags, 0);
+ attr.btf_token_fd = OPTS_GET(opts, token_fd, 0);
+
/* log_level == 0 and log_buf != NULL means "try loading without
* log_buf, but retry with log_buf and log_level=1 on error", which is
* consistent across low-level and high-level BTF and program loading
@@ -1287,3 +1298,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd,
ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz);
return libbpf_err_errno(ret);
}
+
+int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, token_create);
+ union bpf_attr attr;
+ int fd;
+
+ if (!OPTS_VALID(opts, bpf_token_create_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
+ attr.token_create.bpffs_fd = bpffs_fd;
+ attr.token_create.flags = OPTS_GET(opts, flags, 0);
+
+ fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz);
+ return libbpf_err_errno(fd);
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index d0f53772bdc0..df0db2f0cdb7 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -35,7 +35,7 @@
extern "C" {
#endif
-int libbpf_set_memlock_rlim(size_t memlock_bytes);
+LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes);
struct bpf_map_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
@@ -51,8 +51,12 @@ struct bpf_map_create_opts {
__u32 numa_node;
__u32 map_ifindex;
+ __s32 value_type_btf_obj_fd;
+
+ __u32 token_fd;
+ size_t :0;
};
-#define bpf_map_create_opts__last_field map_ifindex
+#define bpf_map_create_opts__last_field token_fd
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
@@ -102,9 +106,10 @@ struct bpf_prog_load_opts {
* If kernel doesn't support this feature, log_size is left unchanged.
*/
__u32 log_true_size;
+ __u32 token_fd;
size_t :0;
};
-#define bpf_prog_load_opts__last_field log_true_size
+#define bpf_prog_load_opts__last_field token_fd
LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name, const char *license,
@@ -130,9 +135,12 @@ struct bpf_btf_load_opts {
* If kernel doesn't support this feature, log_size is left unchanged.
*/
__u32 log_true_size;
+
+ __u32 btf_flags;
+ __u32 token_fd;
size_t :0;
};
-#define bpf_btf_load_opts__last_field log_true_size
+#define bpf_btf_load_opts__last_field token_fd
LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
struct bpf_btf_load_opts *opts);
@@ -182,10 +190,14 @@ LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
/**
* @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
*
- * The parameter *in_batch* is the address of the first element in the batch to read.
- * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent
- * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate
- * that the batched lookup starts from the beginning of the map.
+ * The parameter *in_batch* is the address of the first element in the batch to
+ * read. *out_batch* is an output parameter that should be passed as *in_batch*
+ * to subsequent calls to **bpf_map_lookup_batch()**. NULL can be passed for
+ * *in_batch* to indicate that the batched lookup starts from the beginning of
+ * the map. Both *in_batch* and *out_batch* must point to memory large enough to
+ * hold a single key, except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which the memory size must be at
+ * least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point to memory large enough to
* hold *count* items based on the key and value size of the map *map_fd*. The *keys*
@@ -218,7 +230,10 @@ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
*
* @param fd BPF map file descriptor
* @param in_batch address of the first element in batch to read, can pass NULL to
- * get address of the first element in *out_batch*
+ * get address of the first element in *out_batch*. If not NULL, must be large
+ * enough to hold a key. For **BPF_MAP_TYPE_{HASH, PERCPU_HASH, LRU_HASH,
+ * LRU_PERCPU_HASH}**, the memory size must be at least 4 bytes wide regardless
+ * of key size.
* @param out_batch output parameter that should be passed to next call as *in_batch*
* @param keys pointer to an array of *count* keys
* @param values pointer to an array large enough for *count* values
@@ -492,7 +507,10 @@ LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
* program corresponding to *prog_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param prog_fd BPF program file descriptor
* @param info pointer to **struct bpf_prog_info** that will be populated with
@@ -509,7 +527,10 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info,
* map corresponding to *map_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param map_fd BPF map file descriptor
* @param info pointer to **struct bpf_map_info** that will be populated with
@@ -522,11 +543,14 @@ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info,
LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len);
/**
- * @brief **bpf_btf_get_info_by_fd()** obtains information about the
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the
* BTF object corresponding to *btf_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param btf_fd BTF object file descriptor
* @param info pointer to **struct bpf_btf_info** that will be populated with
@@ -543,7 +567,10 @@ LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u
* link corresponding to *link_fd*.
*
* Populates up to *info_len* bytes of *info* and updates *info_len* with the
- * actual number of bytes written to *info*.
+ * actual number of bytes written to *info*. Note that *info* should be
+ * zero-initialized or initialized as expected by the requested *info*
+ * type. Failing to (zero-)initialize *info* under certain circumstances can
+ * result in this helper returning an error.
*
* @param link_fd BPF link file descriptor
* @param info pointer to **struct bpf_link_info** that will be populated with
@@ -640,6 +667,30 @@ struct bpf_test_run_opts {
LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
struct bpf_test_run_opts *opts);
+struct bpf_token_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ size_t :0;
+};
+#define bpf_token_create_opts__last_field flags
+
+/**
+ * @brief **bpf_token_create()** creates a new instance of BPF token derived
+ * from specified BPF FS mount point.
+ *
+ * BPF token created with this API can be passed to bpf() syscall for
+ * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc.
+ *
+ * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token
+ * instance.
+ * @param opts optional BPF token creation options, can be NULL
+ *
+ * @return BPF token FD > 0, on success; negative error code, otherwise (errno
+ * is also set to the error code)
+ */
+LIBBPF_API int bpf_token_create(int bpffs_fd,
+ struct bpf_token_create_opts *opts);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 7325a12692a3..1ce738d91685 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -2,6 +2,8 @@
#ifndef __BPF_CORE_READ_H__
#define __BPF_CORE_READ_H__
+#include <bpf/bpf_helpers.h>
+
/*
* enum bpf_field_info_kind is passed as a second argument into
* __builtin_preserve_field_info() built-in to get a specific aspect of
@@ -44,7 +46,7 @@ enum bpf_enum_value_kind {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
bpf_probe_read_kernel( \
- (void *)dst, \
+ (void *)dst, \
__CORE_RELO(src, fld, BYTE_SIZE), \
(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#else
@@ -143,8 +145,29 @@ enum bpf_enum_value_kind {
} \
})
+/* Differentiator between compilers builtin implementations. This is a
+ * requirement due to the compiler parsing differences where GCC optimizes
+ * early in parsing those constructs of type pointers to the builtin specific
+ * type, resulting in not being possible to collect the required type
+ * information in the builtin expansion.
+ */
+#ifdef __clang__
+#define ___bpf_typeof(type) ((typeof(type) *) 0)
+#else
+#define ___bpf_typeof1(type, NR) ({ \
+ extern typeof(type) *___concat(bpf_type_tmp_, NR); \
+ ___concat(bpf_type_tmp_, NR); \
+})
+#define ___bpf_typeof(type) ___bpf_typeof1(type, __COUNTER__)
+#endif
+
+#ifdef __clang__
#define ___bpf_field_ref1(field) (field)
-#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field)
+#define ___bpf_field_ref2(type, field) (___bpf_typeof(type)->field)
+#else
+#define ___bpf_field_ref1(field) (&(field))
+#define ___bpf_field_ref2(type, field) (&(___bpf_typeof(type)->field))
+#endif
#define ___bpf_field_ref(args...) \
___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args)
@@ -194,7 +217,7 @@ enum bpf_enum_value_kind {
* BTF. Always succeeds.
*/
#define bpf_core_type_id_local(type) \
- __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
+ __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_LOCAL)
/*
* Convenience macro to get BTF type ID of a target kernel's type that matches
@@ -204,7 +227,7 @@ enum bpf_enum_value_kind {
* - 0, if no matching type was found in a target kernel BTF.
*/
#define bpf_core_type_id_kernel(type) \
- __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
+ __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_TARGET)
/*
* Convenience macro to check that provided named type
@@ -214,7 +237,7 @@ enum bpf_enum_value_kind {
* 0, if no matching type is found.
*/
#define bpf_core_type_exists(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_EXISTS)
/*
* Convenience macro to check that provided named type
@@ -224,7 +247,7 @@ enum bpf_enum_value_kind {
* 0, if the type does not match any in the target kernel
*/
#define bpf_core_type_matches(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_MATCHES)
/*
* Convenience macro to get the byte size of a provided named type
@@ -234,7 +257,7 @@ enum bpf_enum_value_kind {
* 0, if no matching type is found.
*/
#define bpf_core_type_size(type) \
- __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
+ __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_SIZE)
/*
* Convenience macro to check that provided enumerator value is defined in
@@ -244,8 +267,13 @@ enum bpf_enum_value_kind {
* kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
+#ifdef __clang__
#define bpf_core_enum_value_exists(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
+#else
+#define bpf_core_enum_value_exists(enum_type, enum_value) \
+ __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_EXISTS)
+#endif
/*
* Convenience macro to get the integer value of an enumerator value in
@@ -255,8 +283,13 @@ enum bpf_enum_value_kind {
* present in target kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
+#ifdef __clang__
#define bpf_core_enum_value(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
+#else
+#define bpf_core_enum_value(enum_type, enum_value) \
+ __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_VALUE)
+#endif
/*
* bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
@@ -268,7 +301,7 @@ enum bpf_enum_value_kind {
* a relocation, which records BTF type ID describing root struct/union and an
* accessor string which describes exact embedded field that was used to take
* an address. See detailed description of this relocation format and
- * semantics in comments to struct bpf_field_reloc in libbpf_internal.h.
+ * semantics in comments to struct bpf_core_relo in include/uapi/linux/bpf.h.
*
* This relocation allows libbpf to adjust BPF instruction to use correct
* actual field offset, based on target kernel BTF type that matches original
@@ -292,6 +325,17 @@ enum bpf_enum_value_kind {
#define bpf_core_read_user_str(dst, sz, src) \
bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src))
+extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
+
+/*
+ * Cast provided pointer *ptr* into a pointer to a specified *type* in such
+ * a way that BPF verifier will become aware of associated kernel-side BTF
+ * type. This allows to access members of kernel types directly without the
+ * need to use BPF_CORE_READ() macros.
+ */
+#define bpf_core_cast(ptr, type) \
+ ((typeof(type) *)bpf_rdonly_cast((ptr), bpf_core_type_id_kernel(type)))
+
#define ___concat(a, b) a ## b
#define ___apply(fn, n) ___concat(fn, n)
#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 2324cc42b017..cd17f6d0791f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -13,6 +13,7 @@
#define __uint(name, val) int (*name)[val]
#define __type(name, val) typeof(val) *name
#define __array(name, val) typeof(val) *name[]
+#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name
/*
* Helper macro to place programs, maps, license in
@@ -190,6 +191,9 @@ enum libbpf_tristate {
#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx")))
#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull")))
+#define __arg_nullable __attribute((btf_decl_tag("arg:nullable")))
+#define __arg_trusted __attribute((btf_decl_tag("arg:trusted")))
+#define __arg_arena __attribute((btf_decl_tag("arg:arena")))
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ee95fd379d4d..2d0840ef599a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1079,6 +1079,11 @@ struct btf *btf__new(const void *data, __u32 size)
return libbpf_ptr(btf_new(data, size, NULL));
}
+struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
+{
+ return libbpf_ptr(btf_new(data, size, base_btf));
+}
+
static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
struct btf_ext **btf_ext)
{
@@ -1317,7 +1322,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level)
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd)
{
LIBBPF_OPTS(bpf_btf_load_opts, opts);
__u32 buf_sz = 0, raw_size;
@@ -1367,6 +1374,10 @@ retry_load:
opts.log_level = log_level;
}
+ opts.token_fd = token_fd;
+ if (token_fd)
+ opts.btf_flags |= BPF_F_TOKEN_FD;
+
btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
if (btf->fd < 0) {
/* time to turn on verbose mode and try again */
@@ -1394,7 +1405,7 @@ done:
int btf__load_into_kernel(struct btf *btf)
{
- return btf_load_into_kernel(btf, NULL, 0, 0);
+ return btf_load_into_kernel(btf, NULL, 0, 0, 0);
}
int btf__fd(const struct btf *btf)
@@ -3039,12 +3050,16 @@ done:
return btf_ext;
}
-const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size)
{
*size = btf_ext->data_size;
return btf_ext->data;
}
+__attribute__((alias("btf_ext__raw_data")))
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size);
+
+
struct btf_dedup;
static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
@@ -4926,10 +4941,9 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
*/
struct btf *btf__load_vmlinux_btf(void)
{
+ const char *sysfs_btf_path = "/sys/kernel/btf/vmlinux";
+ /* fall back locations, trying to find vmlinux on disk */
const char *locations[] = {
- /* try canonical vmlinux BTF through sysfs first */
- "/sys/kernel/btf/vmlinux",
- /* fall back to trying to find vmlinux on disk otherwise */
"/boot/vmlinux-%1$s",
"/lib/modules/%1$s/vmlinux-%1$s",
"/lib/modules/%1$s/build/vmlinux",
@@ -4943,8 +4957,23 @@ struct btf *btf__load_vmlinux_btf(void)
struct btf *btf;
int i, err;
- uname(&buf);
+ /* is canonical sysfs location accessible? */
+ if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) {
+ pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
+ sysfs_btf_path);
+ } else {
+ btf = btf__parse(sysfs_btf_path, NULL);
+ if (!btf) {
+ err = -errno;
+ pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err);
+ return libbpf_err_ptr(err);
+ }
+ pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path);
+ return btf;
+ }
+ /* try fallback locations */
+ uname(&buf);
for (i = 0; i < ARRAY_SIZE(locations); i++) {
snprintf(path, PATH_MAX, locations[i], buf.release);
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index b02faec748a5..c92e02394159 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -11,8 +11,6 @@
#include "libbpf_internal.h"
#include "str_error.h"
-#define STRERR_BUFSIZE 128
-
/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
* the symbol is hidden and can only be seen when referenced using an
* explicit version number. This is a GNU extension.
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
new file mode 100644
index 000000000000..4e783cc7fc4b
--- /dev/null
+++ b/tools/lib/bpf/features.c
@@ -0,0 +1,583 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/kernel.h>
+#include <linux/filter.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "str_error.h"
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+int probe_fd(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+ return fd >= 0;
+}
+
+static int probe_kern_prog_name(int token_fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.license = ptr_to_u64("GPL");
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
+ attr.prog_token_fd = token_fd;
+ if (token_fd)
+ attr.prog_flags |= BPF_F_TOKEN_FD;
+ libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
+
+ /* make sure loading with name works */
+ ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
+ return probe_fd(ret);
+}
+
+static int probe_kern_global_data(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .token_fd = token_fd,
+ .map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, map, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ insns[0].imm = map;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ close(map);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func_global(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* static void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_datasec(int token_fd)
+{
+ static const char strs[] = "\0x\0.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC val */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_qmark_datasec(int token_fd)
+{
+ static const char strs[] = "\0x\0?.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_float(int token_fd)
+{
+ static const char strs[] = "\0float";
+ __u32 types[] = {
+ /* float */
+ BTF_TYPE_FLOAT_ENC(1, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_decl_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* attr */
+ BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_type_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* attr */
+ BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
+ /* ptr */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_array_mmap(int token_fd)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = BPF_F_MMAPABLE | (token_fd ? BPF_F_TOKEN_FD : 0),
+ .token_fd = token_fd,
+ );
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_exp_attach_type(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ /* use any valid combination of program type and (optional)
+ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
+ * to see if kernel supports expected_attach_type field for
+ * BPF_PROG_LOAD command
+ */
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_probe_read_kernel(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_prog_bind_map(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .token_fd = token_fd,
+ .map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ if (prog < 0) {
+ close(map);
+ return 0;
+ }
+
+ ret = bpf_prog_bind_map(prog, map, NULL);
+
+ close(map);
+ close(prog);
+
+ return ret >= 0;
+}
+
+static int probe_module_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+ struct bpf_btf_info info;
+ __u32 len = sizeof(info);
+ char name[16];
+ int fd, err;
+
+ fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
+ if (fd < 0)
+ return 0; /* BTF not supported at all */
+
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
+
+ /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
+ * kernel's module BTF support coincides with support for
+ * name/name_len fields in struct bpf_btf_info.
+ */
+ err = bpf_btf_get_info_by_fd(fd, &info, &len);
+ close(fd);
+ return !err;
+}
+
+static int probe_perf_link(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int prog_fd, link_fd, err;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* use invalid perf_event FD to get EBADF, if link is supported;
+ * otherwise EINVAL should be returned
+ */
+ link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_uprobe_multi_link(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+ .expected_attach_type = BPF_TRACE_UPROBE_MULTI,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, link_fd, err;
+ unsigned long offset = 0;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &load_opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* Creating uprobe in '/' binary should fail with -EBADF. */
+ link_opts.uprobe_multi.path = "/";
+ link_opts.uprobe_multi.offsets = &offset;
+ link_opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_kern_bpf_cookie(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf_enum64(int token_fd)
+{
+ static const char strs[] = "\0enum64";
+ __u32 types[] = {
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_arg_ctx_tag(int token_fd)
+{
+ static const char strs[] = "\0a\0b\0arg:ctx\0";
+ const __u32 types[] = {
+ /* [1] INT */
+ BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
+ /* [2] PTR -> VOID */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* [3] FUNC_PROTO `int(void *a)` */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
+ BTF_PARAM_ENC(1 /* "a" */, 2),
+ /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
+ BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
+ /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
+ BTF_PARAM_ENC(3 /* "b" */, 2),
+ /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
+ BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
+ /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
+ BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
+ };
+ const struct bpf_insn insns[] = {
+ /* main prog */
+ BPF_CALL_REL(+1),
+ BPF_EXIT_INSN(),
+ /* global subprog */
+ BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
+ BPF_EXIT_INSN(),
+ };
+ const struct bpf_func_info_min func_infos[] = {
+ { 0, 4 }, /* main prog -> FUNC 'a' */
+ { 2, 6 }, /* subprog -> FUNC 'b' */
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = token_fd,
+ .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
+ int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);
+
+ btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
+ if (btf_fd < 0)
+ return 0;
+
+ opts.prog_btf_fd = btf_fd;
+ opts.func_info = &func_infos;
+ opts.func_info_cnt = ARRAY_SIZE(func_infos);
+ opts.func_info_rec_size = sizeof(func_infos[0]);
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
+ "GPL", insns, insn_cnt, &opts);
+ close(btf_fd);
+
+ return probe_fd(prog_fd);
+}
+
+typedef int (*feature_probe_fn)(int /* token_fd */);
+
+static struct kern_feature_cache feature_cache;
+
+static struct kern_feature_desc {
+ const char *desc;
+ feature_probe_fn probe;
+} feature_probes[__FEAT_CNT] = {
+ [FEAT_PROG_NAME] = {
+ "BPF program name", probe_kern_prog_name,
+ },
+ [FEAT_GLOBAL_DATA] = {
+ "global variables", probe_kern_global_data,
+ },
+ [FEAT_BTF] = {
+ "minimal BTF", probe_kern_btf,
+ },
+ [FEAT_BTF_FUNC] = {
+ "BTF functions", probe_kern_btf_func,
+ },
+ [FEAT_BTF_GLOBAL_FUNC] = {
+ "BTF global function", probe_kern_btf_func_global,
+ },
+ [FEAT_BTF_DATASEC] = {
+ "BTF data section and variable", probe_kern_btf_datasec,
+ },
+ [FEAT_ARRAY_MMAP] = {
+ "ARRAY map mmap()", probe_kern_array_mmap,
+ },
+ [FEAT_EXP_ATTACH_TYPE] = {
+ "BPF_PROG_LOAD expected_attach_type attribute",
+ probe_kern_exp_attach_type,
+ },
+ [FEAT_PROBE_READ_KERN] = {
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ },
+ [FEAT_PROG_BIND_MAP] = {
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
+ },
+ [FEAT_MODULE_BTF] = {
+ "module BTF support", probe_module_btf,
+ },
+ [FEAT_BTF_FLOAT] = {
+ "BTF_KIND_FLOAT support", probe_kern_btf_float,
+ },
+ [FEAT_PERF_LINK] = {
+ "BPF perf link support", probe_perf_link,
+ },
+ [FEAT_BTF_DECL_TAG] = {
+ "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
+ },
+ [FEAT_BTF_TYPE_TAG] = {
+ "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
+ },
+ [FEAT_MEMCG_ACCOUNT] = {
+ "memcg-based memory accounting", probe_memcg_account,
+ },
+ [FEAT_BPF_COOKIE] = {
+ "BPF cookie support", probe_kern_bpf_cookie,
+ },
+ [FEAT_BTF_ENUM64] = {
+ "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
+ },
+ [FEAT_SYSCALL_WRAPPER] = {
+ "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
+ },
+ [FEAT_UPROBE_MULTI_LINK] = {
+ "BPF multi-uprobe link support", probe_uprobe_multi_link,
+ },
+ [FEAT_ARG_CTX_TAG] = {
+ "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag,
+ },
+ [FEAT_BTF_QMARK_DATASEC] = {
+ "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
+ },
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
+{
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
+ int ret;
+
+ /* assume global feature cache, unless custom one is provided */
+ if (!cache)
+ cache = &feature_cache;
+
+ if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) {
+ ret = feat->probe(cache->token_fd);
+ if (ret > 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED);
+ } else if (ret == 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ } else {
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ }
+ }
+
+ return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index afd09571c482..efab29b8935b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -33,6 +33,7 @@
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
+#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
@@ -59,6 +60,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
+
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
@@ -70,6 +73,7 @@
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
+static int map_set_def_max_entries(struct bpf_map *map);
static const char * const attach_type_name[] = {
[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
@@ -181,6 +185,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
+ [BPF_MAP_TYPE_ARENA] = "arena",
};
static const char * const prog_type_name[] = {
@@ -493,6 +498,7 @@ struct bpf_struct_ops {
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
+#define ARENA_SEC ".arena.1"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -527,6 +533,7 @@ struct bpf_map {
struct bpf_map_def def;
__u32 numa_node;
__u32 btf_var_idx;
+ int mod_btf_fd;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
@@ -607,6 +614,7 @@ enum sec_type {
SEC_BSS,
SEC_DATA,
SEC_RODATA,
+ SEC_ST_OPS,
};
struct elf_sec_desc {
@@ -622,8 +630,7 @@ struct elf_state {
Elf *elf;
Elf64_Ehdr *ehdr;
Elf_Data *symbols;
- Elf_Data *st_ops_data;
- Elf_Data *st_ops_link_data;
+ Elf_Data *arena_data;
size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct elf_sec_desc *secs;
@@ -632,8 +639,8 @@ struct elf_state {
__u32 btf_maps_sec_btf_id;
int text_shndx;
int symbols_shndx;
- int st_ops_shndx;
- int st_ops_link_shndx;
+ bool has_st_ops;
+ int arena_data_shndx;
};
struct usdt_manager;
@@ -693,6 +700,14 @@ struct bpf_object {
struct usdt_manager *usdt_man;
+ struct bpf_map *arena_map;
+ void *arena_data;
+ size_t arena_data_sz;
+
+ struct kern_feature_cache *feat_cache;
+ char *token_path;
+ int token_fd;
+
char path[];
};
@@ -930,22 +945,33 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t,
return NULL;
}
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+ __u16 kind, struct btf **res_btf,
+ struct module_btf **res_mod_btf);
+
#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
const char *name, __u32 kind);
static int
-find_struct_ops_kern_types(const struct btf *btf, const char *tname,
+find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
+ struct module_btf **mod_btf,
const struct btf_type **type, __u32 *type_id,
const struct btf_type **vtype, __u32 *vtype_id,
const struct btf_member **data_member)
{
const struct btf_type *kern_type, *kern_vtype;
const struct btf_member *kern_data_member;
+ struct btf *btf;
__s32 kern_vtype_id, kern_type_id;
+ char tname[256];
__u32 i;
- kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ snprintf(tname, sizeof(tname), "%.*s",
+ (int)bpf_core_essential_name_len(tname_raw), tname_raw);
+
+ kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
+ &btf, mod_btf);
if (kern_type_id < 0) {
pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
tname);
@@ -998,15 +1024,72 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map)
return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}
+static bool is_valid_st_ops_program(struct bpf_object *obj,
+ const struct bpf_program *prog)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ if (&obj->programs[i] == prog)
+ return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
+ }
+
+ return false;
+}
+
+/* For each struct_ops program P, referenced from some struct_ops map M,
+ * enable P.autoload if there are Ms for which M.autocreate is true,
+ * disable P.autoload if for all Ms M.autocreate is false.
+ * Don't change P.autoload for programs that are not referenced from any maps.
+ */
+static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
+{
+ struct bpf_program *prog, *slot_prog;
+ struct bpf_map *map;
+ int i, j, k, vlen;
+
+ for (i = 0; i < obj->nr_programs; ++i) {
+ int should_load = false;
+ int use_cnt = 0;
+
+ prog = &obj->programs[i];
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ continue;
+
+ for (j = 0; j < obj->nr_maps; ++j) {
+ map = &obj->maps[j];
+ if (!bpf_map__is_struct_ops(map))
+ continue;
+
+ vlen = btf_vlen(map->st_ops->type);
+ for (k = 0; k < vlen; ++k) {
+ slot_prog = map->st_ops->progs[k];
+ if (prog != slot_prog)
+ continue;
+
+ use_cnt++;
+ if (map->autocreate)
+ should_load = true;
+ }
+ }
+ if (use_cnt)
+ prog->autoload = should_load;
+ }
+
+ return 0;
+}
+
/* Init the map's fields that depend on kern_btf */
-static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
- const struct btf *btf,
- const struct btf *kern_btf)
+static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
const struct btf_member *member, *kern_member, *kern_data_member;
const struct btf_type *type, *kern_type, *kern_vtype;
__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
+ struct bpf_object *obj = map->obj;
+ const struct btf *btf = obj->btf;
struct bpf_struct_ops *st_ops;
+ const struct btf *kern_btf;
+ struct module_btf *mod_btf;
void *data, *kern_data;
const char *tname;
int err;
@@ -1014,16 +1097,19 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
st_ops = map->st_ops;
type = st_ops->type;
tname = st_ops->tname;
- err = find_struct_ops_kern_types(kern_btf, tname,
+ err = find_struct_ops_kern_types(obj, tname, &mod_btf,
&kern_type, &kern_type_id,
&kern_vtype, &kern_vtype_id,
&kern_data_member);
if (err)
return err;
+ kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
+
pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
+ map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
map->def.value_size = kern_vtype->size;
map->btf_vmlinux_value_type_id = kern_vtype_id;
@@ -1081,9 +1167,16 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
if (btf_is_ptr(mtype)) {
struct bpf_program *prog;
- prog = st_ops->progs[i];
+ /* Update the value from the shadow type */
+ prog = *(void **)mdata;
+ st_ops->progs[i] = prog;
if (!prog)
continue;
+ if (!is_valid_st_ops_program(obj, prog)) {
+ pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
+ map->name, mname);
+ return -ENOTSUP;
+ }
kern_mtype = skip_mods_and_typedefs(kern_btf,
kern_mtype->type,
@@ -1099,8 +1192,34 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
return -ENOTSUP;
}
- prog->attach_btf_id = kern_type_id;
- prog->expected_attach_type = kern_member_idx;
+ if (mod_btf)
+ prog->attach_btf_obj_fd = mod_btf->fd;
+
+ /* if we haven't yet processed this BPF program, record proper
+ * attach_btf_id and member_idx
+ */
+ if (!prog->attach_btf_id) {
+ prog->attach_btf_id = kern_type_id;
+ prog->expected_attach_type = kern_member_idx;
+ }
+
+ /* struct_ops BPF prog can be re-used between multiple
+ * .struct_ops & .struct_ops.link as long as it's the
+ * same struct_ops struct definition and the same
+ * function pointer field
+ */
+ if (prog->attach_btf_id != kern_type_id) {
+ pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
+ map->name, mname, prog->name, prog->sec_name, prog->type,
+ prog->attach_btf_id, kern_type_id);
+ return -EINVAL;
+ }
+ if (prog->expected_attach_type != kern_member_idx) {
+ pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
+ map->name, mname, prog->name, prog->sec_name, prog->type,
+ prog->expected_attach_type, kern_member_idx);
+ return -EINVAL;
+ }
st_ops->kern_func_off[i] = kern_data_off + kern_moff;
@@ -1141,8 +1260,10 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
if (!bpf_map__is_struct_ops(map))
continue;
- err = bpf_map__init_kern_struct_ops(map, obj->btf,
- obj->btf_vmlinux);
+ if (!map->autocreate)
+ continue;
+
+ err = bpf_map__init_kern_struct_ops(map);
if (err)
return err;
}
@@ -1151,7 +1272,7 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
}
static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
- int shndx, Elf_Data *data, __u32 map_flags)
+ int shndx, Elf_Data *data)
{
const struct btf_type *type, *datasec;
const struct btf_var_secinfo *vsi;
@@ -1207,12 +1328,22 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
map->name = strdup(var_name);
if (!map->name)
return -ENOMEM;
+ map->btf_value_type_id = type_id;
+
+ /* Follow same convention as for programs autoload:
+ * SEC("?.struct_ops") means map is not created by default.
+ */
+ if (sec_name[0] == '?') {
+ map->autocreate = false;
+ /* from now on forget there was ? in section name */
+ sec_name++;
+ }
map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
map->def.key_size = sizeof(int);
map->def.value_size = type->size;
map->def.max_entries = 1;
- map->def.map_flags = map_flags;
+ map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
map->st_ops = calloc(1, sizeof(*map->st_ops));
if (!map->st_ops)
@@ -1247,15 +1378,25 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
- int err;
+ const char *sec_name;
+ int sec_idx, err;
- err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
- obj->efile.st_ops_data, 0);
- err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
- obj->efile.st_ops_link_shndx,
- obj->efile.st_ops_link_data,
- BPF_F_LINK);
- return err;
+ for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
+ struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
+
+ if (desc->sec_type != SEC_ST_OPS)
+ continue;
+
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ if (!sec_name)
+ return -LIBBPF_ERRNO__FORMAT;
+
+ err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
static struct bpf_object *bpf_object__new(const char *path,
@@ -1293,8 +1434,6 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.obj_buf = obj_buf;
obj->efile.obj_buf_sz = obj_buf_sz;
obj->efile.btf_maps_shndx = -1;
- obj->efile.st_ops_shndx = -1;
- obj->efile.st_ops_link_shndx = -1;
obj->kconfig_map_idx = -1;
obj->kern_version = get_kernel_version();
@@ -1311,8 +1450,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
elf_end(obj->efile.elf);
obj->efile.elf = NULL;
obj->efile.symbols = NULL;
- obj->efile.st_ops_data = NULL;
- obj->efile.st_ops_link_data = NULL;
+ obj->efile.arena_data = NULL;
zfree(&obj->efile.secs);
obj->efile.sec_cnt = 0;
@@ -1503,11 +1641,20 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
return ERR_PTR(-ENOENT);
}
+/* Some versions of Android don't provide memfd_create() in their libc
+ * implementation, so avoid complications and just go straight to Linux
+ * syscall.
+ */
+static int sys_memfd_create(const char *name, unsigned flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
static int create_placeholder_fd(void)
{
int fd;
- fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
+ fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
if (fd < 0)
return -errno;
return fd;
@@ -1546,7 +1693,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return map;
}
-static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
+static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
const long page_sz = sysconf(_SC_PAGE_SIZE);
size_t map_sz;
@@ -1556,6 +1703,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
return map_sz;
}
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
+
+ switch (map->def.type) {
+ case BPF_MAP_TYPE_ARRAY:
+ return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ case BPF_MAP_TYPE_ARENA:
+ return page_sz * map->def.max_entries;
+ default:
+ return 0; /* not supported */
+ }
+}
+
static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
void *mmaped;
@@ -1698,7 +1859,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def->value_size = data_sz;
def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
- ? BPF_F_RDONLY_PROG : 0;
+ ? BPF_F_RDONLY_PROG : 0;
/* failures are fine because of maps like .rodata.str1.1 */
(void) map_fill_btf_type_info(obj, map);
@@ -1709,7 +1870,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
map->name, map->sec_idx, map->sec_offset, def->map_flags);
- mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ mmap_sz = bpf_map_mmap_sz(map);
map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (map->mmaped == MAP_FAILED) {
@@ -2197,6 +2358,46 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return true;
}
+static bool get_map_field_long(const char *map_name, const struct btf *btf,
+ const struct btf_member *m, __u64 *res)
+{
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
+ const char *name = btf__name_by_offset(btf, m->name_off);
+
+ if (btf_is_ptr(t)) {
+ __u32 res32;
+ bool ret;
+
+ ret = get_map_field_int(map_name, btf, m, &res32);
+ if (ret)
+ *res = (__u64)res32;
+ return ret;
+ }
+
+ if (!btf_is_enum(t) && !btf_is_enum64(t)) {
+ pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
+ map_name, name, btf_kind_str(t));
+ return false;
+ }
+
+ if (btf_vlen(t) != 1) {
+ pr_warn("map '%s': attr '%s': invalid __ulong\n",
+ map_name, name);
+ return false;
+ }
+
+ if (btf_is_enum(t)) {
+ const struct btf_enum *e = btf_enum(t);
+
+ *res = e->val;
+ } else {
+ const struct btf_enum64 *e = btf_enum64(t);
+
+ *res = btf_enum64_value(e);
+ }
+ return true;
+}
+
static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
{
int len;
@@ -2216,7 +2417,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
int err;
if (!path)
- path = "/sys/fs/bpf";
+ path = BPF_FS_DEFAULT_PATH;
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
if (err)
@@ -2430,9 +2631,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
map_def->pinning = val;
map_def->parts |= MAP_DEF_PINNING;
} else if (strcmp(name, "map_extra") == 0) {
- __u32 map_extra;
+ __u64 map_extra;
- if (!get_map_field_int(map_name, btf, m, &map_extra))
+ if (!get_map_field_long(map_name, btf, m, &map_extra))
return -EINVAL;
map_def->map_extra = map_extra;
map_def->parts |= MAP_DEF_MAP_EXTRA;
@@ -2650,6 +2851,32 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
return 0;
}
+static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
+ const char *sec_name, int sec_idx,
+ void *data, size_t data_sz)
+{
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t mmap_sz;
+
+ mmap_sz = bpf_map_mmap_sz(obj->arena_map);
+ if (roundup(data_sz, page_sz) > mmap_sz) {
+ pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
+ sec_name, mmap_sz, data_sz);
+ return -E2BIG;
+ }
+
+ obj->arena_data = malloc(data_sz);
+ if (!obj->arena_data)
+ return -ENOMEM;
+ memcpy(obj->arena_data, data, data_sz);
+ obj->arena_data_sz = data_sz;
+
+ /* make bpf_map__init_value() work for ARENA maps */
+ map->mmaped = obj->arena_data;
+
+ return 0;
+}
+
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
const char *pin_root_path)
{
@@ -2699,6 +2926,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return err;
}
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map *map = &obj->maps[i];
+
+ if (map->def.type != BPF_MAP_TYPE_ARENA)
+ continue;
+
+ if (obj->arena_map) {
+ pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
+ map->name, obj->arena_map->name);
+ return -EINVAL;
+ }
+ obj->arena_map = map;
+
+ if (obj->efile.arena_data) {
+ err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
+ obj->efile.arena_data->d_buf,
+ obj->efile.arena_data->d_size);
+ if (err)
+ return err;
+ }
+ }
+ if (obj->efile.arena_data && !obj->arena_map) {
+ pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
+ ARENA_SEC);
+ return -ENOENT;
+ }
+
return 0;
}
@@ -2731,6 +2985,11 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
return sh->sh_flags & SHF_EXECINSTR;
}
+static bool starts_with_qmark(const char *s)
+{
+ return s && s[0] == '?';
+}
+
static bool btf_needs_sanitization(struct bpf_object *obj)
{
bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
@@ -2740,9 +2999,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
return !has_func || !has_datasec || !has_func_global || !has_float ||
- !has_decl_tag || !has_type_tag || !has_enum64;
+ !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
}
static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
@@ -2754,6 +3014,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+ bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
int enum64_placeholder_id = 0;
struct btf_type *t;
int i, j, vlen;
@@ -2780,7 +3041,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
name = (char *)btf__name_by_offset(btf, t->name_off);
while (*name) {
- if (*name == '.')
+ if (*name == '.' || *name == '?')
*name = '_';
name++;
}
@@ -2795,6 +3056,14 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
vt = (void *)btf__type_by_id(btf, v->type);
m->name_off = vt->name_off;
}
+ } else if (!has_qmark_datasec && btf_is_datasec(t) &&
+ starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
+ /* replace '?' prefix with '_' for DATASEC names */
+ char *name;
+
+ name = (char *)btf__name_by_offset(btf, t->name_off);
+ if (name[0] == '?')
+ name[0] = '_';
} else if (!has_func && btf_is_func_proto(t)) {
/* replace FUNC_PROTO with ENUM */
vlen = btf_vlen(t);
@@ -2848,14 +3117,13 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
- obj->efile.st_ops_shndx >= 0 ||
- obj->efile.st_ops_link_shndx >= 0 ||
+ obj->efile.has_st_ops ||
obj->nr_extern > 0;
}
static bool kernel_needs_btf(const struct bpf_object *obj)
{
- return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
+ return obj->efile.has_st_ops;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -3225,7 +3493,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
} else {
/* currently BPF_BTF_LOAD only supports log_level 1 */
err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
- obj->log_level ? 1 : 0);
+ obj->log_level ? 1 : 0, obj->token_fd);
}
if (sanitize) {
if (!err) {
@@ -3556,12 +3824,17 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->sec_type = SEC_RODATA;
sec_desc->shdr = sh;
sec_desc->data = data;
- } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
- obj->efile.st_ops_data = data;
- obj->efile.st_ops_shndx = idx;
- } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
- obj->efile.st_ops_link_data = data;
- obj->efile.st_ops_link_shndx = idx;
+ } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
+ strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
+ strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
+ strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
+ sec_desc->sec_type = SEC_ST_OPS;
+ sec_desc->shdr = sh;
+ sec_desc->data = data;
+ obj->efile.has_st_ops = true;
+ } else if (strcmp(name, ARENA_SEC) == 0) {
+ obj->efile.arena_data = data;
+ obj->efile.arena_data_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
@@ -3577,6 +3850,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (!section_have_execinstr(obj, targ_sec_idx) &&
strcmp(name, ".rel" STRUCT_OPS_SEC) &&
strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
+ strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
+ strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
strcmp(name, ".rel" MAPS_ELF_SEC)) {
pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
idx, name, targ_sec_idx,
@@ -4189,6 +4464,15 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ /* arena data relocation */
+ if (shdr_idx == obj->efile.arena_data_shndx) {
+ reloc_desc->type = RELO_DATA;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = obj->arena_map - obj->maps;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@@ -4546,6 +4830,58 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
return 0;
}
+static int bpf_object_prepare_token(struct bpf_object *obj)
+{
+ const char *bpffs_path;
+ int bpffs_fd = -1, token_fd, err;
+ bool mandatory;
+ enum libbpf_print_level level;
+
+ /* token is explicitly prevented */
+ if (obj->token_path && obj->token_path[0] == '\0') {
+ pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
+ return 0;
+ }
+
+ mandatory = obj->token_path != NULL;
+ level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
+
+ bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
+ bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
+ if (bpffs_fd < 0) {
+ err = -errno;
+ __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
+ obj->name, err, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? err : 0;
+ }
+
+ token_fd = bpf_token_create(bpffs_fd, 0);
+ close(bpffs_fd);
+ if (token_fd < 0) {
+ if (!mandatory && token_fd == -ENOENT) {
+ pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
+ obj->name, bpffs_path);
+ return 0;
+ }
+ __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
+ obj->name, token_fd, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? token_fd : 0;
+ }
+
+ obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
+ if (!obj->feat_cache) {
+ close(token_fd);
+ return -ENOMEM;
+ }
+
+ obj->token_fd = token_fd;
+ obj->feat_cache->token_fd = token_fd;
+
+ return 0;
+}
+
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
@@ -4555,6 +4891,10 @@ bpf_object__probe_loading(struct bpf_object *obj)
BPF_EXIT_INSN(),
};
int ret, insn_cnt = ARRAY_SIZE(insns);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .token_fd = obj->token_fd,
+ .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
+ );
if (obj->gen_loader)
return 0;
@@ -4564,9 +4904,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
/* make sure basic loading works */
- ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0)
- ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4581,462 +4921,18 @@ bpf_object__probe_loading(struct bpf_object *obj)
return 0;
}
-static int probe_fd(int fd)
-{
- if (fd >= 0)
- close(fd);
- return fd >= 0;
-}
-
-static int probe_kern_prog_name(void)
-{
- const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- union bpf_attr attr;
- int ret;
-
- memset(&attr, 0, attr_sz);
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.license = ptr_to_u64("GPL");
- attr.insns = ptr_to_u64(insns);
- attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
- libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
-
- /* make sure loading with name works */
- ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
- return probe_fd(ret);
-}
-
-static int probe_kern_global_data(void)
-{
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map, insn_cnt = ARRAY_SIZE(insns);
-
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- insns[0].imm = map;
-
- ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
- close(map);
- return probe_fd(ret);
-}
-
-static int probe_kern_btf(void)
-{
- static const char strs[] = "\0int";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func_global(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* static void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_datasec(void)
-{
- static const char strs[] = "\0x\0.data";
- /* static int a; */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* VAR x */ /* [2] */
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
- BTF_VAR_STATIC,
- /* DATASEC val */ /* [3] */
- BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
- BTF_VAR_SECINFO_ENC(2, 0, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_float(void)
-{
- static const char strs[] = "\0float";
- __u32 types[] = {
- /* float */
- BTF_TYPE_FLOAT_ENC(1, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_decl_tag(void)
-{
- static const char strs[] = "\0tag";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* VAR x */ /* [2] */
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
- BTF_VAR_STATIC,
- /* attr */
- BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_type_tag(void)
-{
- static const char strs[] = "\0tag";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* attr */
- BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
- /* ptr */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_array_mmap(void)
-{
- LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
- int fd;
-
- fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
- return probe_fd(fd);
-}
-
-static int probe_kern_exp_attach_type(void)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int fd, insn_cnt = ARRAY_SIZE(insns);
-
- /* use any valid combination of program type and (optional)
- * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
- * to see if kernel supports expected_attach_type field for
- * BPF_PROG_LOAD command
- */
- fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
- return probe_fd(fd);
-}
-
-static int probe_kern_probe_read_kernel(void)
-{
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
- BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
- BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- };
- int fd, insn_cnt = ARRAY_SIZE(insns);
-
- fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
- return probe_fd(fd);
-}
-
-static int probe_prog_bind_map(void)
-{
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
-
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
- if (prog < 0) {
- close(map);
- return 0;
- }
-
- ret = bpf_prog_bind_map(prog, map, NULL);
-
- close(map);
- close(prog);
-
- return ret >= 0;
-}
-
-static int probe_module_btf(void)
-{
- static const char strs[] = "\0int";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
- };
- struct bpf_btf_info info;
- __u32 len = sizeof(info);
- char name[16];
- int fd, err;
-
- fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
- if (fd < 0)
- return 0; /* BTF not supported at all */
-
- memset(&info, 0, sizeof(info));
- info.name = ptr_to_u64(name);
- info.name_len = sizeof(name);
-
- /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
- * kernel's module BTF support coincides with support for
- * name/name_len fields in struct bpf_btf_info.
- */
- err = bpf_btf_get_info_by_fd(fd, &info, &len);
- close(fd);
- return !err;
-}
-
-static int probe_perf_link(void)
-{
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int prog_fd, link_fd, err;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
- insns, ARRAY_SIZE(insns), NULL);
- if (prog_fd < 0)
- return -errno;
-
- /* use invalid perf_event FD to get EBADF, if link is supported;
- * otherwise EINVAL should be returned
- */
- link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
- err = -errno; /* close() can clobber errno */
-
- if (link_fd >= 0)
- close(link_fd);
- close(prog_fd);
-
- return link_fd < 0 && err == -EBADF;
-}
-
-static int probe_uprobe_multi_link(void)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
- .expected_attach_type = BPF_TRACE_UPROBE_MULTI,
- );
- LIBBPF_OPTS(bpf_link_create_opts, link_opts);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int prog_fd, link_fd, err;
- unsigned long offset = 0;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
- insns, ARRAY_SIZE(insns), &load_opts);
- if (prog_fd < 0)
- return -errno;
-
- /* Creating uprobe in '/' binary should fail with -EBADF. */
- link_opts.uprobe_multi.path = "/";
- link_opts.uprobe_multi.offsets = &offset;
- link_opts.uprobe_multi.cnt = 1;
-
- link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
- err = -errno; /* close() can clobber errno */
-
- if (link_fd >= 0)
- close(link_fd);
- close(prog_fd);
-
- return link_fd < 0 && err == -EBADF;
-}
-
-static int probe_kern_bpf_cookie(void)
-{
- struct bpf_insn insns[] = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
- BPF_EXIT_INSN(),
- };
- int ret, insn_cnt = ARRAY_SIZE(insns);
-
- ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
- return probe_fd(ret);
-}
-
-static int probe_kern_btf_enum64(void)
-{
- static const char strs[] = "\0enum64";
- __u32 types[] = {
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_syscall_wrapper(void);
-
-enum kern_feature_result {
- FEAT_UNKNOWN = 0,
- FEAT_SUPPORTED = 1,
- FEAT_MISSING = 2,
-};
-
-typedef int (*feature_probe_fn)(void);
-
-static struct kern_feature_desc {
- const char *desc;
- feature_probe_fn probe;
- enum kern_feature_result res;
-} feature_probes[__FEAT_CNT] = {
- [FEAT_PROG_NAME] = {
- "BPF program name", probe_kern_prog_name,
- },
- [FEAT_GLOBAL_DATA] = {
- "global variables", probe_kern_global_data,
- },
- [FEAT_BTF] = {
- "minimal BTF", probe_kern_btf,
- },
- [FEAT_BTF_FUNC] = {
- "BTF functions", probe_kern_btf_func,
- },
- [FEAT_BTF_GLOBAL_FUNC] = {
- "BTF global function", probe_kern_btf_func_global,
- },
- [FEAT_BTF_DATASEC] = {
- "BTF data section and variable", probe_kern_btf_datasec,
- },
- [FEAT_ARRAY_MMAP] = {
- "ARRAY map mmap()", probe_kern_array_mmap,
- },
- [FEAT_EXP_ATTACH_TYPE] = {
- "BPF_PROG_LOAD expected_attach_type attribute",
- probe_kern_exp_attach_type,
- },
- [FEAT_PROBE_READ_KERN] = {
- "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
- },
- [FEAT_PROG_BIND_MAP] = {
- "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
- },
- [FEAT_MODULE_BTF] = {
- "module BTF support", probe_module_btf,
- },
- [FEAT_BTF_FLOAT] = {
- "BTF_KIND_FLOAT support", probe_kern_btf_float,
- },
- [FEAT_PERF_LINK] = {
- "BPF perf link support", probe_perf_link,
- },
- [FEAT_BTF_DECL_TAG] = {
- "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
- },
- [FEAT_BTF_TYPE_TAG] = {
- "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
- },
- [FEAT_MEMCG_ACCOUNT] = {
- "memcg-based memory accounting", probe_memcg_account,
- },
- [FEAT_BPF_COOKIE] = {
- "BPF cookie support", probe_kern_bpf_cookie,
- },
- [FEAT_BTF_ENUM64] = {
- "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
- },
- [FEAT_SYSCALL_WRAPPER] = {
- "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
- },
- [FEAT_UPROBE_MULTI_LINK] = {
- "BPF multi-uprobe link support", probe_uprobe_multi_link,
- },
-};
-
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
- struct kern_feature_desc *feat = &feature_probes[feat_id];
- int ret;
-
- if (obj && obj->gen_loader)
+ if (obj->gen_loader)
/* To generate loader program assume the latest kernel
* to avoid doing extra prog_load, map_create syscalls.
*/
return true;
- if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
- ret = feat->probe();
- if (ret > 0) {
- WRITE_ONCE(feat->res, FEAT_SUPPORTED);
- } else if (ret == 0) {
- WRITE_ONCE(feat->res, FEAT_MISSING);
- } else {
- pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
- WRITE_ONCE(feat->res, FEAT_MISSING);
- }
- }
+ if (obj->token_fd)
+ return feat_supported(obj->feat_cache, feat_id);
- return READ_ONCE(feat->res) == FEAT_SUPPORTED;
+ return feat_supported(NULL, feat_id);
}
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
@@ -5117,6 +5013,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
return 0;
}
+
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
if (err) {
err = -errno;
@@ -5160,9 +5057,17 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.map_flags = def->map_flags;
create_attr.numa_node = map->numa_node;
create_attr.map_extra = map->map_extra;
+ create_attr.token_fd = obj->token_fd;
+ if (obj->token_fd)
+ create_attr.map_flags |= BPF_F_TOKEN_FD;
- if (bpf_map__is_struct_ops(map))
+ if (bpf_map__is_struct_ops(map)) {
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
+ if (map->mod_btf_fd >= 0) {
+ create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
+ create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
+ }
+ }
if (obj->btf && btf__fd(obj->btf) >= 0) {
create_attr.btf_fd = btf__fd(obj->btf);
@@ -5172,6 +5077,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
if (bpf_map_type__is_map_in_map(def->type)) {
if (map->inner_map) {
+ err = map_set_def_max_entries(map->inner_map);
+ if (err)
+ return err;
err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
@@ -5198,11 +5106,16 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_QUEUE:
case BPF_MAP_TYPE_STACK:
+ case BPF_MAP_TYPE_ARENA:
create_attr.btf_fd = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
map->btf_key_type_id = 0;
map->btf_value_type_id = 0;
+ break;
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ create_attr.btf_value_type_id = 0;
+ break;
default:
break;
}
@@ -5438,7 +5351,23 @@ retry:
if (err < 0)
goto err_out;
}
-
+ if (map->def.type == BPF_MAP_TYPE_ARENA) {
+ map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
+ PROT_READ | PROT_WRITE,
+ map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
+ map->fd, 0);
+ if (map->mmaped == MAP_FAILED) {
+ err = -errno;
+ map->mmaped = NULL;
+ pr_warn("map '%s': failed to mmap arena: %d\n",
+ map->name, err);
+ return err;
+ }
+ if (obj->arena_data) {
+ memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
+ zfree(&obj->arena_data);
+ }
+ }
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
err = init_map_in_map_slots(obj, map);
if (err < 0)
@@ -6695,6 +6624,14 @@ static struct {
/* all other program types don't have "named" context structs */
};
+/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
+ * for below __builtin_types_compatible_p() checks;
+ * with this approach we don't need any extra arch-specific #ifdef guards
+ */
+struct pt_regs;
+struct user_pt_regs;
+struct user_regs_struct;
+
static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
const char *subprog_name, int arg_idx,
int arg_type_id, const char *ctx_name)
@@ -6735,11 +6672,21 @@ static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_pro
/* special cases */
switch (prog->type) {
case BPF_PROG_TYPE_KPROBE:
- case BPF_PROG_TYPE_PERF_EVENT:
/* `struct pt_regs *` is expected, but we need to fix up */
if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
return true;
break;
+ case BPF_PROG_TYPE_PERF_EVENT:
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
+ btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
+ return true;
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
+ btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
+ return true;
+ if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
+ btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
+ return true;
+ break;
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
/* allow u64* as ctx */
@@ -6818,69 +6765,6 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr
return fn_id;
}
-static int probe_kern_arg_ctx_tag(void)
-{
- /* To minimize merge conflicts with BPF token series that refactors
- * feature detection code a lot, we don't integrate
- * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection
- * framework yet, doing our own caching internally.
- * This will be cleaned up a bit later when bpf/bpf-next trees settle.
- */
- static int cached_result = -1;
- static const char strs[] = "\0a\0b\0arg:ctx\0";
- const __u32 types[] = {
- /* [1] INT */
- BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
- /* [2] PTR -> VOID */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
- /* [3] FUNC_PROTO `int(void *a)` */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
- BTF_PARAM_ENC(1 /* "a" */, 2),
- /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
- BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
- /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
- BTF_PARAM_ENC(3 /* "b" */, 2),
- /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
- BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
- /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
- BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
- };
- const struct bpf_insn insns[] = {
- /* main prog */
- BPF_CALL_REL(+1),
- BPF_EXIT_INSN(),
- /* global subprog */
- BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
- BPF_EXIT_INSN(),
- };
- const struct bpf_func_info_min func_infos[] = {
- { 0, 4 }, /* main prog -> FUNC 'a' */
- { 2, 6 }, /* subprog -> FUNC 'b' */
- };
- LIBBPF_OPTS(bpf_prog_load_opts, opts);
- int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);
-
- if (cached_result >= 0)
- return cached_result;
-
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
- if (btf_fd < 0)
- return 0;
-
- opts.prog_btf_fd = btf_fd;
- opts.func_info = &func_infos;
- opts.func_info_cnt = ARRAY_SIZE(func_infos);
- opts.func_info_rec_size = sizeof(func_infos[0]);
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
- "GPL", insns, insn_cnt, &opts);
- close(btf_fd);
-
- cached_result = probe_fd(prog_fd);
- return cached_result;
-}
-
/* Check if main program or global subprog's function prototype has `arg:ctx`
* argument tags, and, if necessary, substitute correct type to match what BPF
* verifier would expect, taking into account specific program type. This
@@ -6905,7 +6789,7 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra
return 0;
/* don't do any fix ups if kernel natively supports __arg_ctx */
- if (probe_kern_arg_ctx_tag() > 0)
+ if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
return 0;
/* some BPF program types just don't have named context structs, so
@@ -7292,12 +7176,12 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
data = sec_desc->data;
idx = shdr->sh_info;
- if (shdr->sh_type != SHT_REL) {
+ if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
pr_warn("internal error at %d\n", __LINE__);
return -LIBBPF_ERRNO__INTERNAL;
}
- if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
+ if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
else if (idx == obj->efile.btf_maps_shndx)
err = bpf_object__collect_map_relos(obj, shdr, data);
@@ -7473,6 +7357,10 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog
load_attr.prog_flags = prog->prog_flags;
load_attr.fd_array = obj->fd_array;
+ load_attr.token_fd = obj->token_fd;
+ if (obj->token_fd)
+ load_attr.prog_flags |= BPF_F_TOKEN_FD;
+
/* adjust load_attr if sec_def provides custom preload callback */
if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
@@ -7918,7 +7806,7 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
- const char *obj_name, *kconfig, *btf_tmp_path;
+ const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
struct bpf_object *obj;
char tmp_name[64];
int err;
@@ -7955,6 +7843,16 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
if (log_size && !log_buf)
return ERR_PTR(-EINVAL);
+ token_path = OPTS_GET(opts, bpf_token_path, NULL);
+ /* if user didn't specify bpf_token_path explicitly, check if
+ * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
+ * option
+ */
+ if (!token_path)
+ token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
+ if (token_path && strlen(token_path) >= PATH_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
if (IS_ERR(obj))
return obj;
@@ -7963,6 +7861,14 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
obj->log_size = log_size;
obj->log_level = log_level;
+ if (token_path) {
+ obj->token_path = strdup(token_path);
+ if (!obj->token_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
if (btf_tmp_path) {
if (strlen(btf_tmp_path) >= PATH_MAX) {
@@ -8449,11 +8355,20 @@ static void bpf_map_prepare_vdata(const struct bpf_map *map)
static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
{
+ struct bpf_map *map;
int i;
- for (i = 0; i < obj->nr_maps; i++)
- if (bpf_map__is_struct_ops(&obj->maps[i]))
- bpf_map_prepare_vdata(&obj->maps[i]);
+ for (i = 0; i < obj->nr_maps; i++) {
+ map = &obj->maps[i];
+
+ if (!bpf_map__is_struct_ops(map))
+ continue;
+
+ if (!map->autocreate)
+ continue;
+
+ bpf_map_prepare_vdata(map);
+ }
return 0;
}
@@ -8473,11 +8388,13 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
if (obj->gen_loader)
bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
- err = bpf_object__probe_loading(obj);
+ err = bpf_object_prepare_token(obj);
+ err = err ? : bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
+ err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__create_maps(obj);
@@ -8947,13 +8864,9 @@ static void bpf_map__destroy(struct bpf_map *map)
zfree(&map->init_slots);
map->init_slots_sz = 0;
- if (map->mmaped) {
- size_t mmap_sz;
-
- mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
- munmap(map->mmaped, mmap_sz);
- map->mmaped = NULL;
- }
+ if (map->mmaped && map->mmaped != map->obj->arena_data)
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
+ map->mmaped = NULL;
if (map->st_ops) {
zfree(&map->st_ops->data);
@@ -9008,6 +8921,13 @@ void bpf_object__close(struct bpf_object *obj)
}
zfree(&obj->programs);
+ zfree(&obj->feat_cache);
+ zfree(&obj->token_path);
+ if (obj->token_fd > 0)
+ close(obj->token_fd);
+
+ zfree(&obj->arena_data);
+
free(obj);
}
@@ -9668,7 +9588,9 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
return NULL;
}
-/* Collect the reloc from ELF and populate the st_ops->progs[] */
+/* Collect the reloc from ELF, populate the st_ops->progs[], and update
+ * st_ops->data for shadow type.
+ */
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
Elf64_Shdr *shdr, Elf_Data *data)
{
@@ -9760,28 +9682,15 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
return -EINVAL;
}
- /* if we haven't yet processed this BPF program, record proper
- * attach_btf_id and member_idx
- */
- if (!prog->attach_btf_id) {
- prog->attach_btf_id = st_ops->type_id;
- prog->expected_attach_type = member_idx;
- }
+ st_ops->progs[member_idx] = prog;
- /* struct_ops BPF prog can be re-used between multiple
- * .struct_ops & .struct_ops.link as long as it's the
- * same struct_ops struct definition and the same
- * function pointer field
+ /* st_ops->data will be exposed to users, being returned by
+ * bpf_map__initial_value() as a pointer to the shadow
+ * type. All function pointers in the original struct type
+ * should be converted to a pointer to struct bpf_program
+ * in the shadow type.
*/
- if (prog->attach_btf_id != st_ops->type_id ||
- prog->expected_attach_type != member_idx) {
- pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
- map->name, prog->name, prog->sec_name, prog->type,
- prog->attach_btf_id, prog->expected_attach_type, name);
- return -EINVAL;
- }
-
- st_ops->progs[member_idx] = prog;
+ *((struct bpf_program **)(st_ops->data + moff)) = prog;
}
return 0;
@@ -9966,7 +9875,9 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
*btf_obj_fd = 0;
*btf_type_id = 1;
} else {
- err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+ err = find_kernel_btf_id(prog->obj, attach_name,
+ attach_type, btf_obj_fd,
+ btf_type_id);
}
if (err) {
pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
@@ -10188,11 +10099,14 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
return libbpf_err(-EBUSY);
if (map->mmaped) {
- int err;
size_t mmap_old_sz, mmap_new_sz;
+ int err;
- mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
- mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
+ if (map->def.type != BPF_MAP_TYPE_ARRAY)
+ return -EOPNOTSUPP;
+
+ mmap_old_sz = bpf_map_mmap_sz(map);
+ mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
if (err) {
pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
@@ -10225,22 +10139,41 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size)
{
+ size_t actual_sz;
+
if (map->obj->loaded || map->reused)
return libbpf_err(-EBUSY);
- if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
- size != map->def.value_size)
+ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
+ return libbpf_err(-EINVAL);
+
+ if (map->def.type == BPF_MAP_TYPE_ARENA)
+ actual_sz = map->obj->arena_data_sz;
+ else
+ actual_sz = map->def.value_size;
+ if (size != actual_sz)
return libbpf_err(-EINVAL);
memcpy(map->mmaped, data, size);
return 0;
}
-void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
{
+ if (bpf_map__is_struct_ops(map)) {
+ if (psize)
+ *psize = map->def.value_size;
+ return map->st_ops->data;
+ }
+
if (!map->mmaped)
return NULL;
- *psize = map->def.value_size;
+
+ if (map->def.type == BPF_MAP_TYPE_ARENA)
+ *psize = map->obj->arena_data_sz;
+ else
+ *psize = map->def.value_size;
+
return map->mmaped;
}
@@ -11028,7 +10961,7 @@ static const char *arch_specific_syscall_pfx(void)
#endif
}
-static int probe_kern_syscall_wrapper(void)
+int probe_kern_syscall_wrapper(int token_fd)
{
char syscall_name[64];
const char *ksys_pfx;
@@ -13717,7 +13650,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
for (i = 0; i < s->map_cnt; i++) {
struct bpf_map *map = *s->maps[i].map;
- size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ size_t mmap_sz = bpf_map_mmap_sz(map);
int prot, map_fd = map->fd;
void **mmaped = s->maps[i].mmaped;
@@ -13729,6 +13662,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
continue;
}
+ if (map->def.type == BPF_MAP_TYPE_ARENA) {
+ *mmaped = map->mmaped;
+ continue;
+ }
+
if (map->def.map_flags & BPF_F_RDONLY_PROG)
prot = PROT_READ;
else
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6cd9c501624f..7b510761f545 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -177,10 +177,29 @@ struct bpf_object_open_opts {
* logs through its print callback.
*/
__u32 kernel_log_level;
+ /* Path to BPF FS mount point to derive BPF token from.
+ *
+ * Created BPF token will be used for all bpf() syscall operations
+ * that accept BPF token (e.g., map creation, BTF and program loads,
+ * etc) automatically within instantiated BPF object.
+ *
+ * If bpf_token_path is not specified, libbpf will consult
+ * LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will be
+ * taken as a value of bpf_token_path option and will force libbpf to
+ * either create BPF token from provided custom BPF FS path, or will
+ * disable implicit BPF token creation, if envvar value is an empty
+ * string. bpf_token_path overrides LIBBPF_BPF_TOKEN_PATH, if both are
+ * set at the same time.
+ *
+ * Setting bpf_token_path option to empty string disables libbpf's
+ * automatic attempt to create BPF token from default BPF FS mount
+ * point (/sys/fs/bpf), in case this default behavior is undesirable.
+ */
+ const char *bpf_token_path;
size_t :0;
};
-#define bpf_object_open_opts__last_field kernel_log_level
+#define bpf_object_open_opts__last_field bpf_token_path
/**
* @brief **bpf_object__open()** creates a bpf_object by opening
@@ -995,7 +1014,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
-LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize);
/**
* @brief **bpf_map__is_internal()** tells the caller whether or not the
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 91c5aef7dae7..86804fd90dd1 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -245,7 +245,6 @@ LIBBPF_0.3.0 {
btf__parse_raw_split;
btf__parse_split;
btf__new_empty_split;
- btf__new_split;
ring_buffer__epoll_fd;
} LIBBPF_0.2.0;
@@ -326,7 +325,6 @@ LIBBPF_0.7.0 {
bpf_xdp_detach;
bpf_xdp_query;
bpf_xdp_query_id;
- btf_ext__raw_data;
libbpf_probe_bpf_helper;
libbpf_probe_bpf_map_type;
libbpf_probe_bpf_prog_type;
@@ -411,4 +409,8 @@ LIBBPF_1.3.0 {
} LIBBPF_1.2.0;
LIBBPF_1.4.0 {
+ global:
+ bpf_token_create;
+ btf__new_split;
+ btf_ext__raw_data;
} LIBBPF_1.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 27e4e320e1a6..864b36177424 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -15,9 +15,24 @@
#include <linux/err.h>
#include <fcntl.h>
#include <unistd.h>
+#include <sys/syscall.h>
#include <libelf.h>
#include "relo_core.h"
+/* Android's libc doesn't support AT_EACCESS in faccessat() implementation
+ * ([0]), and just returns -EINVAL even if file exists and is accessible.
+ * See [1] for issues caused by this.
+ *
+ * So just redefine it to 0 on Android.
+ *
+ * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50
+ * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250
+ */
+#ifdef __ANDROID__
+#undef AT_EACCESS
+#define AT_EACCESS 0
+#endif
+
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -357,18 +372,39 @@ enum kern_feature_id {
FEAT_SYSCALL_WRAPPER,
/* BPF multi-uprobe link support */
FEAT_UPROBE_MULTI_LINK,
+ /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */
+ FEAT_ARG_CTX_TAG,
+ /* Kernel supports '?' at the front of datasec names */
+ FEAT_BTF_QMARK_DATASEC,
__FEAT_CNT,
};
-int probe_memcg_account(void);
+enum kern_feature_result {
+ FEAT_UNKNOWN = 0,
+ FEAT_SUPPORTED = 1,
+ FEAT_MISSING = 2,
+};
+
+struct kern_feature_cache {
+ enum kern_feature_result res[__FEAT_CNT];
+ int token_fd;
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+
+int probe_kern_syscall_wrapper(int token_fd);
+int probe_memcg_account(int token_fd);
int bump_rlimit_memlock(void);
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len);
-int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level);
+ const char *str_sec, size_t str_len,
+ int token_fd);
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
@@ -532,6 +568,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn)
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
+/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range.
+ * Original FD is not closed or altered in any other way.
+ * Preserves original FD value, if it's invalid (negative).
+ */
+static inline int dup_good_fd(int fd)
+{
+ if (fd < 0)
+ return fd;
+ return fcntl(fd, F_DUPFD_CLOEXEC, 3);
+}
+
/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2
* Takes ownership of the fd passed in, and closes it if calling
* fcntl(fd, F_DUPFD_CLOEXEC, 3).
@@ -543,7 +590,7 @@ static inline int ensure_good_fd(int fd)
if (fd < 0)
return fd;
if (fd < 3) {
- fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ fd = dup_good_fd(fd);
saved_errno = errno;
close(old_fd);
errno = saved_errno;
@@ -555,6 +602,15 @@ static inline int ensure_good_fd(int fd)
return fd;
}
+static inline int sys_dup2(int oldfd, int newfd)
+{
+#ifdef __NR_dup2
+ return syscall(__NR_dup2, oldfd, newfd);
+#else
+ return syscall(__NR_dup3, oldfd, newfd, 0);
+#endif
+}
+
/* Point *fixed_fd* to the same file that *tmp_fd* points to.
* Regardless of success, *tmp_fd* is closed.
* Whatever *fixed_fd* pointed to is closed silently.
@@ -563,7 +619,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd)
{
int err;
- err = dup2(tmp_fd, fixed_fd);
+ err = sys_dup2(tmp_fd, fixed_fd);
err = err < 0 ? -errno : 0;
close(tmp_fd); /* clean up temporary FD */
return err;
@@ -613,4 +669,6 @@ int elf_resolve_syms_offsets(const char *binary_path, int cnt,
int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
unsigned long **poffsets, size_t *pcnt);
+int probe_fd(int fd);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 9c4db90b92b6..302188122439 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
}
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len)
+ const char *str_sec, size_t str_len,
+ int token_fd)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
@@ -229,6 +230,10 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
.str_off = types_len,
.str_len = str_len,
};
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .token_fd = token_fd,
+ .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+ );
int btf_fd, btf_len;
__u8 *raw_btf;
@@ -241,7 +246,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
- btf_fd = bpf_btf_load(raw_btf, btf_len, NULL);
+ btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
free(raw_btf);
return btf_fd;
@@ -271,7 +276,7 @@ static int load_local_storage_btf(void)
};
return libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
+ strs, sizeof(strs), 0);
}
static int probe_map_create(enum bpf_map_type map_type)
@@ -326,12 +331,20 @@ static int probe_map_create(enum bpf_map_type map_type)
case BPF_MAP_TYPE_STRUCT_OPS:
/* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */
opts.btf_vmlinux_value_type_id = 1;
+ opts.value_type_btf_obj_fd = -1;
exp_err = -524; /* -ENOTSUPP */
break;
case BPF_MAP_TYPE_BLOOM_FILTER:
key_size = 0;
max_entries = 1;
break;
+ case BPF_MAP_TYPE_ARENA:
+ key_size = 0;
+ value_size = 0;
+ max_entries = 1; /* one page */
+ opts.map_extra = 0; /* can mmap() at any address */
+ opts.map_flags = BPF_F_MMAPABLE;
+ break;
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PROG_ARRAY:
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 16bca56002ab..0d4be829551b 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -2732,7 +2732,7 @@ static int finalize_btf(struct bpf_linker *linker)
/* Emit .BTF.ext section */
if (linker->btf_ext) {
- raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz);
+ raw_data = btf_ext__raw_data(linker->btf_ext, &raw_sz);
if (!raw_data)
return -ENOMEM;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 090bcf6e3b3d..68a2def17175 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -496,8 +496,8 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
if (err)
return libbpf_err(err);
- opts->feature_flags = md.flags;
- opts->xdp_zc_max_segs = md.xdp_zc_max_segs;
+ OPTS_SET(opts, feature_flags, md.flags);
+ OPTS_SET(opts, xdp_zc_max_segs, md.xdp_zc_max_segs);
skip_feature_flags:
return 0;
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index a139334d57b6..626d7ffb03d6 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -2,5 +2,8 @@
#ifndef __LIBBPF_STR_ERROR_H
#define __LIBBPF_STR_ERROR_H
+#define STRERR_BUFSIZE 128
+
char *libbpf_strerror_r(int err, char *dst, int len);
+
#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index da1aa10bbcc3..8e9e09d84e26 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -11,11 +11,11 @@ $(SUBDIRS):
$(MAKE) -C $@ ; \
fi
-clean hardclean:
+clean distclean:
@for dir in $(SUBDIRS) ; do \
if [ -f "$$dir/Makefile" ] ; then \
$(MAKE) -C $$dir $@; \
fi \
done
-.PHONY: clean all $(SUBDIRS)
+.PHONY: all clean distclean $(SUBDIRS)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 3110f84dd029..07373c5a7afe 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -15,7 +15,12 @@ UAPI_PATH:=../../../../include/uapi/
get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
+CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
+CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
+CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py
index 2ad9ec0f5545..f131e33ac3ee 100755
--- a/tools/net/ynl/cli.py
+++ b/tools/net/ynl/cli.py
@@ -6,7 +6,16 @@ import json
import pprint
import time
-from lib import YnlFamily, Netlink
+from lib import YnlFamily, Netlink, NlError
+
+
+class YnlEncoder(json.JSONEncoder):
+ def default(self, obj):
+ if isinstance(obj, bytes):
+ return bytes.hex(obj)
+ if isinstance(obj, set):
+ return list(obj)
+ return json.JSONEncoder.default(self, obj)
def main():
@@ -28,8 +37,17 @@ def main():
parser.add_argument('--append', dest='flags', action='append_const',
const=Netlink.NLM_F_APPEND)
parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction)
+ parser.add_argument('--output-json', action='store_true')
+ parser.add_argument('--dbg-small-recv', default=0, const=4000,
+ action='store', nargs='?', type=int)
args = parser.parse_args()
+ def output(msg):
+ if args.output_json:
+ print(json.dumps(msg, cls=YnlEncoder))
+ else:
+ pprint.PrettyPrinter().pprint(msg)
+
if args.no_schema:
args.schema = ''
@@ -37,7 +55,10 @@ def main():
if args.json_text:
attrs = json.loads(args.json_text)
- ynl = YnlFamily(args.spec, args.schema, args.process_unknown)
+ ynl = YnlFamily(args.spec, args.schema, args.process_unknown,
+ recv_size=args.dbg_small_recv)
+ if args.dbg_small_recv:
+ ynl.set_recv_dbg(True)
if args.ntf:
ynl.ntf_subscribe(args.ntf)
@@ -45,16 +66,20 @@ def main():
if args.sleep:
time.sleep(args.sleep)
- if args.do:
- reply = ynl.do(args.do, attrs, args.flags)
- pprint.PrettyPrinter().pprint(reply)
- if args.dump:
- reply = ynl.dump(args.dump, attrs)
- pprint.PrettyPrinter().pprint(reply)
+ try:
+ if args.do:
+ reply = ynl.do(args.do, attrs, args.flags)
+ output(reply)
+ if args.dump:
+ reply = ynl.dump(args.dump, attrs)
+ output(reply)
+ except NlError as e:
+ print(e)
+ exit(1)
if args.ntf:
ynl.check_ntf()
- pprint.PrettyPrinter().pprint(ynl.async_msg_queue)
+ output(ynl.async_msg_queue)
if __name__ == "__main__":
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 84cbabdd02a8..713f5fb9cc2d 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -14,7 +14,10 @@ YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \
TOOL:=../ynl-gen-c.py
-GENS:=ethtool devlink handshake fou netdev nfsd
+GENS_PATHS=$(shell grep -nrI --files-without-match \
+ 'protocol: netlink' \
+ ../../../../Documentation/netlink/specs/)
+GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS})
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
@@ -40,11 +43,11 @@ protos.a: $(OBJS)
clean:
rm -f *.o
-hardclean: clean
+distclean: clean
rm -f *.c *.h *.a
regen:
@../ynl-regen.sh
-.PHONY: all clean hardclean regen
+.PHONY: all clean distclean regen
.DEFAULT_GOAL: all
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
index d2e50fd0a52d..dfff3ecd1cba 100644
--- a/tools/net/ynl/lib/Makefile
+++ b/tools/net/ynl/lib/Makefile
@@ -17,12 +17,13 @@ ynl.a: $(OBJS)
ar rcs $@ $(OBJS)
clean:
rm -f *.o *.d *~
+ rm -rf __pycache__
-hardclean: clean
+distclean: clean
rm -f *.a
%.o: %.c
$(COMPILE.c) -MMD -c -o $@ $<
-.PHONY: all clean
+.PHONY: all clean distclean
.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py
index f7eaa07783e7..9137b83e580a 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/lib/__init__.py
@@ -2,7 +2,7 @@
from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
SpecFamily, SpecOperation
-from .ynl import YnlFamily, Netlink
+from .ynl import YnlFamily, Netlink, NlError
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
- "SpecFamily", "SpecOperation", "YnlFamily", "Netlink"]
+ "SpecFamily", "SpecOperation", "YnlFamily", "Netlink", "NlError"]
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 44f13e383e8a..6d08ab9e213f 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -144,7 +144,7 @@ class SpecEnumSet(SpecElement):
class SpecAttr(SpecElement):
- """ Single Netlink atttribute type
+ """ Single Netlink attribute type
Represents a single attribute type within an attr space.
@@ -248,6 +248,7 @@ class SpecStructMember(SpecElement):
len integer, optional byte length of binary types
display_hint string, hint to help choose format specifier
when displaying the value
+ struct string, name of nested struct type
"""
def __init__(self, family, yaml):
super().__init__(family, yaml)
@@ -256,6 +257,7 @@ class SpecStructMember(SpecElement):
self.enum = yaml.get('enum')
self.len = yaml.get('len')
self.display_hint = yaml.get('display-hint')
+ self.struct = yaml.get('struct')
class SpecStruct(SpecElement):
@@ -306,10 +308,9 @@ class SpecSubMessage(SpecElement):
class SpecSubMessageFormat(SpecElement):
- """ Netlink sub-message definition
+ """ Netlink sub-message format definition
- Represents a set of sub-message formats for polymorphic nlattrs
- that contain type-specific sub messages.
+ Represents a single format for a sub-message.
Attributes:
value attribute value to match against type selector
@@ -417,6 +418,7 @@ class SpecFamily(SpecElement):
consts dict of all constants/enums
fixed_header string, optional name of family default fixed header struct
mcast_groups dict of all multicast groups (index by name)
+ kernel_family dict of kernel family attributes
"""
def __init__(self, spec_path, schema_path=None, exclude_ops=None):
with open(spec_path, "r") as stream:
@@ -460,6 +462,7 @@ class SpecFamily(SpecElement):
self.ntfs = collections.OrderedDict()
self.consts = collections.OrderedDict()
self.mcast_groups = collections.OrderedDict()
+ self.kernel_family = collections.OrderedDict(self.yaml.get('kernel-family', {}))
last_exception = None
while len(self._resolution_list) > 0:
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 7491da8e7555..6cf890080dc0 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -2,16 +2,16 @@
#ifndef __YNL_C_PRIV_H
#define __YNL_C_PRIV_H 1
+#include <stdbool.h>
#include <stddef.h>
-#include <libmnl/libmnl.h>
#include <linux/types.h>
+struct ynl_parse_arg;
+
/*
* YNL internals / low level stuff
*/
-/* Generic mnl helper code */
-
enum ynl_policy_type {
YNL_PT_REJECT = 1,
YNL_PT_IGNORE,
@@ -27,6 +27,20 @@ enum ynl_policy_type {
YNL_PT_BITFIELD32,
};
+enum ynl_parse_result {
+ YNL_PARSE_CB_ERROR = -1,
+ YNL_PARSE_CB_STOP = 0,
+ YNL_PARSE_CB_OK = 1,
+};
+
+#define YNL_SOCKET_BUFFER_SIZE (1 << 17)
+
+#define YNL_ARRAY_SIZE(array) (sizeof(array) ? \
+ sizeof(array) / sizeof(array[0]) : 0)
+
+typedef int (*ynl_parse_cb_t)(const struct nlmsghdr *nlh,
+ struct ynl_parse_arg *yarg);
+
struct ynl_policy_attr {
enum ynl_policy_type type;
unsigned int len;
@@ -80,8 +94,6 @@ struct ynl_ntf_base_type {
unsigned char data[] __attribute__((aligned(8)));
};
-extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE];
-
struct nlmsghdr *
ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
@@ -89,30 +101,26 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
-int ynl_recv_ack(struct ynl_sock *ys, int ret);
-int ynl_cb_null(const struct nlmsghdr *nlh, void *data);
-
/* YNL specific helpers used by the auto-generated code */
struct ynl_req_state {
struct ynl_parse_arg yarg;
- mnl_cb_t cb;
+ ynl_parse_cb_t cb;
__u32 rsp_cmd;
};
struct ynl_dump_state {
- struct ynl_sock *ys;
- struct ynl_policy_nest *rsp_policy;
+ struct ynl_parse_arg yarg;
void *first;
struct ynl_dump_list_type *last;
size_t alloc_sz;
- mnl_cb_t cb;
+ ynl_parse_cb_t cb;
__u32 rsp_cmd;
};
struct ynl_ntf_info {
struct ynl_policy_nest *policy;
- mnl_cb_t cb;
+ ynl_parse_cb_t cb;
size_t alloc_sz;
void (*free)(struct ynl_ntf_base_type *ntf);
};
@@ -125,20 +133,325 @@ int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd);
int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg);
-#ifndef MNL_HAS_AUTO_SCALARS
-static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr)
+/* Netlink message handling helpers */
+
+#define YNL_MSG_OVERFLOW 1
+
+static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf)
+{
+ struct nlmsghdr *nlh = buf;
+
+ memset(nlh, 0, sizeof(*nlh));
+ nlh->nlmsg_len = NLMSG_HDRLEN;
+
+ return nlh;
+}
+
+static inline unsigned int ynl_nlmsg_data_len(const struct nlmsghdr *nlh)
{
- if (mnl_attr_get_payload_len(attr) == 4)
- return mnl_attr_get_u32(attr);
- return mnl_attr_get_u64(attr);
+ return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+static inline void *ynl_nlmsg_data(const struct nlmsghdr *nlh)
+{
+ return (unsigned char *)nlh + NLMSG_HDRLEN;
+}
+
+static inline void *
+ynl_nlmsg_data_offset(const struct nlmsghdr *nlh, unsigned int offset)
+{
+ return (unsigned char *)nlh + NLMSG_HDRLEN + offset;
+}
+
+static inline void *ynl_nlmsg_end_addr(const struct nlmsghdr *nlh)
+{
+ return (char *)nlh + nlh->nlmsg_len;
+}
+
+static inline void *
+ynl_nlmsg_put_extra_header(struct nlmsghdr *nlh, unsigned int size)
+{
+ void *tail = ynl_nlmsg_end_addr(nlh);
+
+ nlh->nlmsg_len += NLMSG_ALIGN(size);
+ return tail;
+}
+
+/* Netlink attribute helpers */
+
+static inline unsigned int ynl_attr_type(const struct nlattr *attr)
+{
+ return attr->nla_type & NLA_TYPE_MASK;
+}
+
+static inline unsigned int ynl_attr_data_len(const struct nlattr *attr)
+{
+ return attr->nla_len - NLA_HDRLEN;
+}
+
+static inline void *ynl_attr_data(const struct nlattr *attr)
+{
+ return (unsigned char *)attr + NLA_HDRLEN;
+}
+
+static inline void *ynl_attr_data_end(const struct nlattr *attr)
+{
+ return ynl_attr_data(attr) + ynl_attr_data_len(attr);
+}
+
+#define ynl_attr_for_each(attr, nlh, fixed_hdr_sz) \
+ for ((attr) = ynl_attr_first(nlh, (nlh)->nlmsg_len, \
+ NLMSG_HDRLEN + fixed_hdr_sz); attr; \
+ (attr) = ynl_attr_next(ynl_nlmsg_end_addr(nlh), attr))
+
+#define ynl_attr_for_each_nested(attr, outer) \
+ for ((attr) = ynl_attr_first(outer, outer->nla_len, \
+ sizeof(struct nlattr)); attr; \
+ (attr) = ynl_attr_next(ynl_attr_data_end(outer), attr))
+
+#define ynl_attr_for_each_payload(start, len, attr) \
+ for ((attr) = ynl_attr_first(start, len, 0); attr; \
+ (attr) = ynl_attr_next(start + len, attr))
+
+static inline struct nlattr *
+ynl_attr_if_good(const void *end, struct nlattr *attr)
+{
+ if (attr + 1 > (const struct nlattr *)end)
+ return NULL;
+ if (ynl_attr_data_end(attr) > end)
+ return NULL;
+ return attr;
+}
+
+static inline struct nlattr *
+ynl_attr_next(const void *end, const struct nlattr *prev)
+{
+ struct nlattr *attr;
+
+ attr = (void *)((char *)prev + NLA_ALIGN(prev->nla_len));
+ return ynl_attr_if_good(end, attr);
+}
+
+static inline struct nlattr *
+ynl_attr_first(const void *start, size_t len, size_t skip)
+{
+ struct nlattr *attr;
+
+ attr = (void *)((char *)start + NLMSG_ALIGN(skip));
+ return ynl_attr_if_good(start + len, attr);
+}
+
+static inline bool
+__ynl_attr_put_overflow(struct nlmsghdr *nlh, size_t size)
+{
+ bool o;
+
+ /* ynl_msg_start() stashed buffer length in nlmsg_pid. */
+ o = nlh->nlmsg_len + NLA_HDRLEN + NLMSG_ALIGN(size) > nlh->nlmsg_pid;
+ if (o)
+ /* YNL_MSG_OVERFLOW is < NLMSG_HDRLEN, all subsequent checks
+ * are guaranteed to fail.
+ */
+ nlh->nlmsg_pid = YNL_MSG_OVERFLOW;
+ return o;
+}
+
+static inline struct nlattr *
+ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type)
+{
+ struct nlattr *attr;
+
+ if (__ynl_attr_put_overflow(nlh, 0))
+ return ynl_nlmsg_end_addr(nlh) - NLA_HDRLEN;
+
+ attr = ynl_nlmsg_end_addr(nlh);
+ attr->nla_type = attr_type | NLA_F_NESTED;
+ nlh->nlmsg_len += NLA_HDRLEN;
+
+ return attr;
}
static inline void
-mnl_attr_put_uint(struct nlmsghdr *nlh, uint16_t type, uint64_t data)
+ynl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *attr)
{
- if ((uint32_t)data == (uint64_t)data)
- return mnl_attr_put_u32(nlh, type, data);
- return mnl_attr_put_u64(nlh, type, data);
+ attr->nla_len = (char *)ynl_nlmsg_end_addr(nlh) - (char *)attr;
+}
+
+static inline void
+ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ const void *value, size_t size)
+{
+ struct nlattr *attr;
+
+ if (__ynl_attr_put_overflow(nlh, size))
+ return;
+
+ attr = ynl_nlmsg_end_addr(nlh);
+ attr->nla_type = attr_type;
+ attr->nla_len = NLA_HDRLEN + size;
+
+ memcpy(ynl_attr_data(attr), value, size);
+
+ nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
+}
+
+static inline void
+ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
+{
+ struct nlattr *attr;
+ size_t len;
+
+ len = strlen(str);
+ if (__ynl_attr_put_overflow(nlh, len))
+ return;
+
+ attr = ynl_nlmsg_end_addr(nlh);
+ attr->nla_type = attr_type;
+
+ strcpy(ynl_attr_data(attr), str);
+ attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len);
+
+ nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
+}
+
+static inline const char *ynl_attr_get_str(const struct nlattr *attr)
+{
+ return (const char *)ynl_attr_data(attr);
+}
+
+static inline __s8 ynl_attr_get_s8(const struct nlattr *attr)
+{
+ return *(__s8 *)ynl_attr_data(attr);
+}
+
+static inline __s16 ynl_attr_get_s16(const struct nlattr *attr)
+{
+ return *(__s16 *)ynl_attr_data(attr);
+}
+
+static inline __s32 ynl_attr_get_s32(const struct nlattr *attr)
+{
+ return *(__s32 *)ynl_attr_data(attr);
+}
+
+static inline __s64 ynl_attr_get_s64(const struct nlattr *attr)
+{
+ __s64 tmp;
+
+ memcpy(&tmp, (unsigned char *)(attr + 1), sizeof(tmp));
+ return tmp;
+}
+
+static inline __u8 ynl_attr_get_u8(const struct nlattr *attr)
+{
+ return *(__u8 *)ynl_attr_data(attr);
+}
+
+static inline __u16 ynl_attr_get_u16(const struct nlattr *attr)
+{
+ return *(__u16 *)ynl_attr_data(attr);
+}
+
+static inline __u32 ynl_attr_get_u32(const struct nlattr *attr)
+{
+ return *(__u32 *)ynl_attr_data(attr);
+}
+
+static inline __u64 ynl_attr_get_u64(const struct nlattr *attr)
+{
+ __u64 tmp;
+
+ memcpy(&tmp, (unsigned char *)(attr + 1), sizeof(tmp));
+ return tmp;
+}
+
+static inline void
+ynl_attr_put_s8(struct nlmsghdr *nlh, unsigned int attr_type, __s8 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_s16(struct nlmsghdr *nlh, unsigned int attr_type, __s16 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_s32(struct nlmsghdr *nlh, unsigned int attr_type, __s32 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_s64(struct nlmsghdr *nlh, unsigned int attr_type, __s64 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u8(struct nlmsghdr *nlh, unsigned int attr_type, __u8 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u16(struct nlmsghdr *nlh, unsigned int attr_type, __u16 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u32(struct nlmsghdr *nlh, unsigned int attr_type, __u32 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline void
+ynl_attr_put_u64(struct nlmsghdr *nlh, unsigned int attr_type, __u64 value)
+{
+ ynl_attr_put(nlh, attr_type, &value, sizeof(value));
+}
+
+static inline __u64 ynl_attr_get_uint(const struct nlattr *attr)
+{
+ switch (ynl_attr_data_len(attr)) {
+ case 4:
+ return ynl_attr_get_u32(attr);
+ case 8:
+ return ynl_attr_get_u64(attr);
+ default:
+ return 0;
+ }
+}
+
+static inline __s64 ynl_attr_get_sint(const struct nlattr *attr)
+{
+ switch (ynl_attr_data_len(attr)) {
+ case 4:
+ return ynl_attr_get_s32(attr);
+ case 8:
+ return ynl_attr_get_s64(attr);
+ default:
+ return 0;
+ }
+}
+
+static inline void
+ynl_attr_put_uint(struct nlmsghdr *nlh, __u16 type, __u64 data)
+{
+ if ((__u32)data == (__u64)data)
+ ynl_attr_put_u32(nlh, type, data);
+ else
+ ynl_attr_put_u64(nlh, type, data);
+}
+
+static inline void
+ynl_attr_put_sint(struct nlmsghdr *nlh, __u16 type, __s64 data)
+{
+ if ((__s32)data == (__s64)data)
+ ynl_attr_put_s32(nlh, type, data);
+ else
+ ynl_attr_put_s64(nlh, type, data);
}
-#endif
#endif
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index 45e49671ae87..4b9c091fc86b 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -3,10 +3,11 @@
#include <poll.h>
#include <string.h>
#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
#include <linux/types.h>
-
-#include <libmnl/libmnl.h>
#include <linux/genetlink.h>
+#include <sys/socket.h>
#include "ynl.h"
@@ -92,9 +93,9 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
data_len = end - start;
- mnl_attr_for_each_payload(start, data_len) {
+ ynl_attr_for_each_payload(start, data_len, attr) {
astart_off = (char *)attr - (char *)start;
- aend_off = astart_off + mnl_attr_get_payload_len(attr);
+ aend_off = astart_off + ynl_attr_data_len(attr);
if (aend_off <= off)
continue;
@@ -106,7 +107,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
off -= astart_off;
- type = mnl_attr_get_type(attr);
+ type = ynl_attr_type(attr);
if (ynl_err_walk_report_one(policy, type, str, str_sz, &n))
return n;
@@ -124,8 +125,8 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
}
off -= sizeof(struct nlattr);
- start = mnl_attr_get_payload(attr);
- end = start + mnl_attr_get_payload_len(attr);
+ start = ynl_attr_data(attr);
+ end = start + ynl_attr_data_len(attr);
return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest,
&str[n], str_sz - n, nest_pol);
@@ -147,14 +148,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) {
yerr_msg(ys, "%s", strerror(ys->err.code));
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
}
- mnl_attr_for_each(attr, nlh, hlen) {
+ ynl_attr_for_each(attr, nlh, hlen) {
unsigned int len, type;
- len = mnl_attr_get_payload_len(attr);
- type = mnl_attr_get_type(attr);
+ len = ynl_attr_data_len(attr);
+ type = ynl_attr_type(attr);
if (type > NLMSGERR_ATTR_MAX)
continue;
@@ -166,12 +167,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
case NLMSGERR_ATTR_MISS_TYPE:
case NLMSGERR_ATTR_MISS_NEST:
if (len != sizeof(__u32))
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
break;
case NLMSGERR_ATTR_MSG:
- str = mnl_attr_get_payload(attr);
+ str = ynl_attr_get_str(attr);
if (str[len - 1])
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
break;
default:
break;
@@ -185,14 +186,13 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
unsigned int n, off;
void *start, *end;
- ys->err.attr_offs = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]);
+ ys->err.attr_offs = ynl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]);
n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
str ? " (" : "");
- start = mnl_nlmsg_get_payload_offset(ys->nlh,
- ys->family->hdr_len);
- end = mnl_nlmsg_get_payload_tail(ys->nlh);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ end = ynl_nlmsg_end_addr(ys->nlh);
off = ys->err.attr_offs;
off -= sizeof(struct nlmsghdr);
@@ -211,18 +211,17 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
void *start, *end;
int n2;
- type = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]);
+ type = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]);
n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
bad_attr[0] ? ", " : (str ? " (" : ""));
- start = mnl_nlmsg_get_payload_offset(ys->nlh,
- ys->family->hdr_len);
- end = mnl_nlmsg_get_payload_tail(ys->nlh);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ end = ynl_nlmsg_end_addr(ys->nlh);
nest_pol = ys->req_policy;
if (tb[NLMSGERR_ATTR_MISS_NEST]) {
- off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
+ off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
off -= sizeof(struct nlmsghdr);
off -= ys->family->hdr_len;
@@ -254,13 +253,13 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
else
yerr_msg(ys, "%s", strerror(ys->err.code));
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
}
-static int ynl_cb_error(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_cb_error(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh);
- struct ynl_parse_arg *yarg = data;
+ const struct nlmsgerr *err = ynl_nlmsg_data(nlh);
unsigned int hlen;
int code;
@@ -270,16 +269,15 @@ static int ynl_cb_error(const struct nlmsghdr *nlh, void *data)
hlen = sizeof(*err);
if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
- hlen += mnl_nlmsg_get_payload_len(&err->msg);
+ hlen += ynl_nlmsg_data_len(&err->msg);
ynl_ext_ack_check(yarg->ys, nlh, hlen);
- return code ? MNL_CB_ERROR : MNL_CB_STOP;
+ return code ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_STOP;
}
-static int ynl_cb_done(const struct nlmsghdr *nlh, void *data)
+static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_parse_arg *yarg = data;
int err;
err = *(int *)NLMSG_DATA(nlh);
@@ -289,23 +287,11 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, void *data)
ynl_ext_ack_check(yarg->ys, nlh, sizeof(int));
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
- return MNL_CB_STOP;
-}
-
-static int ynl_cb_noop(const struct nlmsghdr *nlh, void *data)
-{
- return MNL_CB_OK;
+ return YNL_PARSE_CB_STOP;
}
-mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = {
- [NLMSG_NOOP] = ynl_cb_noop,
- [NLMSG_ERROR] = ynl_cb_error,
- [NLMSG_DONE] = ynl_cb_done,
- [NLMSG_OVERRUN] = ynl_cb_noop,
-};
-
/* Attribute validation */
int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
@@ -314,9 +300,9 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
unsigned int type, len;
unsigned char *data;
- data = mnl_attr_get_payload(attr);
- len = mnl_attr_get_payload_len(attr);
- type = mnl_attr_get_type(attr);
+ data = ynl_attr_data(attr);
+ len = ynl_attr_data_len(attr);
+ type = ynl_attr_type(attr);
if (type > yarg->rsp_policy->max_attr) {
yerr(yarg->ys, YNL_ERROR_INTERNAL,
"Internal error, validating unknown attribute");
@@ -413,14 +399,38 @@ struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags)
ynl_err_reset(ys);
- nlh = ys->nlh = mnl_nlmsg_put_header(ys->tx_buf);
+ nlh = ys->nlh = ynl_nlmsg_put_header(ys->tx_buf);
nlh->nlmsg_type = id;
nlh->nlmsg_flags = flags;
nlh->nlmsg_seq = ++ys->seq;
+ /* This is a local YNL hack for length checking, we put the buffer
+ * length in nlmsg_pid, since messages sent to the kernel always use
+ * PID 0. Message needs to be terminated with ynl_msg_end().
+ */
+ nlh->nlmsg_pid = YNL_SOCKET_BUFFER_SIZE;
+
return nlh;
}
+static int ynl_msg_end(struct ynl_sock *ys, struct nlmsghdr *nlh)
+{
+ /* We stash buffer length in nlmsg_pid. */
+ if (nlh->nlmsg_pid == 0) {
+ yerr(ys, YNL_ERROR_INPUT_INVALID,
+ "Unknown input buffer length");
+ return -EINVAL;
+ }
+ if (nlh->nlmsg_pid == YNL_MSG_OVERFLOW) {
+ yerr(ys, YNL_ERROR_INPUT_TOO_BIG,
+ "Constructed message longer than internal buffer");
+ return -EMSGSIZE;
+ }
+
+ nlh->nlmsg_pid = 0;
+ return 0;
+}
+
struct nlmsghdr *
ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
__u8 cmd, __u8 version)
@@ -435,7 +445,7 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
gehdr.cmd = cmd;
gehdr.version = version;
- data = mnl_nlmsg_put_extra_header(nlh, sizeof(gehdr));
+ data = ynl_nlmsg_put_extra_header(nlh, sizeof(gehdr));
memcpy(data, &gehdr, sizeof(gehdr));
return nlh;
@@ -464,33 +474,85 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version)
cmd, version);
}
-int ynl_recv_ack(struct ynl_sock *ys, int ret)
+static int ynl_cb_null(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_parse_arg yarg = { .ys = ys, };
+ yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG,
+ "Received a message when none were expected");
- if (!ret) {
- yerr(ys, YNL_ERROR_EXPECT_ACK,
- "Expecting an ACK but nothing received");
- return -1;
+ return YNL_PARSE_CB_ERROR;
+}
+
+static int
+__ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb, int flags)
+{
+ struct ynl_sock *ys = yarg->ys;
+ const struct nlmsghdr *nlh;
+ ssize_t len, rem;
+ int ret;
+
+ len = recv(ys->socket, ys->rx_buf, YNL_SOCKET_BUFFER_SIZE, flags);
+ if (len < 0) {
+ if (flags & MSG_DONTWAIT && errno == EAGAIN)
+ return YNL_PARSE_CB_STOP;
+ return len;
}
- ret = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
- if (ret < 0) {
- perr(ys, "Socket receive failed");
- return ret;
+ ret = YNL_PARSE_CB_STOP;
+ for (rem = len; rem > 0; NLMSG_NEXT(nlh, rem)) {
+ nlh = (struct nlmsghdr *)&ys->rx_buf[len - rem];
+ if (!NLMSG_OK(nlh, rem)) {
+ yerr(yarg->ys, YNL_ERROR_INV_RESP,
+ "Invalid message or trailing data in the response.");
+ return YNL_PARSE_CB_ERROR;
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP_INTR) {
+ /* TODO: handle this better */
+ yerr(yarg->ys, YNL_ERROR_DUMP_INTER,
+ "Dump interrupted / inconsistent, please retry.");
+ return YNL_PARSE_CB_ERROR;
+ }
+
+ switch (nlh->nlmsg_type) {
+ case 0:
+ yerr(yarg->ys, YNL_ERROR_INV_RESP,
+ "Invalid message type in the response.");
+ return YNL_PARSE_CB_ERROR;
+ case NLMSG_NOOP:
+ case NLMSG_OVERRUN ... NLMSG_MIN_TYPE - 1:
+ ret = YNL_PARSE_CB_OK;
+ break;
+ case NLMSG_ERROR:
+ ret = ynl_cb_error(nlh, yarg);
+ break;
+ case NLMSG_DONE:
+ ret = ynl_cb_done(nlh, yarg);
+ break;
+ default:
+ ret = cb(nlh, yarg);
+ break;
+ }
}
- return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid,
- ynl_cb_null, &yarg);
+
+ return ret;
}
-int ynl_cb_null(const struct nlmsghdr *nlh, void *data)
+static int ynl_sock_read_msgs(struct ynl_parse_arg *yarg, ynl_parse_cb_t cb)
{
- struct ynl_parse_arg *yarg = data;
+ return __ynl_sock_read_msgs(yarg, cb, 0);
+}
- yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG,
- "Received a message when none were expected");
+static int ynl_recv_ack(struct ynl_sock *ys, int ret)
+{
+ struct ynl_parse_arg yarg = { .ys = ys, };
- return MNL_CB_ERROR;
+ if (!ret) {
+ yerr(ys, YNL_ERROR_EXPECT_ACK,
+ "Expecting an ACK but nothing received");
+ return -1;
+ }
+
+ return ynl_sock_read_msgs(&yarg, ynl_cb_null);
}
/* Init/fini and genetlink boiler plate */
@@ -500,7 +562,7 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
const struct nlattr *entry, *attr;
unsigned int i;
- mnl_attr_for_each_nested(attr, mcasts)
+ ynl_attr_for_each_nested(attr, mcasts)
ys->n_mcast_groups++;
if (!ys->n_mcast_groups)
@@ -509,16 +571,16 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
ys->mcast_groups = calloc(ys->n_mcast_groups,
sizeof(*ys->mcast_groups));
if (!ys->mcast_groups)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
i = 0;
- mnl_attr_for_each_nested(entry, mcasts) {
- mnl_attr_for_each_nested(attr, entry) {
- if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_ID)
- ys->mcast_groups[i].id = mnl_attr_get_u32(attr);
- if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) {
+ ynl_attr_for_each_nested(entry, mcasts) {
+ ynl_attr_for_each_nested(attr, entry) {
+ if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_ID)
+ ys->mcast_groups[i].id = ynl_attr_get_u32(attr);
+ if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) {
strncpy(ys->mcast_groups[i].name,
- mnl_attr_get_str(attr),
+ ynl_attr_get_str(attr),
GENL_NAMSIZ - 1);
ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0;
}
@@ -529,35 +591,35 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
return 0;
}
-static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_get_family_info_cb(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_parse_arg *yarg = data;
struct ynl_sock *ys = yarg->ys;
const struct nlattr *attr;
bool found_id = true;
- mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
- if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GROUPS)
+ ynl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (ynl_attr_type(attr) == CTRL_ATTR_MCAST_GROUPS)
if (ynl_get_family_info_mcast(ys, attr))
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
- if (mnl_attr_get_type(attr) != CTRL_ATTR_FAMILY_ID)
+ if (ynl_attr_type(attr) != CTRL_ATTR_FAMILY_ID)
continue;
- if (mnl_attr_get_payload_len(attr) != sizeof(__u16)) {
+ if (ynl_attr_data_len(attr) != sizeof(__u16)) {
yerr(ys, YNL_ERROR_ATTR_INVALID, "Invalid family ID");
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
- ys->family_id = mnl_attr_get_u16(attr);
+ ys->family_id = ynl_attr_get_u16(attr);
found_id = true;
}
if (!found_id) {
yerr(ys, YNL_ERROR_ATTR_MISSING, "Family ID missing");
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
}
static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
@@ -567,22 +629,19 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
int err;
nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1);
- mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
+ ynl_attr_put_str(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
+
+ err = ynl_msg_end(ys, nlh);
+ if (err < 0)
+ return err;
- err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);
+ err = send(ys->socket, nlh, nlh->nlmsg_len, 0);
if (err < 0) {
perr(ys, "failed to request socket family info");
return err;
}
- err = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
- if (err <= 0) {
- perr(ys, "failed to receive the socket family info");
- return err;
- }
- err = mnl_cb_run2(ys->rx_buf, err, ys->seq, ys->portid,
- ynl_get_family_info_cb, &yarg,
- ynl_cb_array, ARRAY_SIZE(ynl_cb_array));
+ err = ynl_sock_read_msgs(&yarg, ynl_get_family_info_cb);
if (err < 0) {
free(ys->mcast_groups);
perr(ys, "failed to receive the socket family info - no such family?");
@@ -601,38 +660,54 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
struct ynl_sock *
ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
{
+ struct sockaddr_nl addr;
struct ynl_sock *ys;
+ socklen_t addrlen;
int one = 1;
- ys = malloc(sizeof(*ys) + 2 * MNL_SOCKET_BUFFER_SIZE);
+ ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE);
if (!ys)
return NULL;
memset(ys, 0, sizeof(*ys));
ys->family = yf;
ys->tx_buf = &ys->raw_buf[0];
- ys->rx_buf = &ys->raw_buf[MNL_SOCKET_BUFFER_SIZE];
+ ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE];
ys->ntf_last_next = &ys->ntf_first;
- ys->sock = mnl_socket_open(NETLINK_GENERIC);
- if (!ys->sock) {
+ ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (ys->socket < 0) {
__perr(yse, "failed to create a netlink socket");
goto err_free_sock;
}
- if (mnl_socket_setsockopt(ys->sock, NETLINK_CAP_ACK,
- &one, sizeof(one))) {
+ if (setsockopt(ys->socket, SOL_NETLINK, NETLINK_CAP_ACK,
+ &one, sizeof(one))) {
__perr(yse, "failed to enable netlink ACK");
goto err_close_sock;
}
- if (mnl_socket_setsockopt(ys->sock, NETLINK_EXT_ACK,
- &one, sizeof(one))) {
+ if (setsockopt(ys->socket, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one))) {
__perr(yse, "failed to enable netlink ext ACK");
goto err_close_sock;
}
+ memset(&addr, 0, sizeof(addr));
+ addr.nl_family = AF_NETLINK;
+ if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ __perr(yse, "unable to bind to a socket address");
+ goto err_close_sock;;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addrlen = sizeof(addr);
+ if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) {
+ __perr(yse, "unable to read socket address");
+ goto err_close_sock;;
+ }
+ ys->portid = addr.nl_pid;
ys->seq = random();
- ys->portid = mnl_socket_get_portid(ys->sock);
+
if (ynl_sock_read_family(ys, yf->name)) {
if (yse)
@@ -643,7 +718,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
return ys;
err_close_sock:
- mnl_socket_close(ys->sock);
+ close(ys->socket);
err_free_sock:
free(ys);
return NULL;
@@ -653,7 +728,7 @@ void ynl_sock_destroy(struct ynl_sock *ys)
{
struct ynl_ntf_base_type *ntf;
- mnl_socket_close(ys->sock);
+ close(ys->socket);
while ((ntf = ynl_ntf_dequeue(ys)))
ynl_ntf_free(ntf);
free(ys->mcast_groups);
@@ -680,9 +755,9 @@ int ynl_subscribe(struct ynl_sock *ys, const char *grp_name)
return -1;
}
- err = mnl_socket_setsockopt(ys->sock, NETLINK_ADD_MEMBERSHIP,
- &ys->mcast_groups[i].id,
- sizeof(ys->mcast_groups[i].id));
+ err = setsockopt(ys->socket, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+ &ys->mcast_groups[i].id,
+ sizeof(ys->mcast_groups[i].id));
if (err < 0) {
perr(ys, "Subscribing to multicast group failed");
return -1;
@@ -693,7 +768,7 @@ int ynl_subscribe(struct ynl_sock *ys, const char *grp_name)
int ynl_socket_get_fd(struct ynl_sock *ys)
{
- return mnl_socket_get_fd(ys->sock);
+ return ys->socket;
}
struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys)
@@ -719,12 +794,12 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
struct genlmsghdr *gehdr;
int ret;
- gehdr = mnl_nlmsg_get_payload(nlh);
+ gehdr = ynl_nlmsg_data(nlh);
if (gehdr->cmd >= ys->family->ntf_info_size)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
info = &ys->family->ntf_info[gehdr->cmd];
if (!info->cb)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
rsp = calloc(1, info->alloc_sz);
rsp->free = info->free;
@@ -732,7 +807,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
yarg.rsp_policy = info->policy;
ret = info->cb(nlh, &yarg);
- if (ret <= MNL_CB_STOP)
+ if (ret <= YNL_PARSE_CB_STOP)
goto err_free;
rsp->family = nlh->nlmsg_type;
@@ -741,46 +816,27 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
*ys->ntf_last_next = rsp;
ys->ntf_last_next = &rsp->next;
- return MNL_CB_OK;
+ return YNL_PARSE_CB_OK;
err_free:
info->free(rsp);
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
-static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_ntf_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_parse_arg *yarg = data;
-
return ynl_ntf_parse(yarg->ys, nlh);
}
int ynl_ntf_check(struct ynl_sock *ys)
{
struct ynl_parse_arg yarg = { .ys = ys, };
- ssize_t len;
int err;
do {
- /* libmnl doesn't let us pass flags to the recv to make
- * it non-blocking so we need to poll() or peek() :|
- */
- struct pollfd pfd = { };
-
- pfd.fd = mnl_socket_get_fd(ys->sock);
- pfd.events = POLLIN;
- err = poll(&pfd, 1, 1);
- if (err < 1)
- return err;
-
- len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
- MNL_SOCKET_BUFFER_SIZE);
- if (len < 0)
- return len;
-
- err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
- ynl_ntf_trampoline, &yarg,
- ynl_cb_array, NLMSG_MIN_TYPE);
+ err = __ynl_sock_read_msgs(&yarg, ynl_ntf_trampoline,
+ MSG_DONTWAIT);
if (err < 0)
return err;
} while (err > 0);
@@ -801,7 +857,7 @@ void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd)
int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
{
yerr(yarg->ys, YNL_ERROR_INV_RESP, "Error parsing response: %s", msg);
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
}
static int
@@ -809,27 +865,28 @@ ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
{
struct genlmsghdr *gehdr;
- if (mnl_nlmsg_get_payload_len(nlh) < sizeof(*gehdr)) {
+ if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
yerr(ys, YNL_ERROR_INV_RESP,
"Kernel responded with truncated message");
return -1;
}
- gehdr = mnl_nlmsg_get_payload(nlh);
+ gehdr = ynl_nlmsg_data(nlh);
if (gehdr->cmd != rsp_cmd)
return ynl_ntf_parse(ys, nlh);
return 0;
}
-static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data)
+static
+int ynl_req_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
{
- struct ynl_req_state *yrs = data;
+ struct ynl_req_state *yrs = (void *)yarg;
int ret;
ret = ynl_check_alien(yrs->yarg.ys, nlh, yrs->rsp_cmd);
if (ret)
- return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+ return ret < 0 ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_OK;
return yrs->cb(nlh, &yrs->yarg);
}
@@ -837,43 +894,38 @@ static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data)
int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
struct ynl_req_state *yrs)
{
- ssize_t len;
int err;
- err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+ err = ynl_msg_end(ys, req_nlh);
+ if (err < 0)
+ return err;
+
+ err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0);
if (err < 0)
return err;
do {
- len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
- MNL_SOCKET_BUFFER_SIZE);
- if (len < 0)
- return len;
-
- err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
- ynl_req_trampoline, yrs,
- ynl_cb_array, NLMSG_MIN_TYPE);
- if (err < 0)
- return err;
+ err = ynl_sock_read_msgs(&yrs->yarg, ynl_req_trampoline);
} while (err > 0);
- return 0;
+ return err;
}
-static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data)
+static int
+ynl_dump_trampoline(const struct nlmsghdr *nlh, struct ynl_parse_arg *data)
{
- struct ynl_dump_state *ds = data;
+ struct ynl_dump_state *ds = (void *)data;
struct ynl_dump_list_type *obj;
struct ynl_parse_arg yarg = {};
int ret;
- ret = ynl_check_alien(ds->ys, nlh, ds->rsp_cmd);
+ ret = ynl_check_alien(ds->yarg.ys, nlh, ds->rsp_cmd);
if (ret)
- return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+ return ret < 0 ? YNL_PARSE_CB_ERROR : YNL_PARSE_CB_OK;
obj = calloc(1, ds->alloc_sz);
if (!obj)
- return MNL_CB_ERROR;
+ return YNL_PARSE_CB_ERROR;
if (!ds->first)
ds->first = obj;
@@ -881,8 +933,7 @@ static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data)
ds->last->next = obj;
ds->last = obj;
- yarg.ys = ds->ys;
- yarg.rsp_policy = ds->rsp_policy;
+ yarg = ds->yarg;
yarg.data = &obj->data;
return ds->cb(nlh, &yarg);
@@ -900,22 +951,18 @@ static void *ynl_dump_end(struct ynl_dump_state *ds)
int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
struct ynl_dump_state *yds)
{
- ssize_t len;
int err;
- err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+ err = ynl_msg_end(ys, req_nlh);
if (err < 0)
return err;
- do {
- len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
- MNL_SOCKET_BUFFER_SIZE);
- if (len < 0)
- goto err_close_list;
+ err = send(ys->socket, req_nlh, req_nlh->nlmsg_len, 0);
+ if (err < 0)
+ return err;
- err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
- ynl_dump_trampoline, yds,
- ynl_cb_array, NLMSG_MIN_TYPE);
+ do {
+ err = ynl_sock_read_msgs(&yds->yarg, ynl_dump_trampoline);
if (err < 0)
goto err_close_list;
} while (err > 0);
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
index ce77a6d76ce0..9842e85a8c57 100644
--- a/tools/net/ynl/lib/ynl.h
+++ b/tools/net/ynl/lib/ynl.h
@@ -12,6 +12,7 @@ enum ynl_error_code {
YNL_ERROR_NONE = 0,
__YNL_ERRNO_END = 4096,
YNL_ERROR_INTERNAL,
+ YNL_ERROR_DUMP_INTER,
YNL_ERROR_EXPECT_ACK,
YNL_ERROR_EXPECT_MSG,
YNL_ERROR_UNEXPECT_MSG,
@@ -19,6 +20,8 @@ enum ynl_error_code {
YNL_ERROR_ATTR_INVALID,
YNL_ERROR_UNKNOWN_NTF,
YNL_ERROR_INV_RESP,
+ YNL_ERROR_INPUT_INVALID,
+ YNL_ERROR_INPUT_TOO_BIG,
};
/**
@@ -58,7 +61,7 @@ struct ynl_sock {
/* private: */
const struct ynl_family *family;
- struct mnl_socket *sock;
+ int socket;
__u32 seq;
__u32 portid;
__u16 family_id;
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 1e10512b2117..5fa7957f6e0f 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -7,6 +7,7 @@ import random
import socket
import struct
from struct import Struct
+import sys
import yaml
import ipaddress
import uuid
@@ -84,6 +85,10 @@ class NlError(Exception):
return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}"
+class ConfigError(Exception):
+ pass
+
+
class NlAttr:
ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little'])
type_formats = {
@@ -113,20 +118,6 @@ class NlAttr:
else format.little
return format.native
- @classmethod
- def formatted_string(cls, raw, display_hint):
- if display_hint == 'mac':
- formatted = ':'.join('%02x' % b for b in raw)
- elif display_hint == 'hex':
- formatted = bytes.hex(raw, ' ')
- elif display_hint in [ 'ipv4', 'ipv6' ]:
- formatted = format(ipaddress.ip_address(raw))
- elif display_hint == 'uuid':
- formatted = str(uuid.UUID(bytes=raw))
- else:
- formatted = raw
- return formatted
-
def as_scalar(self, attr_type, byte_order=None):
format = self.get_format(attr_type, byte_order)
return format.unpack(self.raw)[0]
@@ -148,23 +139,6 @@ class NlAttr:
format = self.get_format(type)
return [ x[0] for x in format.iter_unpack(self.raw) ]
- def as_struct(self, members):
- value = dict()
- offset = 0
- for m in members:
- # TODO: handle non-scalar members
- if m.type == 'binary':
- decoded = self.raw[offset : offset + m['len']]
- offset += m['len']
- elif m.type in NlAttr.type_formats:
- format = self.get_format(m.type, m.byte_order)
- [ decoded ] = format.unpack_from(self.raw, offset)
- offset += format.size
- if m.display_hint:
- decoded = self.formatted_string(decoded, m.display_hint)
- value[m.name] = decoded
- return value
-
def __repr__(self):
return f"[type:{self.type} len:{self._len}] {self.raw}"
@@ -244,11 +218,11 @@ class NlMsg:
return self.nl_type
def __repr__(self):
- msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}\n"
+ msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}"
if self.error:
- msg += '\terror: ' + str(self.error)
+ msg += '\n\terror: ' + str(self.error)
if self.extack:
- msg += '\textack: ' + repr(self.extack)
+ msg += '\n\textack: ' + repr(self.extack)
return msg
@@ -370,7 +344,7 @@ class NetlinkProtocol:
fixed_header_size = 0
if ynl:
op = ynl.rsp_by_value[msg.cmd()]
- fixed_header_size = ynl._fixed_header_size(op.fixed_header)
+ fixed_header_size = ynl._struct_size(op.fixed_header)
msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size)
return msg
@@ -379,6 +353,9 @@ class NetlinkProtocol:
raise Exception(f'Multicast group "{mcast_name}" not present in the spec')
return mcast_groups[mcast_name].value
+ def msghdr_size(self):
+ return 16
+
class GenlProtocol(NetlinkProtocol):
def __init__(self, family_name):
@@ -404,6 +381,28 @@ class GenlProtocol(NetlinkProtocol):
raise Exception(f'Multicast group "{mcast_name}" not present in the family')
return self.genl_family['mcast'][mcast_name]
+ def msghdr_size(self):
+ return super().msghdr_size() + 4
+
+
+class SpaceAttrs:
+ SpecValuesPair = namedtuple('SpecValuesPair', ['spec', 'values'])
+
+ def __init__(self, attr_space, attrs, outer = None):
+ outer_scopes = outer.scopes if outer else []
+ inner_scope = self.SpecValuesPair(attr_space, attrs)
+ self.scopes = [inner_scope] + outer_scopes
+
+ def lookup(self, name):
+ for scope in self.scopes:
+ if name in scope.spec:
+ if name in scope.values:
+ return scope.values[name]
+ spec_name = scope.spec.yaml['name']
+ raise Exception(
+ f"No value for '{name}' in attribute space '{spec_name}'")
+ raise Exception(f"Attribute '{name}' not defined in any attribute-set")
+
#
# YNL implementation details.
@@ -411,7 +410,8 @@ class GenlProtocol(NetlinkProtocol):
class YnlFamily(SpecFamily):
- def __init__(self, def_path, schema=None, process_unknown=False):
+ def __init__(self, def_path, schema=None, process_unknown=False,
+ recv_size=0):
super().__init__(def_path, schema)
self.include_raw = False
@@ -426,6 +426,17 @@ class YnlFamily(SpecFamily):
except KeyError:
raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel")
+ self._recv_dbg = False
+ # Note that netlink will use conservative (min) message size for
+ # the first dump recv() on the socket, our setting will only matter
+ # from the second recv() on.
+ self._recv_size = recv_size if recv_size else 131072
+ # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo)
+ # for a message, so smaller receive sizes will lead to truncation.
+ # Note that the min size for other families may be larger than 4k!
+ if self._recv_size < 4000:
+ raise ConfigError()
+
self.sock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, self.nlproto.proto_num)
self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1)
self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1)
@@ -449,18 +460,61 @@ class YnlFamily(SpecFamily):
self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP,
mcast_id)
- def _add_attr(self, space, name, value):
+ def set_recv_dbg(self, enabled):
+ self._recv_dbg = enabled
+
+ def _recv_dbg_print(self, reply, nl_msgs):
+ if not self._recv_dbg:
+ return
+ print("Recv: read", len(reply), "bytes,",
+ len(nl_msgs.msgs), "messages", file=sys.stderr)
+ for nl_msg in nl_msgs:
+ print(" ", nl_msg, file=sys.stderr)
+
+ def _encode_enum(self, attr_spec, value):
+ enum = self.consts[attr_spec['enum']]
+ if enum.type == 'flags' or attr_spec.get('enum-as-flags', False):
+ scalar = 0
+ if isinstance(value, str):
+ value = [value]
+ for single_value in value:
+ scalar += enum.entries[single_value].user_value(as_flags = True)
+ return scalar
+ else:
+ return enum.entries[value].user_value()
+
+ def _get_scalar(self, attr_spec, value):
+ try:
+ return int(value)
+ except (ValueError, TypeError) as e:
+ if 'enum' not in attr_spec:
+ raise e
+ return self._encode_enum(attr_spec, value)
+
+ def _add_attr(self, space, name, value, search_attrs):
try:
attr = self.attr_sets[space][name]
except KeyError:
raise Exception(f"Space '{space}' has no attribute '{name}'")
nl_type = attr.value
+
+ if attr.is_multi and isinstance(value, list):
+ attr_payload = b''
+ for subvalue in value:
+ attr_payload += self._add_attr(space, name, subvalue, search_attrs)
+ return attr_payload
+
if attr["type"] == 'nest':
nl_type |= Netlink.NLA_F_NESTED
attr_payload = b''
+ sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs)
for subname, subvalue in value.items():
- attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue)
+ attr_payload += self._add_attr(attr['nested-attributes'],
+ subname, subvalue, sub_attrs)
elif attr["type"] == 'flag':
+ if not value:
+ # If value is absent or false then skip attribute creation.
+ return b''
attr_payload = b''
elif attr["type"] == 'string':
attr_payload = str(value).encode('ascii') + b'\x00'
@@ -469,18 +523,36 @@ class YnlFamily(SpecFamily):
attr_payload = value
elif isinstance(value, str):
attr_payload = bytes.fromhex(value)
+ elif isinstance(value, dict) and attr.struct_name:
+ attr_payload = self._encode_struct(attr.struct_name, value)
else:
raise Exception(f'Unknown type for binary attribute, value: {value}')
- elif attr.is_auto_scalar:
- scalar = int(value)
- real_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64')
- format = NlAttr.get_format(real_type, attr.byte_order)
- attr_payload = format.pack(int(value))
- elif attr['type'] in NlAttr.type_formats:
- format = NlAttr.get_format(attr['type'], attr.byte_order)
- attr_payload = format.pack(int(value))
+ elif attr['type'] in NlAttr.type_formats or attr.is_auto_scalar:
+ scalar = self._get_scalar(attr, value)
+ if attr.is_auto_scalar:
+ attr_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64')
+ else:
+ attr_type = attr["type"]
+ format = NlAttr.get_format(attr_type, attr.byte_order)
+ attr_payload = format.pack(scalar)
elif attr['type'] in "bitfield32":
- attr_payload = struct.pack("II", int(value["value"]), int(value["selector"]))
+ scalar_value = self._get_scalar(attr, value["value"])
+ scalar_selector = self._get_scalar(attr, value["selector"])
+ attr_payload = struct.pack("II", scalar_value, scalar_selector)
+ elif attr['type'] == 'sub-message':
+ msg_format = self._resolve_selector(attr, search_attrs)
+ attr_payload = b''
+ if msg_format.fixed_header:
+ attr_payload += self._encode_struct(msg_format.fixed_header, value)
+ if msg_format.attr_set:
+ if msg_format.attr_set in self.attr_sets:
+ nl_type |= Netlink.NLA_F_NESTED
+ sub_attrs = SpaceAttrs(msg_format.attr_set, value, search_attrs)
+ for subname, subvalue in value.items():
+ attr_payload += self._add_attr(msg_format.attr_set,
+ subname, subvalue, sub_attrs)
+ else:
+ raise Exception(f"Unknown attribute-set '{msg_format.attr_set}'")
else:
raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}')
@@ -503,17 +575,13 @@ class YnlFamily(SpecFamily):
def _decode_binary(self, attr, attr_spec):
if attr_spec.struct_name:
- members = self.consts[attr_spec.struct_name]
- decoded = attr.as_struct(members)
- for m in members:
- if m.enum:
- decoded[m.name] = self._decode_enum(decoded[m.name], m)
+ decoded = self._decode_struct(attr.raw, attr_spec.struct_name)
elif attr_spec.sub_type:
decoded = attr.as_c_array(attr_spec.sub_type)
else:
decoded = attr.as_bin()
if attr_spec.display_hint:
- decoded = NlAttr.formatted_string(decoded, attr_spec.display_hint)
+ decoded = self._formatted_string(decoded, attr_spec.display_hint)
return decoded
def _decode_array_nest(self, attr, attr_spec):
@@ -527,6 +595,16 @@ class YnlFamily(SpecFamily):
decoded.append({ item.type: subattrs })
return decoded
+ def _decode_nest_type_value(self, attr, attr_spec):
+ decoded = {}
+ value = attr
+ for name in attr_spec['type-value']:
+ value = NlAttr(value.raw, 0)
+ decoded[name] = value.type
+ subattrs = self._decode(NlAttrs(value.raw), attr_spec['nested-attributes'])
+ decoded.update(subattrs)
+ return decoded
+
def _decode_unknown(self, attr):
if attr.is_nest:
return self._decode(NlAttrs(attr.raw), None)
@@ -548,29 +626,27 @@ class YnlFamily(SpecFamily):
else:
rsp[name] = [decoded]
- def _resolve_selector(self, attr_spec, vals):
+ def _resolve_selector(self, attr_spec, search_attrs):
sub_msg = attr_spec.sub_message
if sub_msg not in self.sub_msgs:
raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}")
sub_msg_spec = self.sub_msgs[sub_msg]
selector = attr_spec.selector
- if selector not in vals:
- raise Exception(f"There is no value for {selector} to resolve '{attr_spec.name}'")
- value = vals[selector]
+ value = search_attrs.lookup(selector)
if value not in sub_msg_spec.formats:
raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'")
spec = sub_msg_spec.formats[value]
return spec
- def _decode_sub_msg(self, attr, attr_spec, rsp):
- msg_format = self._resolve_selector(attr_spec, rsp)
+ def _decode_sub_msg(self, attr, attr_spec, search_attrs):
+ msg_format = self._resolve_selector(attr_spec, search_attrs)
decoded = {}
offset = 0
if msg_format.fixed_header:
- decoded.update(self._decode_fixed_header(attr, msg_format.fixed_header));
- offset = self._fixed_header_size(msg_format.fixed_header)
+ decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header));
+ offset = self._struct_size(msg_format.fixed_header)
if msg_format.attr_set:
if msg_format.attr_set in self.attr_sets:
subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set)
@@ -579,10 +655,12 @@ class YnlFamily(SpecFamily):
raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'")
return decoded
- def _decode(self, attrs, space):
+ def _decode(self, attrs, space, outer_attrs = None):
+ rsp = dict()
if space:
attr_space = self.attr_sets[space]
- rsp = dict()
+ search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs)
+
for attr in attrs:
try:
attr_spec = attr_space.attrs_by_val[attr.type]
@@ -594,7 +672,7 @@ class YnlFamily(SpecFamily):
continue
if attr_spec["type"] == 'nest':
- subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'])
+ subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
decoded = subdict
elif attr_spec["type"] == 'string':
decoded = attr.as_strz()
@@ -617,7 +695,9 @@ class YnlFamily(SpecFamily):
selector = self._decode_enum(selector, attr_spec)
decoded = {"value": value, "selector": selector}
elif attr_spec["type"] == 'sub-message':
- decoded = self._decode_sub_msg(attr, attr_spec, rsp)
+ decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
+ elif attr_spec["type"] == 'nest-type-value':
+ decoded = self._decode_nest_type_value(attr, attr_spec)
else:
if not self.process_unknown:
raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
@@ -658,20 +738,23 @@ class YnlFamily(SpecFamily):
return
msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set))
- offset = 20 + self._fixed_header_size(op.fixed_header)
+ offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header)
path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset,
extack['bad-attr-offs'])
if path:
del extack['bad-attr-offs']
extack['bad-attr'] = path
- def _fixed_header_size(self, name):
+ def _struct_size(self, name):
if name:
- fixed_header_members = self.consts[name].members
+ members = self.consts[name].members
size = 0
- for m in fixed_header_members:
+ for m in members:
if m.type in ['pad', 'binary']:
- size += m.len
+ if m.struct:
+ size += self._struct_size(m.struct)
+ else:
+ size += m.len
else:
format = NlAttr.get_format(m.type, m.byte_order)
size += format.size
@@ -679,26 +762,71 @@ class YnlFamily(SpecFamily):
else:
return 0
- def _decode_fixed_header(self, msg, name):
- fixed_header_members = self.consts[name].members
- fixed_header_attrs = dict()
+ def _decode_struct(self, data, name):
+ members = self.consts[name].members
+ attrs = dict()
offset = 0
- for m in fixed_header_members:
+ for m in members:
value = None
if m.type == 'pad':
offset += m.len
elif m.type == 'binary':
- value = msg.raw[offset : offset + m.len]
- offset += m.len
+ if m.struct:
+ len = self._struct_size(m.struct)
+ value = self._decode_struct(data[offset : offset + len],
+ m.struct)
+ offset += len
+ else:
+ value = data[offset : offset + m.len]
+ offset += m.len
else:
format = NlAttr.get_format(m.type, m.byte_order)
- [ value ] = format.unpack_from(msg.raw, offset)
+ [ value ] = format.unpack_from(data, offset)
offset += format.size
if value is not None:
if m.enum:
value = self._decode_enum(value, m)
- fixed_header_attrs[m.name] = value
- return fixed_header_attrs
+ elif m.display_hint:
+ value = self._formatted_string(value, m.display_hint)
+ attrs[m.name] = value
+ return attrs
+
+ def _encode_struct(self, name, vals):
+ members = self.consts[name].members
+ attr_payload = b''
+ for m in members:
+ value = vals.pop(m.name) if m.name in vals else None
+ if m.type == 'pad':
+ attr_payload += bytearray(m.len)
+ elif m.type == 'binary':
+ if m.struct:
+ if value is None:
+ value = dict()
+ attr_payload += self._encode_struct(m.struct, value)
+ else:
+ if value is None:
+ attr_payload += bytearray(m.len)
+ else:
+ attr_payload += bytes.fromhex(value)
+ else:
+ if value is None:
+ value = 0
+ format = NlAttr.get_format(m.type, m.byte_order)
+ attr_payload += format.pack(value)
+ return attr_payload
+
+ def _formatted_string(self, raw, display_hint):
+ if display_hint == 'mac':
+ formatted = ':'.join('%02x' % b for b in raw)
+ elif display_hint == 'hex':
+ formatted = bytes.hex(raw, ' ')
+ elif display_hint in [ 'ipv4', 'ipv6' ]:
+ formatted = format(ipaddress.ip_address(raw))
+ elif display_hint == 'uuid':
+ formatted = str(uuid.UUID(bytes=raw))
+ else:
+ formatted = raw
+ return formatted
def handle_ntf(self, decoded):
msg = dict()
@@ -707,7 +835,7 @@ class YnlFamily(SpecFamily):
op = self.rsp_by_value[decoded.cmd()]
attrs = self._decode(decoded.raw_attrs, op.attr_set.name)
if op.fixed_header:
- attrs.update(self._decode_fixed_header(decoded, op.fixed_header))
+ attrs.update(self._decode_struct(decoded.raw, op.fixed_header))
msg['name'] = op['name']
msg['msg'] = attrs
@@ -716,11 +844,12 @@ class YnlFamily(SpecFamily):
def check_ntf(self):
while True:
try:
- reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT)
+ reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT)
except BlockingIOError:
return
nms = NlMsgs(reply)
+ self._recv_dbg_print(reply, nms)
for nl_msg in nms:
if nl_msg.error:
print("Netlink error in ntf!?", os.strerror(-nl_msg.error))
@@ -759,20 +888,11 @@ class YnlFamily(SpecFamily):
req_seq = random.randint(1024, 65535)
msg = self.nlproto.message(nl_flags, op.req_value, 1, req_seq)
- fixed_header_members = []
if op.fixed_header:
- fixed_header_members = self.consts[op.fixed_header].members
- for m in fixed_header_members:
- value = vals.pop(m.name) if m.name in vals else 0
- if m.type == 'pad':
- msg += bytearray(m.len)
- elif m.type == 'binary':
- msg += bytes.fromhex(value)
- else:
- format = NlAttr.get_format(m.type, m.byte_order)
- msg += format.pack(value)
+ msg += self._encode_struct(op.fixed_header, vals)
+ search_attrs = SpaceAttrs(op.attr_set, vals)
for name, value in vals.items():
- msg += self._add_attr(op.attr_set.name, name, value)
+ msg += self._add_attr(op.attr_set.name, name, value, search_attrs)
msg = _genl_msg_finalize(msg)
self.sock.send(msg, 0)
@@ -780,8 +900,9 @@ class YnlFamily(SpecFamily):
done = False
rsp = []
while not done:
- reply = self.sock.recv(128 * 1024)
+ reply = self.sock.recv(self._recv_size)
nms = NlMsgs(reply, attr_space=op.attr_set)
+ self._recv_dbg_print(reply, nms)
for nl_msg in nms:
if nl_msg.extack:
self._decode_extack(msg, op, nl_msg.extack)
@@ -808,7 +929,7 @@ class YnlFamily(SpecFamily):
rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name)
if op.fixed_header:
- rsp_msg.update(self._decode_fixed_header(decoded, op.fixed_header))
+ rsp_msg.update(self._decode_struct(decoded.raw, op.fixed_header))
rsp.append(rsp_msg)
if not rsp:
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index 49637b26c482..dda6686257a7 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -1,4 +1,5 @@
ethtool
devlink
netdev
+ovs
page-pool \ No newline at end of file
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
index 28bdb1557a54..e194a7565861 100644
--- a/tools/net/ynl/samples/Makefile
+++ b/tools/net/ynl/samples/Makefile
@@ -9,7 +9,7 @@ ifeq ("$(DEBUG)","1")
CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
endif
-LDLIBS=-lmnl ../lib/ynl.a ../generated/protos.a
+LDLIBS=../lib/ynl.a ../generated/protos.a
SRCS=$(wildcard *.c)
BINS=$(patsubst %.c,%,${SRCS})
@@ -28,8 +28,8 @@ $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS)
clean:
rm -f *.o *.d *~
-hardclean: clean
+distclean: clean
rm -f $(BINS)
-.PHONY: all clean
+.PHONY: all clean distclean
.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/samples/ovs.c b/tools/net/ynl/samples/ovs.c
new file mode 100644
index 000000000000..3e975c003d77
--- /dev/null
+++ b/tools/net/ynl/samples/ovs.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include "ovs_datapath-user.h"
+
+int main(int argc, char **argv)
+{
+ struct ynl_sock *ys;
+ int err;
+
+ ys = ynl_sock_create(&ynl_ovs_datapath_family, NULL);
+ if (!ys)
+ return 1;
+
+ if (argc > 1) {
+ struct ovs_datapath_new_req *req;
+
+ req = ovs_datapath_new_req_alloc();
+ if (!req)
+ goto err_close;
+
+ ovs_datapath_new_req_set_upcall_pid(req, 1);
+ ovs_datapath_new_req_set_name(req, argv[1]);
+
+ err = ovs_datapath_new(ys, req);
+ ovs_datapath_new_req_free(req);
+ if (err)
+ goto err_close;
+ } else {
+ struct ovs_datapath_get_req_dump *req;
+ struct ovs_datapath_get_list *dps;
+
+ printf("Dump:\n");
+ req = ovs_datapath_get_req_dump_alloc();
+
+ dps = ovs_datapath_get_dump(ys, req);
+ ovs_datapath_get_req_dump_free(req);
+ if (!dps)
+ goto err_close;
+
+ ynl_dump_foreach(dps, dp) {
+ printf(" %s(%d): pid:%u cache:%u\n",
+ dp->name, dp->_hdr.dp_ifindex,
+ dp->upcall_pid, dp->masks_cache_size);
+ }
+ ovs_datapath_get_list_free(dps);
+ }
+
+ ynl_sock_destroy(ys);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg);
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c
index 098b5190d0e5..332f281ee5cb 100644
--- a/tools/net/ynl/samples/page-pool.c
+++ b/tools/net/ynl/samples/page-pool.c
@@ -95,6 +95,8 @@ int main(int argc, char **argv)
if (pp->_present.alloc_fast)
s->alloc_fast += pp->alloc_fast;
+ if (pp->_present.alloc_refill)
+ s->alloc_fast += pp->alloc_refill;
if (pp->_present.alloc_slow)
s->alloc_slow += pp->alloc_slow;
if (pp->_present.recycle_ring)
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 7fc1aa788f6f..6b7eb2d2aaf1 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -40,14 +40,6 @@ class BaseNlLib:
def get_family_id(self):
return 'ys->family_id'
- def parse_cb_run(self, cb, data, is_dump=False, indent=1):
- ind = '\n\t\t' + '\t' * indent + ' '
- if is_dump:
- return f"mnl_cb_run2(ys->rx_buf, len, 0, 0, {cb}, {data},{ind}ynl_cb_array, NLMSG_MIN_TYPE)"
- else:
- return f"mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,{ind}{cb}, {data},{ind}" + \
- "ynl_cb_array, NLMSG_MIN_TYPE)"
-
class Type(SpecAttr):
def __init__(self, family, attr_set, attr, value):
@@ -88,6 +80,8 @@ class Type(SpecAttr):
value = self.checks.get(limit, default)
if value is None:
return value
+ elif value in self.family.consts:
+ return c_upper(f"{self.family['name']}-{value}")
if not isinstance(value, int):
value = limit_to_number(value)
return value
@@ -168,15 +162,6 @@ class Type(SpecAttr):
spec = self._attr_policy(policy)
cw.p(f"\t[{self.enum_name}] = {spec},")
- def _mnl_type(self):
- # mnl does not have helpers for signed integer types
- # turn signed type into unsigned
- # this only makes sense for scalar types
- t = self.type
- if t[0] == 's':
- t = 'u' + t[1:]
- return t
-
def _attr_typol(self):
raise Exception(f"Type policy not implemented for class type {self.type}")
@@ -192,7 +177,7 @@ class Type(SpecAttr):
ri.cw.p(f"{line};")
def _attr_put_simple(self, ri, var, put_type):
- line = f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name})"
+ line = f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name})"
self._attr_put_line(ri, var, line)
def attr_put(self, ri, var):
@@ -217,7 +202,7 @@ class Type(SpecAttr):
if not self.is_multi_val():
ri.cw.p("if (ynl_attr_validate(yarg, attr))")
- ri.cw.p("return MNL_CB_ERROR;")
+ ri.cw.p("return YNL_PARSE_CB_ERROR;")
if self.presence_type() == 'bit':
ri.cw.p(f"{var}->_present.{self.c_name} = 1;")
@@ -264,7 +249,7 @@ class TypeUnused(Type):
return []
def _attr_get(self, ri, var):
- return ['return MNL_CB_ERROR;'], None, None
+ return ['return YNL_PARSE_CB_ERROR;'], None, None
def _attr_typol(self):
return '.type = YNL_PT_REJECT, '
@@ -357,9 +342,6 @@ class TypeScalar(Type):
else:
self.type_name = '__' + self.type
- def mnl_type(self):
- return self._mnl_type()
-
def _attr_policy(self, policy):
if 'flags-mask' in self.checks or self.is_bitfield:
if self.is_bitfield:
@@ -387,10 +369,10 @@ class TypeScalar(Type):
return [f'{self.type_name} {self.c_name}{self.byte_order_comment}']
def attr_put(self, ri, var):
- self._attr_put_simple(ri, var, self.mnl_type())
+ self._attr_put_simple(ri, var, self.type)
def _attr_get(self, ri, var):
- return f"{var}->{self.c_name} = mnl_attr_get_{self.mnl_type()}(attr);", None, None
+ return f"{var}->{self.c_name} = ynl_attr_get_{self.type}(attr);", None, None
def _setter_lines(self, ri, member, presence):
return [f"{member} = {self.c_name};"]
@@ -404,7 +386,7 @@ class TypeFlag(Type):
return '.type = YNL_PT_FLAG, '
def attr_put(self, ri, var):
- self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, 0, NULL)")
+ self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, NULL, 0)")
def _attr_get(self, ri, var):
return [], None, None
@@ -446,15 +428,15 @@ class TypeString(Type):
cw.p(f"\t[{self.enum_name}] = {spec},")
def attr_put(self, ri, var):
- self._attr_put_simple(ri, var, 'strz')
+ self._attr_put_simple(ri, var, 'str')
def _attr_get(self, ri, var):
len_mem = var + '->_present.' + self.c_name + '_len'
return [f"{len_mem} = len;",
f"{var}->{self.c_name} = malloc(len + 1);",
- f"memcpy({var}->{self.c_name}, mnl_attr_get_str(attr), len);",
+ f"memcpy({var}->{self.c_name}, ynl_attr_get_str(attr), len);",
f"{var}->{self.c_name}[len] = 0;"], \
- ['len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));'], \
+ ['len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));'], \
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
@@ -493,15 +475,15 @@ class TypeBinary(Type):
return mem
def attr_put(self, ri, var):
- self._attr_put_line(ri, var, f"mnl_attr_put(nlh, {self.enum_name}, " +
- f"{var}->_present.{self.c_name}_len, {var}->{self.c_name})")
+ self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, " +
+ f"{var}->{self.c_name}, {var}->_present.{self.c_name}_len)")
def _attr_get(self, ri, var):
len_mem = var + '->_present.' + self.c_name + '_len'
return [f"{len_mem} = len;",
f"{var}->{self.c_name} = malloc(len);",
- f"memcpy({var}->{self.c_name}, mnl_attr_get_payload(attr), len);"], \
- ['len = mnl_attr_get_payload_len(attr);'], \
+ f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
+ ['len = ynl_attr_data_len(attr);'], \
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
@@ -526,11 +508,11 @@ class TypeBitfield32(Type):
return f"NLA_POLICY_BITFIELD32({mask})"
def attr_put(self, ri, var):
- line = f"mnl_attr_put(nlh, {self.enum_name}, sizeof(struct nla_bitfield32), &{var}->{self.c_name})"
+ line = f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}, sizeof(struct nla_bitfield32))"
self._attr_put_line(ri, var, line)
def _attr_get(self, ri, var):
- return f"memcpy(&{var}->{self.c_name}, mnl_attr_get_payload(attr), sizeof(struct nla_bitfield32));", None, None
+ return f"memcpy(&{var}->{self.c_name}, ynl_attr_data(attr), sizeof(struct nla_bitfield32));", None, None
def _setter_lines(self, ri, member, presence):
return [f"memcpy(&{member}, {self.c_name}, sizeof(struct nla_bitfield32));"]
@@ -563,7 +545,7 @@ class TypeNest(Type):
def _attr_get(self, ri, var):
get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))",
- "return MNL_CB_ERROR;"]
+ "return YNL_PARSE_CB_ERROR;"]
init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
f"parg.data = &{var}->{self.c_name};"]
return get_lines, init_lines, None
@@ -589,9 +571,6 @@ class TypeMultiAttr(Type):
def presence_type(self):
return 'count'
- def mnl_type(self):
- return self._mnl_type()
-
def _complex_member_type(self, ri):
if 'type' not in self.attr or self.attr['type'] == 'nest':
return self.nested_struct_type
@@ -625,9 +604,9 @@ class TypeMultiAttr(Type):
def attr_put(self, ri, var):
if self.attr['type'] in scalars:
- put_type = self.mnl_type()
+ put_type = self.type
ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
- ri.cw.p(f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+ ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
elif 'type' not in self.attr or self.attr['type'] == 'nest':
ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
@@ -665,7 +644,7 @@ class TypeArrayNest(Type):
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
get_lines = [f'attr_{self.c_name} = attr;',
- 'mnl_attr_for_each_nested(attr2, attr)',
+ 'ynl_attr_for_each_nested(attr2, attr)',
f'\t{var}->n_{self.c_name}++;']
return get_lines, None, local_vars
@@ -690,8 +669,8 @@ class TypeNestTypeValue(Type):
local_vars += [f'__u32 {", ".join(tv_names)};']
for level in self.attr["type-value"]:
level = c_lower(level)
- get_lines += [f'attr_{level} = mnl_attr_get_payload({prev});']
- get_lines += [f'{level} = mnl_attr_get_type(attr_{level});']
+ get_lines += [f'attr_{level} = ynl_attr_data({prev});']
+ get_lines += [f'{level} = ynl_attr_type(attr_{level});']
prev = 'attr_' + level
tv_args = f", {', '.join(tv_names)}"
@@ -1550,7 +1529,7 @@ def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None):
cw.block_start()
if enum and enum.type == 'flags':
cw.p(f'{arg_name} = ffs({arg_name}) - 1;')
- cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)MNL_ARRAY_SIZE({map_name}))')
+ cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)YNL_ARRAY_SIZE({map_name}))')
cw.p('return NULL;')
cw.p(f'return {map_name}[{arg_name}];')
cw.block_end()
@@ -1612,12 +1591,12 @@ def put_req_nested(ri, struct):
ri.cw.block_start()
ri.cw.write_func_lvar('struct nlattr *nest;')
- ri.cw.p("nest = mnl_attr_nest_start(nlh, attr_type);")
+ ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);")
for _, arg in struct.member_list():
arg.attr_put(ri, "obj")
- ri.cw.p("mnl_attr_nest_end(nlh, nest);")
+ ri.cw.p("ynl_attr_nest_end(nlh, nest);")
ri.cw.nl()
ri.cw.p('return 0;')
@@ -1627,11 +1606,11 @@ def put_req_nested(ri, struct):
def _multi_parse(ri, struct, init_lines, local_vars):
if struct.nested:
- iter_line = "mnl_attr_for_each_nested(attr, nested)"
+ iter_line = "ynl_attr_for_each_nested(attr, nested)"
else:
if ri.fixed_hdr:
local_vars += ['void *hdr;']
- iter_line = "mnl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
+ iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
array_nests = set()
multi_attrs = set()
@@ -1665,7 +1644,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p(f'dst->{arg} = {arg};')
if ri.fixed_hdr:
- ri.cw.p('hdr = mnl_nlmsg_get_payload_offset(nlh, sizeof(struct genlmsghdr));')
+ ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));")
for anest in sorted(all_multi):
aspec = struct[anest]
@@ -1674,7 +1653,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.nl()
ri.cw.block_start(line=iter_line)
- ri.cw.p('unsigned int type = mnl_attr_get_type(attr);')
+ ri.cw.p('unsigned int type = ynl_attr_type(attr);')
ri.cw.nl()
first = True
@@ -1690,14 +1669,14 @@ def _multi_parse(ri, struct, init_lines, local_vars):
aspec = struct[anest]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
- ri.cw.p(f"dst->{aspec.c_name} = calloc({aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+ ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
- ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})")
+ ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})")
ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
- ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, mnl_attr_get_type(attr)))")
- ri.cw.p('return MNL_CB_ERROR;')
+ ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))")
+ ri.cw.p('return YNL_PARSE_CB_ERROR;')
ri.cw.p('i++;')
ri.cw.block_end()
ri.cw.block_end()
@@ -1712,13 +1691,13 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
ri.cw.block_start(line=iter_line)
- ri.cw.block_start(line=f"if (mnl_attr_get_type(attr) == {aspec.enum_name})")
+ ri.cw.block_start(line=f"if (ynl_attr_type(attr) == {aspec.enum_name})")
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr))")
- ri.cw.p('return MNL_CB_ERROR;')
+ ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.type in scalars:
- ri.cw.p(f"dst->{aspec.c_name}[i] = mnl_attr_get_{aspec.mnl_type()}(attr);")
+ ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);")
else:
raise Exception('Nest parsing type not supported yet')
ri.cw.p('i++;')
@@ -1730,7 +1709,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if struct.nested:
ri.cw.p('return 0;')
else:
- ri.cw.p('return MNL_CB_OK;')
+ ri.cw.p('return YNL_PARSE_CB_OK;')
ri.cw.block_end()
ri.cw.nl()
@@ -1760,10 +1739,9 @@ def parse_rsp_msg(ri, deref=False):
return
func_args = ['const struct nlmsghdr *nlh',
- 'void *data']
+ 'struct ynl_parse_arg *yarg']
local_vars = [f'{type_name(ri, "reply", deref=deref)} *dst;',
- 'struct ynl_parse_arg *yarg = data;',
'const struct nlattr *attr;']
init_lines = ['dst = yarg->data;']
@@ -1774,7 +1752,7 @@ def parse_rsp_msg(ri, deref=False):
else:
# Empty reply
ri.cw.block_start()
- ri.cw.p('return MNL_CB_OK;')
+ ri.cw.p('return YNL_PARSE_CB_OK;')
ri.cw.block_end()
ri.cw.nl()
@@ -1809,7 +1787,7 @@ def print_req(ri):
if ri.fixed_hdr:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
- ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);")
+ ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
ri.cw.nl()
@@ -1859,20 +1837,21 @@ def print_dump(ri):
ri.cw.write_func_lvar(local_vars)
- ri.cw.p('yds.ys = ys;')
+ ri.cw.p('yds.yarg.ys = ys;')
+ ri.cw.p(f"yds.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
+ ri.cw.p("yds.yarg.data = NULL;")
ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});")
ri.cw.p(f"yds.cb = {op_prefix(ri, 'reply', deref=True)}_parse;")
if ri.op.value is not None:
ri.cw.p(f'yds.rsp_cmd = {ri.op.enum_name};')
else:
ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
- ri.cw.p(f"yds.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
ri.cw.nl()
ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
if ri.fixed_hdr:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
- ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);")
+ ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
ri.cw.nl()
@@ -2363,6 +2342,10 @@ def print_kernel_family_struct_hdr(family, cw):
cw.p(f"extern struct genl_family {family.c_name}_nl_family;")
cw.nl()
+ if 'sock-priv' in family.kernel_family:
+ cw.p(f'void {family.c_name}_nl_sock_priv_init({family.kernel_family["sock-priv"]} *priv);')
+ cw.p(f'void {family.c_name}_nl_sock_priv_destroy({family.kernel_family["sock-priv"]} *priv);')
+ cw.nl()
def print_kernel_family_struct_src(family, cw):
@@ -2384,6 +2367,11 @@ def print_kernel_family_struct_src(family, cw):
if family.mcgrps['list']:
cw.p(f'.mcgrps\t\t= {family.c_name}_nl_mcgrps,')
cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),')
+ if 'sock-priv' in family.kernel_family:
+ cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),')
+ # Force cast here, actual helpers take pointer to the real type.
+ cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,')
+ cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,')
cw.block_end(';')
@@ -2584,7 +2572,7 @@ def render_user_family(family, cw, prototype):
cw.p('.hdr_len\t= sizeof(struct genlmsghdr),')
if family.ntfs:
cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
- cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),")
+ cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),")
cw.block_end(line=';')
@@ -2680,6 +2668,7 @@ def main():
cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"')
cw.nl()
headers = ['uapi/' + parsed.uapi_header]
+ headers += parsed.kernel_family.get('headers', [])
else:
cw.p('#include <stdlib.h>')
cw.p('#include <string.h>')
@@ -2700,7 +2689,6 @@ def main():
if args.mode == "user":
if not args.header:
- cw.p("#include <libmnl/libmnl.h>")
cw.p("#include <linux/genetlink.h>")
cw.nl()
for one in args.user_header:
diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py
index 262d88f88696..927407b3efb3 100755
--- a/tools/net/ynl/ynl-gen-rst.py
+++ b/tools/net/ynl/ynl-gen-rst.py
@@ -189,12 +189,19 @@ def parse_operations(operations: List[Dict[str, Any]]) -> str:
def parse_entries(entries: List[Dict[str, Any]], level: int) -> str:
"""Parse a list of entries"""
+ ignored = ["pad"]
lines = []
for entry in entries:
if isinstance(entry, dict):
# entries could be a list or a dictionary
+ field_name = entry.get("name", "")
+ if field_name in ignored:
+ continue
+ type_ = entry.get("type")
+ if type_:
+ field_name += f" ({inline(type_)})"
lines.append(
- rst_fields(entry.get("name", ""), sanitize(entry.get("doc", "")), level)
+ rst_fields(field_name, sanitize(entry.get("doc", "")), level)
)
elif isinstance(entry, list):
lines.append(rst_list_inline(entry, level))
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index 3bf506d4a63c..a6cf69a665e8 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -23,10 +23,16 @@ CONFIG_USB4=y
CONFIG_NET=y
CONFIG_MCTP=y
+CONFIG_MCTP_FLOWS=y
CONFIG_INET=y
CONFIG_MPTCP=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_WLAN_VENDOR_INTEL=y
+CONFIG_IWLWIFI=y
+
CONFIG_DAMON=y
CONFIG_DAMON_VADDR=y
CONFIG_DAMON_PADDR=y
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index cd9ae576bfde..d117e8a96ded 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -193,6 +193,8 @@ run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(BUILD) \
O=$(abs_objtree); \
done;
@@ -244,7 +246,10 @@ ifdef INSTALL_PATH
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \
+ INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(INSTALL_PATH) \
O=$(abs_objtree) \
$(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
index a52ecd43dbe3..ca81afa4ee90 100644
--- a/tools/testing/selftests/alsa/test-pcmtest-driver.c
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -127,11 +127,11 @@ FIXTURE_SETUP(pcmtest) {
int err;
if (geteuid())
- SKIP(exit(-1), "This test needs root to run!");
+ SKIP(return, "This test needs root to run!");
err = read_patterns();
if (err)
- SKIP(exit(-1), "Can't read patterns. Probably, module isn't loaded");
+ SKIP(return, "Can't read patterns. Probably, module isn't loaded");
card_name = malloc(127);
ASSERT_NE(card_name, NULL);
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 5c2cc7e8c5d0..d8ade15e2789 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,6 +1,5 @@
bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-exceptions # JIT does not support calling kfunc bpf_throw: -524
fexit_sleep # The test never returns. The remaining tests cannot start.
kprobe_multi_bench_attach # needs CONFIG_FPROBE
kprobe_multi_test # needs CONFIG_FPROBE
@@ -11,3 +10,5 @@ fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_mu
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
+verifier_arena # JIT does not support arena
+arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 1a63996c0304..f4a2f66a683d 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -3,3 +3,6 @@
exceptions # JIT does not support calling kfunc bpf_throw (exceptions)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
+verifier_iterating_callbacks
+verifier_arena # JIT does not support arena
+arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fd15017ed3b1..3b9eb40d6343 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,13 +34,26 @@ LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
CFLAGS += -g $(OPT_FLAGS) -rdynamic \
- -Wall -Werror \
+ -Wall -Werror -fno-omit-frame-pointer \
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
LDFLAGS += $(SAN_LDFLAGS)
LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
+# The following tests perform type punning and they may break strict
+# aliasing rules, which are exploited by both GCC and clang by default
+# while optimizing. This can lead to broken programs.
+progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing
+progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing
+progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing
+progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing
+progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing
+progs/syscall.c-CFLAGS := -fno-strict-aliasing
+progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
+progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
+progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
+
ifneq ($(LLVM),)
# Silence some warnings when compiled with clang
CFLAGS += -Wno-unused-command-line-argument
@@ -64,6 +77,15 @@ TEST_INST_SUBDIRS := no_alu32
ifneq ($(BPF_GCC),)
TEST_GEN_PROGS += test_progs-bpf_gcc
TEST_INST_SUBDIRS += bpf_gcc
+
+# The following tests contain C code that, although technically legal,
+# triggers GCC warnings that cannot be disabled: declaration of
+# anonymous struct types in function parameter lists.
+progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error
endif
ifneq ($(CLANG_CPUV4),)
@@ -110,7 +132,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
- xdp_features
+ xdp_features bpf_test_no_cfi.ko
TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
@@ -175,8 +197,7 @@ endif
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
$(notdir $(TEST_GEN_PROGS) \
- $(TEST_GEN_PROGS_EXTENDED) \
- $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+ $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ;
# sort removes libbpf duplicates when not cross-building
MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
@@ -233,6 +254,12 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmo
$(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod
$(Q)cp bpf_testmod/bpf_testmod.ko $@
+$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch])
+ $(call msg,MOD,,$@)
+ $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation
+ $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_test_no_cfi
+ $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@
+
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
ifneq ($(CROSS_COMPILE),)
CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
@@ -382,11 +409,11 @@ endif
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
- -I$(abspath $(OUTPUT)/../usr/include)
+ -I$(abspath $(OUTPUT)/../usr/include) \
+ -Wno-compare-distinct-pointer-types
# TODO: enable me -Wsign-compare
-CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
+CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
@@ -504,7 +531,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \
$(wildcard $(BPFDIR)/*.bpf.h) \
| $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
- $(TRUNNER_BPF_CFLAGS))
+ $(TRUNNER_BPF_CFLAGS) \
+ $$($$<-CFLAGS))
$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@@ -514,6 +542,7 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@
$(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
$(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@@ -522,6 +551,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
$(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o))
@@ -532,6 +562,7 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
$(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
$(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
endif
# ensure we set up tests.h header generation rule just once
@@ -606,6 +637,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
flow_dissector_load.h \
ip_check_defrag_frags.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
+ $(OUTPUT)/bpf_test_no_cfi.ko \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
$(OUTPUT)/sign-file \
@@ -729,11 +761,12 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
+EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature bpftool \
$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \
no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \
+ bpf_test_no_cfi.ko \
liburandom_read.so)
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 9af79c7a9b58..9b974e425af3 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -115,7 +115,7 @@ the insn 20 undoes map_value addition. It is currently impossible for the
verifier to understand such speculative pointer arithmetic.
Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12.
-__ https://reviews.llvm.org/D85570
+__ https://github.com/llvm/llvm-project/commit/ddf1864ace484035e3cde5e83b3a31ac81e059c6
The corresponding C code
@@ -165,7 +165,7 @@ This is due to a llvm BPF backend bug. `The fix`__
has been pushed to llvm 10.x release branch and will be
available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
-__ https://reviews.llvm.org/D78466
+__ https://github.com/llvm/llvm-project/commit/3cb7e7bf959dcd3b8080986c62e10a75c7af43f0
bpf_verif_scale/loop6.bpf.o test failure with Clang 12
======================================================
@@ -204,7 +204,7 @@ r5(w5) is eventually saved on stack at insn #24 for later use.
This cause later verifier failure. The bug has been `fixed`__ in
Clang 13.
-__ https://reviews.llvm.org/D97479
+__ https://github.com/llvm/llvm-project/commit/1959ead525b8830cc8a345f45e1c3ef9902d3229
BPF CO-RE-based tests and Clang version
=======================================
@@ -221,11 +221,11 @@ failures:
- __builtin_btf_type_id() [0_, 1_, 2_];
- __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_].
-.. _0: https://reviews.llvm.org/D74572
-.. _1: https://reviews.llvm.org/D74668
-.. _2: https://reviews.llvm.org/D85174
-.. _3: https://reviews.llvm.org/D83878
-.. _4: https://reviews.llvm.org/D83242
+.. _0: https://github.com/llvm/llvm-project/commit/6b01b465388b204d543da3cf49efd6080db094a9
+.. _1: https://github.com/llvm/llvm-project/commit/072cde03aaa13a2c57acf62d79876bf79aa1919f
+.. _2: https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
+.. _3: https://github.com/llvm/llvm-project/commit/6d218b4adb093ff2e9764febbbc89f429412006c
+.. _4: https://github.com/llvm/llvm-project/commit/6d6750696400e7ce988d66a1a00e1d0cb32815f8
Floating-point tests and Clang version
======================================
@@ -234,7 +234,7 @@ Certain selftests, e.g. core_reloc, require support for the floating-point
types, which was introduced in `Clang 13`__. The older Clang versions will
either crash when compiling these tests, or generate an incorrect BTF.
-__ https://reviews.llvm.org/D83289
+__ https://github.com/llvm/llvm-project/commit/a7137b238a07d9399d3ae96c0b461571bd5aa8b2
Kernel function call test and Clang version
===========================================
@@ -248,7 +248,7 @@ Without it, the error from compiling bpf selftests looks like:
libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
-__ https://reviews.llvm.org/D93563
+__ https://github.com/llvm/llvm-project/commit/886f9ff53155075bd5f1e994f17b85d1e1b7470c
btf_tag test and Clang version
==============================
@@ -264,8 +264,8 @@ Without them, the btf_tag selftest will be skipped and you will observe:
#<test_num> btf_tag:SKIP
-.. _0: https://reviews.llvm.org/D111588
-.. _1: https://reviews.llvm.org/D111199
+.. _0: https://github.com/llvm/llvm-project/commit/a162b67c98066218d0d00aa13b99afb95d9bb5e6
+.. _1: https://github.com/llvm/llvm-project/commit/3466e00716e12e32fdb100e3fcfca5c2b3e8d784
Clang dependencies for static linking tests
===========================================
@@ -274,7 +274,7 @@ linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to
generate valid BTF information for weak variables. Please make sure you use
Clang that contains the fix.
-__ https://reviews.llvm.org/D100362
+__ https://github.com/llvm/llvm-project/commit/968292cb93198442138128d850fd54dc7edc0035
Clang relocation changes
========================
@@ -292,7 +292,7 @@ Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
To fix this issue, user newer libbpf.
.. Links
-.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _clang reloc patch: https://github.com/llvm/llvm-project/commit/6a2ea84600ba4bd3b2733bd8f08f5115eb32164b
.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
Clang dependencies for the u32 spill test (xdpwall)
@@ -304,6 +304,6 @@ from running test_progs will look like:
.. code-block:: console
- test_xdpwall:FAIL:Does LLVM have https://reviews.llvm.org/D109073? unexpected error: -4007
+ test_xdpwall:FAIL:Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5? unexpected error: -4007
-__ https://reviews.llvm.org/D109073
+__ https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 73ce11b0547d..b2b4c391eb0a 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -323,14 +323,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
break;
case 'p':
env.producer_cnt = strtol(arg, NULL, 10);
- if (env.producer_cnt <= 0) {
+ if (env.producer_cnt < 0) {
fprintf(stderr, "Invalid producer count: %s\n", arg);
argp_usage(state);
}
break;
case 'c':
env.consumer_cnt = strtol(arg, NULL, 10);
- if (env.consumer_cnt <= 0) {
+ if (env.consumer_cnt < 0) {
fprintf(stderr, "Invalid consumer count: %s\n", arg);
argp_usage(state);
}
@@ -495,14 +495,20 @@ extern const struct bench bench_trig_base;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_kretprobe;
+extern const struct bench bench_trig_kprobe_multi;
+extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_uprobe_base;
-extern const struct bench bench_trig_uprobe_with_nop;
-extern const struct bench bench_trig_uretprobe_with_nop;
-extern const struct bench bench_trig_uprobe_without_nop;
-extern const struct bench bench_trig_uretprobe_without_nop;
+extern const struct bench bench_trig_uprobe_nop;
+extern const struct bench bench_trig_uretprobe_nop;
+extern const struct bench bench_trig_uprobe_push;
+extern const struct bench bench_trig_uretprobe_push;
+extern const struct bench bench_trig_uprobe_ret;
+extern const struct bench bench_trig_uretprobe_ret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
@@ -537,14 +543,20 @@ static const struct bench *benchs[] = {
&bench_trig_tp,
&bench_trig_rawtp,
&bench_trig_kprobe,
+ &bench_trig_kretprobe,
+ &bench_trig_kprobe_multi,
+ &bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_fexit,
&bench_trig_fentry_sleep,
&bench_trig_fmodret,
&bench_trig_uprobe_base,
- &bench_trig_uprobe_with_nop,
- &bench_trig_uretprobe_with_nop,
- &bench_trig_uprobe_without_nop,
- &bench_trig_uretprobe_without_nop,
+ &bench_trig_uprobe_nop,
+ &bench_trig_uretprobe_nop,
+ &bench_trig_uprobe_push,
+ &bench_trig_uretprobe_push,
+ &bench_trig_uprobe_ret,
+ &bench_trig_uretprobe_ret,
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
@@ -607,6 +619,10 @@ static void setup_benchmark(void)
bench->setup();
for (i = 0; i < env.consumer_cnt; i++) {
+ if (!bench->consumer_thread) {
+ fprintf(stderr, "benchmark doesn't support consumers!\n");
+ exit(1);
+ }
err = pthread_create(&state.consumers[i], NULL,
bench->consumer_thread, (void *)(long)i);
if (err) {
@@ -626,6 +642,10 @@ static void setup_benchmark(void)
env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
for (i = 0; i < env.producer_cnt; i++) {
+ if (!bench->producer_thread) {
+ fprintf(stderr, "benchmark doesn't support producers!\n");
+ exit(1);
+ }
err = pthread_create(&state.producers[i], NULL,
bench->producer_thread, (void *)(long)i);
if (err) {
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index dbd362771d6a..ace0d1011a8e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -85,12 +85,36 @@ static void trigger_kprobe_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
+static void trigger_kretprobe_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
+}
+
+static void trigger_kprobe_multi_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
+}
+
+static void trigger_kretprobe_multi_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
+}
+
static void trigger_fentry_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void trigger_fexit_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fexit);
+}
+
static void trigger_fentry_sleep_setup(void)
{
setup_ctx();
@@ -113,12 +137,25 @@ static void trigger_fmodret_setup(void)
* GCC doesn't generate stack setup preample for these functions due to them
* having no input arguments and doing nothing in the body.
*/
-__weak void uprobe_target_with_nop(void)
+__weak void uprobe_target_nop(void)
{
asm volatile ("nop");
}
-__weak void uprobe_target_without_nop(void)
+__weak void opaque_noop_func(void)
+{
+}
+
+__weak int uprobe_target_push(void)
+{
+ /* overhead of function call is negligible compared to uprobe
+ * triggering, so this shouldn't affect benchmark results much
+ */
+ opaque_noop_func();
+ return 1;
+}
+
+__weak void uprobe_target_ret(void)
{
asm volatile ("");
}
@@ -126,27 +163,34 @@ __weak void uprobe_target_without_nop(void)
static void *uprobe_base_producer(void *input)
{
while (true) {
- uprobe_target_with_nop();
+ uprobe_target_nop();
atomic_inc(&base_hits.value);
}
return NULL;
}
-static void *uprobe_producer_with_nop(void *input)
+static void *uprobe_producer_nop(void *input)
+{
+ while (true)
+ uprobe_target_nop();
+ return NULL;
+}
+
+static void *uprobe_producer_push(void *input)
{
while (true)
- uprobe_target_with_nop();
+ uprobe_target_push();
return NULL;
}
-static void *uprobe_producer_without_nop(void *input)
+static void *uprobe_producer_ret(void *input)
{
while (true)
- uprobe_target_without_nop();
+ uprobe_target_ret();
return NULL;
}
-static void usetup(bool use_retprobe, bool use_nop)
+static void usetup(bool use_retprobe, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
@@ -159,11 +203,7 @@ static void usetup(bool use_retprobe, bool use_nop)
exit(1);
}
- if (use_nop)
- uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop);
- else
- uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop);
-
+ uprobe_offset = get_uprobe_offset(target_addr);
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
-1 /* all PIDs */,
@@ -176,24 +216,34 @@ static void usetup(bool use_retprobe, bool use_nop)
ctx.skel->links.bench_trigger_uprobe = link;
}
-static void uprobe_setup_with_nop(void)
+static void uprobe_setup_nop(void)
{
- usetup(false, true);
+ usetup(false, &uprobe_target_nop);
}
-static void uretprobe_setup_with_nop(void)
+static void uretprobe_setup_nop(void)
{
- usetup(true, true);
+ usetup(true, &uprobe_target_nop);
}
-static void uprobe_setup_without_nop(void)
+static void uprobe_setup_push(void)
{
- usetup(false, false);
+ usetup(false, &uprobe_target_push);
}
-static void uretprobe_setup_without_nop(void)
+static void uretprobe_setup_push(void)
{
- usetup(true, false);
+ usetup(true, &uprobe_target_push);
+}
+
+static void uprobe_setup_ret(void)
+{
+ usetup(false, &uprobe_target_ret);
+}
+
+static void uretprobe_setup_ret(void)
+{
+ usetup(true, &uprobe_target_ret);
}
const struct bench bench_trig_base = {
@@ -235,6 +285,36 @@ const struct bench bench_trig_kprobe = {
.report_final = hits_drops_report_final,
};
+const struct bench bench_trig_kretprobe = {
+ .name = "trig-kretprobe",
+ .validate = trigger_validate,
+ .setup = trigger_kretprobe_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe_multi = {
+ .name = "trig-kprobe-multi",
+ .validate = trigger_validate,
+ .setup = trigger_kprobe_multi_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kretprobe_multi = {
+ .name = "trig-kretprobe-multi",
+ .validate = trigger_validate,
+ .setup = trigger_kretprobe_multi_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
const struct bench bench_trig_fentry = {
.name = "trig-fentry",
.validate = trigger_validate,
@@ -245,6 +325,16 @@ const struct bench bench_trig_fentry = {
.report_final = hits_drops_report_final,
};
+const struct bench bench_trig_fexit = {
+ .name = "trig-fexit",
+ .validate = trigger_validate,
+ .setup = trigger_fexit_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
const struct bench bench_trig_fentry_sleep = {
.name = "trig-fentry-sleep",
.validate = trigger_validate,
@@ -274,37 +364,55 @@ const struct bench bench_trig_uprobe_base = {
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uprobe_with_nop = {
- .name = "trig-uprobe-with-nop",
- .setup = uprobe_setup_with_nop,
- .producer_thread = uprobe_producer_with_nop,
+const struct bench bench_trig_uprobe_nop = {
+ .name = "trig-uprobe-nop",
+ .setup = uprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_nop = {
+ .name = "trig-uretprobe-nop",
+ .setup = uretprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_push = {
+ .name = "trig-uprobe-push",
+ .setup = uprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uretprobe_with_nop = {
- .name = "trig-uretprobe-with-nop",
- .setup = uretprobe_setup_with_nop,
- .producer_thread = uprobe_producer_with_nop,
+const struct bench bench_trig_uretprobe_push = {
+ .name = "trig-uretprobe-push",
+ .setup = uretprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uprobe_without_nop = {
- .name = "trig-uprobe-without-nop",
- .setup = uprobe_setup_without_nop,
- .producer_thread = uprobe_producer_without_nop,
+const struct bench bench_trig_uprobe_ret = {
+ .name = "trig-uprobe-ret",
+ .setup = uprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uretprobe_without_nop = {
- .name = "trig-uretprobe-without-nop",
- .setup = uretprobe_setup_without_nop,
- .producer_thread = uprobe_producer_without_nop,
+const struct bench bench_trig_uretprobe_ret = {
+ .name = "trig-uretprobe-ret",
+ .setup = uretprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
new file mode 100755
index 000000000000..9bdcc74e03a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base {uprobe,uretprobe}-{nop,push,ret}
+do
+ summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/bpf_arena_alloc.h b/tools/testing/selftests/bpf/bpf_arena_alloc.h
new file mode 100644
index 000000000000..c27678299e0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_alloc.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+#ifndef __round_mask
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#endif
+#ifndef round_up
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#endif
+
+#ifdef __BPF__
+#define NR_CPUS (sizeof(struct cpumask) * 8)
+
+static void __arena * __arena page_frag_cur_page[NR_CPUS];
+static int __arena page_frag_cur_offset[NR_CPUS];
+
+/* Simple page_frag allocator */
+static inline void __arena* bpf_alloc(unsigned int size)
+{
+ __u64 __arena *obj_cnt;
+ __u32 cpu = bpf_get_smp_processor_id();
+ void __arena *page = page_frag_cur_page[cpu];
+ int __arena *cur_offset = &page_frag_cur_offset[cpu];
+ int offset;
+
+ size = round_up(size, 8);
+ if (size >= PAGE_SIZE - 8)
+ return NULL;
+ if (!page) {
+refill:
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return NULL;
+ cast_kern(page);
+ page_frag_cur_page[cpu] = page;
+ *cur_offset = PAGE_SIZE - 8;
+ obj_cnt = page + PAGE_SIZE - 8;
+ *obj_cnt = 0;
+ } else {
+ cast_kern(page);
+ obj_cnt = page + PAGE_SIZE - 8;
+ }
+
+ offset = *cur_offset - size;
+ if (offset < 0)
+ goto refill;
+
+ (*obj_cnt)++;
+ *cur_offset = offset;
+ return page + offset;
+}
+
+static inline void bpf_free(void __arena *addr)
+{
+ __u64 __arena *obj_cnt;
+
+ addr = (void __arena *)(((long)addr) & ~(PAGE_SIZE - 1));
+ obj_cnt = addr + PAGE_SIZE - 8;
+ if (--(*obj_cnt) == 0)
+ bpf_arena_free_pages(&arena, addr, 1);
+}
+#else
+static inline void __arena* bpf_alloc(unsigned int size) { return NULL; }
+static inline void bpf_free(void __arena *addr) {}
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
new file mode 100644
index 000000000000..bcf195c64a45
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef WRITE_ONCE
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+#endif
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member) \
+ ({ \
+ void __arena *__mptr = (void __arena *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+#ifdef __BPF__ /* when compiled as bpf program */
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+#define __arena
+#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
+#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+ int node_id, __u64 flags) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+#else /* when compiled as user space code */
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+__weak char arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+ int node_id, __u64 flags)
+{
+ return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_htab.h b/tools/testing/selftests/bpf/bpf_arena_htab.h
new file mode 100644
index 000000000000..acc01a876668
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_htab.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include <errno.h>
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct htab_bucket {
+ struct arena_list_head head;
+};
+typedef struct htab_bucket __arena htab_bucket_t;
+
+struct htab {
+ htab_bucket_t *buckets;
+ int n_buckets;
+};
+typedef struct htab __arena htab_t;
+
+static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash)
+{
+ htab_bucket_t *b = htab->buckets;
+
+ cast_kern(b);
+ return &b[hash & (htab->n_buckets - 1)];
+}
+
+static inline arena_list_head_t *select_bucket(htab_t *htab, __u32 hash)
+{
+ return &__select_bucket(htab, hash)->head;
+}
+
+struct hashtab_elem {
+ int hash;
+ int key;
+ int value;
+ struct arena_list_node hash_node;
+};
+typedef struct hashtab_elem __arena hashtab_elem_t;
+
+static hashtab_elem_t *lookup_elem_raw(arena_list_head_t *head, __u32 hash, int key)
+{
+ hashtab_elem_t *l;
+
+ list_for_each_entry(l, head, hash_node)
+ if (l->hash == hash && l->key == key)
+ return l;
+
+ return NULL;
+}
+
+static int htab_hash(int key)
+{
+ return key;
+}
+
+__weak int htab_lookup_elem(htab_t *htab __arg_arena, int key)
+{
+ hashtab_elem_t *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+ if (l_old)
+ return l_old->value;
+ return 0;
+}
+
+__weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value)
+{
+ hashtab_elem_t *l_new = NULL, *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+
+ l_new = bpf_alloc(sizeof(*l_new));
+ if (!l_new)
+ return -ENOMEM;
+ l_new->key = key;
+ l_new->hash = htab_hash(key);
+ l_new->value = value;
+
+ list_add_head(&l_new->hash_node, head);
+ if (l_old) {
+ list_del(&l_old->hash_node);
+ bpf_free(l_old);
+ }
+ return 0;
+}
+
+void htab_init(htab_t *htab)
+{
+ void __arena *buckets = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+
+ cast_user(buckets);
+ htab->buckets = buckets;
+ htab->n_buckets = 2 * PAGE_SIZE / sizeof(struct htab_bucket);
+}
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
new file mode 100644
index 000000000000..b99b9f408eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+struct arena_list_node;
+
+typedef struct arena_list_node __arena arena_list_node_t;
+
+struct arena_list_node {
+ arena_list_node_t *next;
+ arena_list_node_t * __arena *pprev;
+};
+
+struct arena_list_head {
+ struct arena_list_node __arena *first;
+};
+typedef struct arena_list_head __arena arena_list_head_t;
+
+#define list_entry(ptr, type, member) arena_container_of(ptr, type, member)
+
+#define list_entry_safe(ptr, type, member) \
+ ({ typeof(*ptr) * ___ptr = (ptr); \
+ ___ptr ? ({ cast_kern(___ptr); list_entry(___ptr, type, member); }) : NULL; \
+ })
+
+#ifndef __BPF__
+static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { return NULL; }
+static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {}
+static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
+#define cond_break ({})
+#endif
+
+/* Safely walk link list elements. Deletion of elements is allowed. */
+#define list_for_each_entry(pos, head, member) \
+ for (void * ___tmp = (pos = list_entry_safe((head)->first, \
+ typeof(*(pos)), member), \
+ (void *)0); \
+ pos && ({ ___tmp = (void *)pos->member.next; 1; }); \
+ cond_break, \
+ pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))
+
+static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
+{
+ arena_list_node_t *first = h->first, * __arena *tmp;
+
+ cast_user(first);
+ cast_kern(n);
+ WRITE_ONCE(n->next, first);
+ cast_kern(first);
+ if (first) {
+ tmp = &n->next;
+ cast_user(tmp);
+ WRITE_ONCE(first->pprev, tmp);
+ }
+ cast_user(n);
+ WRITE_ONCE(h->first, n);
+
+ tmp = &h->first;
+ cast_user(tmp);
+ cast_kern(n);
+ WRITE_ONCE(n->pprev, tmp);
+}
+
+static inline void __list_del(arena_list_node_t *n)
+{
+ arena_list_node_t *next = n->next, *tmp;
+ arena_list_node_t * __arena *pprev = n->pprev;
+
+ cast_user(next);
+ cast_kern(pprev);
+ tmp = *pprev;
+ cast_kern(tmp);
+ WRITE_ONCE(tmp, next);
+ if (next) {
+ cast_user(pprev);
+ cast_kern(next);
+ WRITE_ONCE(next->pprev, pprev);
+ }
+}
+
+#define POISON_POINTER_DELTA 0
+
+#define LIST_POISON1 ((void __arena *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void __arena *) 0x122 + POISON_POINTER_DELTA)
+
+static inline void list_del(arena_list_node_t *n)
+{
+ __list_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index f44875f8b367..a5b9df38c162 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -260,11 +260,11 @@ extern void bpf_throw(u64 cookie) __ksym;
#define __is_signed_type(type) (((type)(-1)) < (type)1)
-#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \
+#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \
({ \
__label__ l_true; \
bool ret = DEFAULT; \
- asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \
+ asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \
:: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \
ret = !DEFAULT; \
l_true: \
@@ -276,7 +276,7 @@ l_true: \
* __lhs OP __rhs below will catch the mistake.
* Be aware that we check only __lhs to figure out the sign of compare.
*/
-#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \
+#define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \
({ \
typeof(LHS) __lhs = (LHS); \
typeof(RHS) __rhs = (RHS); \
@@ -285,14 +285,17 @@ l_true: \
(void)(__lhs OP __rhs); \
if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \
if (sizeof(__rhs) == 8) \
- ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \
+ /* "i" will truncate 64-bit constant into s32, \
+ * so we have to use extra register via "r". \
+ */ \
+ ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \
else \
- ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \
+ ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \
} else { \
if (sizeof(__rhs) == 8) \
- ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \
+ ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \
else \
- ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \
+ ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \
} \
ret; \
})
@@ -304,7 +307,7 @@ l_true: \
#ifndef bpf_cmp_likely
#define bpf_cmp_likely(LHS, OP, RHS) \
({ \
- bool ret; \
+ bool ret = 0; \
if (__builtin_strcmp(#OP, "==") == 0) \
ret = _bpf_cmp(LHS, !=, RHS, false); \
else if (__builtin_strcmp(#OP, "!=") == 0) \
@@ -318,16 +321,71 @@ l_true: \
else if (__builtin_strcmp(#OP, ">=") == 0) \
ret = _bpf_cmp(LHS, <, RHS, false); \
else \
- (void) "bug"; \
+ asm volatile("r0 " #OP " invalid compare"); \
ret; \
})
#endif
+#define cond_break \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: break; \
+ l_continue:; \
+ })
+
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
#endif
+/* emit instruction:
+ * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
+ */
+#ifndef bpf_addr_space_cast
+#define bpf_addr_space_cast(var, dst_as, src_as)\
+ asm volatile(".byte 0xBF; \
+ .ifc %[reg], r0; \
+ .byte 0x00; \
+ .endif; \
+ .ifc %[reg], r1; \
+ .byte 0x11; \
+ .endif; \
+ .ifc %[reg], r2; \
+ .byte 0x22; \
+ .endif; \
+ .ifc %[reg], r3; \
+ .byte 0x33; \
+ .endif; \
+ .ifc %[reg], r4; \
+ .byte 0x44; \
+ .endif; \
+ .ifc %[reg], r5; \
+ .byte 0x55; \
+ .endif; \
+ .ifc %[reg], r6; \
+ .byte 0x66; \
+ .endif; \
+ .ifc %[reg], r7; \
+ .byte 0x77; \
+ .endif; \
+ .ifc %[reg], r8; \
+ .byte 0x88; \
+ .endif; \
+ .ifc %[reg], r9; \
+ .byte 0x99; \
+ .endif; \
+ .short %[off]; \
+ .long %[as]" \
+ : [reg]"+r"(var) \
+ : [off]"i"(BPF_ADDR_SPACE_CAST) \
+ , [as]"i"((dst_as << 16) | src_as));
+#endif
+
/* Description
* Assert that a conditional expression is true.
* Returns
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index b4e78c1eb37b..14ebe7d9e1a3 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -9,7 +9,7 @@ struct bpf_sock_addr_kern;
* Error code
*/
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
/* Description
* Initializes an xdp-type dynptr
@@ -17,7 +17,7 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
* Error code
*/
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
/* Description
* Obtain a read-only pointer to the dynptr's data
@@ -26,7 +26,7 @@ extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym;
+ void *buffer, __u32 buffer__szk) __ksym __weak;
/* Description
* Obtain a read-write pointer to the dynptr's data
@@ -35,13 +35,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym;
+ void *buffer, __u32 buffer__szk) __ksym __weak;
-extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym;
-extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym;
-extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym;
-extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym;
-extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym;
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
+extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
+extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
/* Description
* Modify the address of a AF_UNIX sockaddr.
@@ -51,9 +51,19 @@ extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clo
extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
const __u8 *sun_path, __u32 sun_path__sz) __ksym;
+/* Description
+ * Allocate and configure a reqsk and link it with a listener and skb.
+ * Returns
+ * Error code
+ */
+struct sock;
+struct bpf_tcp_req_attrs;
+extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk,
+ struct bpf_tcp_req_attrs *attrs, int attrs__sz) __ksym;
+
void *bpf_cast_to_kern_ctx(void *) __ksym;
-void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym;
+extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
extern int bpf_get_file_xattr(struct file *file, const char *name,
struct bpf_dynptr *value_ptr) __ksym;
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
new file mode 100644
index 000000000000..ed5143b79edf
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
@@ -0,0 +1,19 @@
+BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_test_no_cfi.ko
+
+obj-m += bpf_test_no_cfi.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean
+
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
new file mode 100644
index 000000000000..b1dd889d5d7d
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+struct bpf_test_no_cfi_ops {
+ void (*fn_1)(void);
+ void (*fn_2)(void);
+};
+
+static int dummy_init(struct btf *btf)
+{
+ return 0;
+}
+
+static int dummy_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static int dummy_reg(void *kdata)
+{
+ return 0;
+}
+
+static void dummy_unreg(void *kdata)
+{
+}
+
+static const struct bpf_verifier_ops dummy_verifier_ops;
+
+static void bpf_test_no_cfi_ops__fn_1(void)
+{
+}
+
+static void bpf_test_no_cfi_ops__fn_2(void)
+{
+}
+
+static struct bpf_test_no_cfi_ops __test_no_cif_ops = {
+ .fn_1 = bpf_test_no_cfi_ops__fn_1,
+ .fn_2 = bpf_test_no_cfi_ops__fn_2,
+};
+
+static struct bpf_struct_ops test_no_cif_ops = {
+ .verifier_ops = &dummy_verifier_ops,
+ .init = dummy_init,
+ .init_member = dummy_init_member,
+ .reg = dummy_reg,
+ .unreg = dummy_unreg,
+ .name = "bpf_test_no_cfi_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_test_no_cfi_init(void)
+{
+ int ret;
+
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ if (!ret)
+ return -EINVAL;
+
+ test_no_cif_ops.cfi_stubs = &__test_no_cif_ops;
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ return ret;
+}
+
+static void bpf_test_no_cfi_exit(void)
+{
+}
+
+module_init(bpf_test_no_cfi_init);
+module_exit(bpf_test_no_cfi_exit);
+
+MODULE_AUTHOR("Kuifeng Lee");
+MODULE_DESCRIPTION("BPF no cfi_stubs test module");
+MODULE_LICENSE("Dual BSD/GPL");
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 91907b321f91..39ad96a18123 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
+#include <linux/delay.h>
#include <linux/error-injection.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -341,12 +343,12 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.write = bpf_testmod_test_write,
};
-BTF_SET8_START(bpf_testmod_common_kfunc_ids)
+BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_kfunc_common_test)
-BTF_SET8_END(bpf_testmod_common_kfunc_ids)
+BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
.owner = THIS_MODULE,
@@ -492,7 +494,7 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused
return arg;
}
-BTF_SET8_START(bpf_testmod_check_kfunc_ids)
+BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
@@ -518,13 +520,120 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
-BTF_SET8_END(bpf_testmod_check_kfunc_ids)
+BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
+
+static int bpf_testmod_ops_init(struct btf *btf)
+{
+ return 0;
+}
+
+static bool bpf_testmod_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+static int bpf_testmod_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) {
+ /* For data fields, this function has to copy it and return
+ * 1 to indicate that the data has been handled by the
+ * struct_ops type, or the verifier will reject the map if
+ * the value of the data field is not zero.
+ */
+ ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data;
+ return 1;
+ }
+ return 0;
+}
static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
.owner = THIS_MODULE,
.set = &bpf_testmod_check_kfunc_ids,
};
+static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .is_valid_access = bpf_testmod_ops_is_valid_access,
+};
+
+static int bpf_dummy_reg(void *kdata)
+{
+ struct bpf_testmod_ops *ops = kdata;
+
+ if (ops->test_1)
+ ops->test_1();
+ /* Some test cases (ex. struct_ops_maybe_null) may not have test_2
+ * initialized, so we need to check for NULL.
+ */
+ if (ops->test_2)
+ ops->test_2(4, ops->data);
+
+ return 0;
+}
+
+static void bpf_dummy_unreg(void *kdata)
+{
+}
+
+static int bpf_testmod_test_1(void)
+{
+ return 0;
+}
+
+static void bpf_testmod_test_2(int a, int b)
+{
+}
+
+static int bpf_testmod_ops__test_maybe_null(int dummy,
+ struct task_struct *task__nullable)
+{
+ return 0;
+}
+
+static struct bpf_testmod_ops __bpf_testmod_ops = {
+ .test_1 = bpf_testmod_test_1,
+ .test_2 = bpf_testmod_test_2,
+ .test_maybe_null = bpf_testmod_ops__test_maybe_null,
+};
+
+struct bpf_struct_ops bpf_bpf_testmod_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops,
+ .name = "bpf_testmod_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_dummy_reg2(void *kdata)
+{
+ struct bpf_testmod_ops2 *ops = kdata;
+
+ ops->test_1();
+ return 0;
+}
+
+static struct bpf_testmod_ops2 __bpf_testmod_ops2 = {
+ .test_1 = bpf_testmod_test_1,
+};
+
+struct bpf_struct_ops bpf_testmod_ops2 = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg2,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops2,
+ .name = "bpf_testmod_ops2",
+ .owner = THIS_MODULE,
+};
+
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@@ -535,6 +644,8 @@ static int bpf_testmod_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
+ ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
if (ret < 0)
return ret;
if (bpf_fentry_test1(0) < 0)
@@ -544,7 +655,15 @@ static int bpf_testmod_init(void)
static void bpf_testmod_exit(void)
{
- return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+ /* Need to wait for all references to be dropped because
+ * bpf_kfunc_call_test_release() which currently resides in kernel can
+ * be called after bpf_testmod is unloaded. Once release function is
+ * moved into the module this wait can be removed.
+ */
+ while (refcount_read(&prog_test_struct.cnt) > 1)
+ msleep(20);
+
+ sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
module_init(bpf_testmod_init);
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index f32793efe095..23fa1872ee67 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -5,6 +5,8 @@
#include <linux/types.h>
+struct task_struct;
+
struct bpf_testmod_test_read_ctx {
char *buf;
loff_t off;
@@ -28,4 +30,67 @@ struct bpf_iter_testmod_seq {
int cnt;
};
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ /* Used to test nullable arguments. */
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+
+ /* The following fields are used to test shadow copies. */
+ char onebyte;
+ struct {
+ int a;
+ int b;
+ } unsupported;
+ int data;
+
+ /* The following pointers are used to test the maps having multiple
+ * pages of trampolines.
+ */
+ int (*tramp_1)(int value);
+ int (*tramp_2)(int value);
+ int (*tramp_3)(int value);
+ int (*tramp_4)(int value);
+ int (*tramp_5)(int value);
+ int (*tramp_6)(int value);
+ int (*tramp_7)(int value);
+ int (*tramp_8)(int value);
+ int (*tramp_9)(int value);
+ int (*tramp_10)(int value);
+ int (*tramp_11)(int value);
+ int (*tramp_12)(int value);
+ int (*tramp_13)(int value);
+ int (*tramp_14)(int value);
+ int (*tramp_15)(int value);
+ int (*tramp_16)(int value);
+ int (*tramp_17)(int value);
+ int (*tramp_18)(int value);
+ int (*tramp_19)(int value);
+ int (*tramp_20)(int value);
+ int (*tramp_21)(int value);
+ int (*tramp_22)(int value);
+ int (*tramp_23)(int value);
+ int (*tramp_24)(int value);
+ int (*tramp_25)(int value);
+ int (*tramp_26)(int value);
+ int (*tramp_27)(int value);
+ int (*tramp_28)(int value);
+ int (*tramp_29)(int value);
+ int (*tramp_30)(int value);
+ int (*tramp_31)(int value);
+ int (*tramp_32)(int value);
+ int (*tramp_33)(int value);
+ int (*tramp_34)(int value);
+ int (*tramp_35)(int value);
+ int (*tramp_36)(int value);
+ int (*tramp_37)(int value);
+ int (*tramp_38)(int value);
+ int (*tramp_39)(int value);
+ int (*tramp_40)(int value);
+};
+
+struct bpf_testmod_ops2 {
+ int (*test_1)(void);
+};
+
#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c125c441abc7..01f241ea2c67 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -81,6 +81,7 @@ CONFIG_NF_NAT=y
CONFIG_RC_CORE=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
+CONFIG_SYN_COOKIES=y
CONFIG_TEST_BPF=m
CONFIG_USERFAULTFD=y
CONFIG_VSOCKETS=y
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
new file mode 100644
index 000000000000..0766702de846
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+
+#include "arena_htab_asm.skel.h"
+#include "arena_htab.skel.h"
+
+#define PAGE_SIZE 4096
+
+#include "bpf_arena_htab.h"
+
+static void test_arena_htab_common(struct htab *htab)
+{
+ int i;
+
+ printf("htab %p buckets %p n_buckets %d\n", htab, htab->buckets, htab->n_buckets);
+ ASSERT_OK_PTR(htab->buckets, "htab->buckets shouldn't be NULL");
+ for (i = 0; htab->buckets && i < 16; i += 4) {
+ /*
+ * Walk htab buckets and link lists since all pointers are correct,
+ * though they were written by bpf program.
+ */
+ int val = htab_lookup_elem(htab, i);
+
+ ASSERT_EQ(i, val, "key == value");
+ }
+}
+
+static void test_arena_htab_llvm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab *skel;
+ struct htab *htab;
+ size_t arena_sz;
+ void *area;
+ int ret;
+
+ skel = arena_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab__open_and_load"))
+ return;
+
+ area = bpf_map__initial_value(skel->maps.arena, &arena_sz);
+ /* fault-in a page with pgoff == 0 as sanity check */
+ *(volatile int *)area = 0x55aa;
+
+ /* bpf prog will allocate more pages */
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_llvm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+out:
+ arena_htab__destroy(skel);
+}
+
+static void test_arena_htab_asm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab_asm *skel;
+ struct htab *htab;
+ int ret;
+
+ skel = arena_htab_asm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab_asm__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_asm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+ arena_htab_asm__destroy(skel);
+}
+
+void test_arena_htab(void)
+{
+ if (test__start_subtest("arena_htab_llvm"))
+ test_arena_htab_llvm();
+ if (test__start_subtest("arena_htab_asm"))
+ test_arena_htab_asm();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
new file mode 100644
index 000000000000..e61886debab1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+
+#define PAGE_SIZE 4096
+
+#include "bpf_arena_list.h"
+#include "arena_list.skel.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+static int list_sum(struct arena_list_head *head)
+{
+ struct elem __arena *n;
+ int sum = 0;
+
+ list_for_each_entry(n, head, node)
+ sum += n->value;
+ return sum;
+}
+
+static void test_arena_list_add_del(int cnt)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_list *skel;
+ int expected_sum = (u64)cnt * (cnt - 1) / 2;
+ int ret, sum;
+
+ skel = arena_list__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load"))
+ return;
+
+ skel->bss->cnt = cnt;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, expected_sum, "sum of elems");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+ ASSERT_EQ(skel->arena->test_val, cnt + 1, "num of elems");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_del), &opts);
+ ASSERT_OK(ret, "ret_del");
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, 0, "sum of list elems after del");
+ ASSERT_EQ(skel->bss->list_sum, expected_sum, "sum of list elems computed by prog");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+out:
+ arena_list__destroy(skel);
+}
+
+void test_arena_list(void)
+{
+ if (test__start_subtest("arena_list_1"))
+ test_arena_list_add_del(1);
+ if (test__start_subtest("arena_list_1000"))
+ test_arena_list_add_del(1000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
new file mode 100644
index 000000000000..6a707213e46b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "bad_struct_ops.skel.h"
+#include "bad_struct_ops2.skel.h"
+
+static void invalid_prog_reuse(void)
+{
+ struct bad_struct_ops *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops__load(skel);
+ log = stop_libbpf_log_capture();
+ ASSERT_ERR(err, "bad_struct_ops__load should fail");
+ ASSERT_HAS_SUBSTR(log,
+ "struct_ops init_kern testmod_2 func ptr test_1: invalid reuse of prog test_1",
+ "expected init_kern message");
+
+cleanup:
+ free(log);
+ bad_struct_ops__destroy(skel);
+}
+
+static void unused_program(void)
+{
+ struct bad_struct_ops2 *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops2__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops2__open"))
+ return;
+
+ /* struct_ops programs not referenced from any maps are open
+ * with autoload set to true.
+ */
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo autoload == true");
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops2__load(skel);
+ ASSERT_ERR(err, "bad_struct_ops2__load should fail");
+ log = stop_libbpf_log_capture();
+ ASSERT_HAS_SUBSTR(log, "prog 'foo': failed to load",
+ "message about 'foo' failing to load");
+
+cleanup:
+ free(log);
+ bad_struct_ops2__destroy(skel);
+}
+
+void test_bad_struct_ops(void)
+{
+ if (test__start_subtest("invalid_prog_reuse"))
+ invalid_prog_reuse();
+ if (test__start_subtest("unused_program"))
+ unused_program();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e770912fc1d2..4c6ada5b270b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type)
}
bpf_program__set_type(prog, type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
+ bpf_program__set_flags(prog, testing_prog_flags());
bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
err = bpf_object__load(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 816145bcb647..00965a6e83bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3535,6 +3535,32 @@ static struct btf_raw_test raw_tests[] = {
.value_type_id = 1,
.max_entries = 1,
},
+{
+ .descr = "datasec: name '?.foo bar:buz' is ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0?.foo bar:buz"),
+},
+{
+ .descr = "type name '?foo' is not ok",
+ .raw_types = {
+ /* union ?foo; */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0?foo"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
{
.descr = "float test #1, well-formed",
@@ -4363,6 +4389,9 @@ static void do_test_raw(unsigned int test_num)
if (err || btf_fd < 0)
goto done;
+ if (!test->map_type)
+ goto done;
+
opts.btf_fd = btf_fd;
opts.btf_key_type_id = test->key_type_id;
opts.btf_value_type_id = test->value_type_id;
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index c2e886399e3c..ecf89df78109 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -27,7 +27,7 @@ static void verify_success(const char *prog_name)
struct bpf_program *prog;
struct bpf_link *link = NULL;
pid_t child_pid;
- int status;
+ int status, err;
skel = cpumask_success__open();
if (!ASSERT_OK_PTR(skel, "cpumask_success__open"))
@@ -36,8 +36,8 @@ static void verify_success(const char *prog_name)
skel->bss->pid = getpid();
skel->bss->nr_cpus = libbpf_num_possible_cpus();
- cpumask_success__load(skel);
- if (!ASSERT_OK_PTR(skel, "cpumask_success__load"))
+ err = cpumask_success__load(skel);
+ if (!ASSERT_OK(err, "cpumask_success__load"))
goto cleanup;
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
index 4951aa978f33..3b7c57fe55a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -626,50 +626,6 @@ err:
return false;
}
-/* Request BPF program instructions after all rewrites are applied,
- * e.g. verifier.c:convert_ctx_access() is done.
- */
-static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
-{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 xlated_prog_len;
- __u32 buf_element_size = sizeof(struct bpf_insn);
-
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("bpf_prog_get_info_by_fd failed");
- return -1;
- }
-
- xlated_prog_len = info.xlated_prog_len;
- if (xlated_prog_len % buf_element_size) {
- printf("Program length %d is not multiple of %d\n",
- xlated_prog_len, buf_element_size);
- return -1;
- }
-
- *cnt = xlated_prog_len / buf_element_size;
- *buf = calloc(*cnt, buf_element_size);
- if (!buf) {
- perror("can't allocate xlated program buffer");
- return -ENOMEM;
- }
-
- bzero(&info, sizeof(info));
- info.xlated_prog_len = xlated_prog_len;
- info.xlated_prog_insns = (__u64)(unsigned long)*buf;
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("second bpf_prog_get_info_by_fd failed");
- goto out_free_buf;
- }
-
- return 0;
-
-out_free_buf:
- free(*buf);
- return -1;
-}
-
static void print_insn(void *private_data, const char *fmt, ...)
{
va_list args;
diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
index 5c0ebe6ba866..dcb9e5070cc3 100644
--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -72,6 +72,6 @@ fail:
bpf_tc_hook_destroy(&qdisc_hook);
close_netns(nstoken);
}
- SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST);
decap_sanity__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 4ad4cd69152e..3379df2d4cf2 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -298,6 +298,6 @@ void test_fib_lookup(void)
fail:
if (nstoken)
close_netns(nstoken);
- SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST);
fib_lookup__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index d4b1901f7879..f3932941bbaa 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -19,6 +19,7 @@ static const char *kmulti_syms[] = {
};
#define KMULTI_CNT ARRAY_SIZE(kmulti_syms)
static __u64 kmulti_addrs[KMULTI_CNT];
+static __u64 kmulti_cookies[] = { 3, 1, 2 };
#define KPROBE_FUNC "bpf_fentry_test1"
static __u64 kprobe_addr;
@@ -31,6 +32,8 @@ static noinline void uprobe_func(void)
asm volatile ("");
}
+#define PERF_EVENT_COOKIE 0xdeadbeef
+
static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
ssize_t offset, ssize_t entry_offset)
{
@@ -62,6 +65,8 @@ again:
ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset,
"kprobe_addr");
+ ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie");
+
if (!info.perf_event.kprobe.func_name) {
ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len");
info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
@@ -81,6 +86,8 @@ again:
goto again;
}
+ ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie");
+
err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME,
strlen(TP_NAME));
ASSERT_EQ(err, 0, "cmp_tp_name");
@@ -96,10 +103,17 @@ again:
goto again;
}
+ ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie");
+
err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE,
strlen(UPROBE_FILE));
ASSERT_EQ(err, 0, "cmp_file_name");
break;
+ case BPF_PERF_EVENT_EVENT:
+ ASSERT_EQ(info.perf_event.event.type, PERF_TYPE_SOFTWARE, "event_type");
+ ASSERT_EQ(info.perf_event.event.config, PERF_COUNT_SW_PAGE_FAULTS, "event_config");
+ ASSERT_EQ(info.perf_event.event.cookie, PERF_EVENT_COOKIE, "event_cookie");
+ break;
default:
err = -1;
break;
@@ -139,6 +153,7 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
.attach_mode = PROBE_ATTACH_MODE_LINK,
.retprobe = type == BPF_PERF_EVENT_KRETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
);
ssize_t entry_offset = 0;
struct bpf_link *link;
@@ -163,10 +178,13 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
static void test_tp_fill_link_info(struct test_fill_link_info *skel)
{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
struct bpf_link *link;
int link_fd, err;
- link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME);
+ link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts);
if (!ASSERT_OK_PTR(link, "attach_tp"))
return;
@@ -176,16 +194,53 @@ static void test_tp_fill_link_info(struct test_fill_link_info *skel)
bpf_link__destroy(link);
}
+static void test_event_fill_link_info(struct test_fill_link_info *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ struct bpf_link *link;
+ int link_fd, err, pfd;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_PAGE_FAULTS,
+ .freq = 1,
+ .sample_freq = 1,
+ .size = sizeof(struct perf_event_attr),
+ };
+
+ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */,
+ -1 /* group id */, 0 /* flags */);
+ if (!ASSERT_GE(pfd, 0, "perf_event_open"))
+ return;
+
+ link = bpf_program__attach_perf_event_opts(skel->progs.event_run, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_event"))
+ goto error;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_EVENT, 0, 0, 0);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+
+error:
+ close(pfd);
+}
+
static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
enum bpf_perf_event_type type)
{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = type == BPF_PERF_EVENT_URETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
struct bpf_link *link;
int link_fd, err;
- link = bpf_program__attach_uprobe(skel->progs.uprobe_run,
- type == BPF_PERF_EVENT_URETPROBE,
- 0, /* self pid */
- UPROBE_FILE, uprobe_offset);
+ link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
+ 0, /* self pid */
+ UPROBE_FILE, uprobe_offset,
+ &opts);
if (!ASSERT_OK_PTR(link, "attach_uprobe"))
return;
@@ -195,11 +250,11 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
bpf_link__destroy(link);
}
-static int verify_kmulti_link_info(int fd, bool retprobe)
+static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
struct bpf_link_info info;
__u32 len = sizeof(info);
- __u64 addrs[KMULTI_CNT];
int flags, i, err;
memset(&info, 0, sizeof(info));
@@ -221,18 +276,22 @@ again:
if (!info.kprobe_multi.addrs) {
info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
goto again;
}
- for (i = 0; i < KMULTI_CNT; i++)
+ for (i = 0; i < KMULTI_CNT; i++) {
ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+ ASSERT_EQ(cookies[i], has_cookies ? kmulti_cookies[i] : 0,
+ "kmulti_cookies_value");
+ }
return 0;
}
static void verify_kmulti_invalid_user_buffer(int fd)
{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
struct bpf_link_info info;
__u32 len = sizeof(info);
- __u64 addrs[KMULTI_CNT];
int err, i;
memset(&info, 0, sizeof(info));
@@ -266,7 +325,20 @@ static void verify_kmulti_invalid_user_buffer(int fd)
info.kprobe_multi.count = KMULTI_CNT;
info.kprobe_multi.addrs = 0x1; /* invalid addr */
err = bpf_link_get_info_by_fd(fd, &info, &len);
- ASSERT_EQ(err, -EFAULT, "invalid_buff");
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_addrs");
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = 0x1; /* invalid addr */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_cookies");
+
+ /* cookies && !count */
+ info.kprobe_multi.count = 0;
+ info.kprobe_multi.addrs = ptr_to_u64(NULL);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_cookies_count");
}
static int symbols_cmp_r(const void *a, const void *b)
@@ -278,13 +350,15 @@ static int symbols_cmp_r(const void *a, const void *b)
}
static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel,
- bool retprobe, bool invalid)
+ bool retprobe, bool cookies,
+ bool invalid)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
struct bpf_link *link;
int link_fd, err;
opts.syms = kmulti_syms;
+ opts.cookies = cookies ? kmulti_cookies : NULL;
opts.cnt = KMULTI_CNT;
opts.retprobe = retprobe;
link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts);
@@ -293,7 +367,7 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel,
link_fd = bpf_link__fd(link);
if (!invalid) {
- err = verify_kmulti_link_info(link_fd, retprobe);
+ err = verify_kmulti_link_info(link_fd, retprobe, cookies);
ASSERT_OK(err, "verify_kmulti_link_info");
} else {
verify_kmulti_invalid_user_buffer(link_fd);
@@ -513,6 +587,8 @@ void test_fill_link_info(void)
test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true);
if (test__start_subtest("tracepoint_link_info"))
test_tp_fill_link_info(skel);
+ if (test__start_subtest("event_link_info"))
+ test_event_fill_link_info(skel);
uprobe_offset = get_uprobe_offset(&uprobe_func);
if (test__start_subtest("uprobe_link_info"))
@@ -523,12 +599,16 @@ void test_fill_link_info(void)
qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r);
for (i = 0; i < KMULTI_CNT; i++)
kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]);
- if (test__start_subtest("kprobe_multi_link_info"))
- test_kprobe_multi_fill_link_info(skel, false, false);
- if (test__start_subtest("kretprobe_multi_link_info"))
- test_kprobe_multi_fill_link_info(skel, true, false);
+ if (test__start_subtest("kprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, false, false, false);
+ test_kprobe_multi_fill_link_info(skel, false, true, false);
+ }
+ if (test__start_subtest("kretprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, true, false, false);
+ test_kprobe_multi_fill_link_info(skel, true, true, false);
+ }
if (test__start_subtest("kprobe_multi_invalid_ubuff"))
- test_kprobe_multi_fill_link_info(skel, true, true);
+ test_kprobe_multi_fill_link_info(skel, true, true, true);
if (test__start_subtest("uprobe_multi_link_info"))
test_uprobe_multi_fill_link_info(skel, false, false);
diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
index 57c814f5f6a7..8dd2af9081f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
@@ -59,9 +59,9 @@ static int setup_topology(bool ipv6)
/* Wait for up to 5s for links to come up */
for (i = 0; i < 5; ++i) {
if (ipv6)
- up = !system("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6 " &>/dev/null");
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6);
else
- up = !system("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR " &>/dev/null");
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR);
if (up)
break;
diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
new file mode 100644
index 000000000000..7def158da9eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+
+#include "linux/filter.h"
+#include "kptr_xchg_inline.skel.h"
+
+void test_kptr_xchg_inline(void)
+{
+ struct kptr_xchg_inline *skel;
+ struct bpf_insn *insn = NULL;
+ struct bpf_insn exp;
+ unsigned int cnt;
+ int err;
+
+#if !(defined(__x86_64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64))
+ test__skip();
+ return;
+#endif
+
+ skel = kptr_xchg_inline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = get_xlated_program(bpf_program__fd(skel->progs.kptr_xchg_inline), &insn, &cnt);
+ if (!ASSERT_OK(err, "prog insn"))
+ goto out;
+
+ /* The original instructions are:
+ * r1 = map[id:xxx][0]+0
+ * r2 = 0
+ * call bpf_kptr_xchg#yyy
+ *
+ * call bpf_kptr_xchg#yyy will be inlined as:
+ * r0 = r2
+ * r0 = atomic64_xchg((u64 *)(r1 +0), r0)
+ */
+ if (!ASSERT_GT(cnt, 5, "insn cnt"))
+ goto out;
+
+ exp = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+ if (!ASSERT_OK(memcmp(&insn[3], &exp, sizeof(exp)), "mov"))
+ goto out;
+
+ exp = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+ if (!ASSERT_OK(memcmp(&insn[4], &exp, sizeof(exp)), "xchg"))
+ goto out;
+out:
+ free(insn);
+ kptr_xchg_inline__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
index 9f766ddd946a..4ed46ed58a7b 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
@@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void)
if (prog_type == BPF_PROG_TYPE_UNSPEC)
continue;
+ if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0)
+ continue;
if (!test__start_subtest(prog_type_name))
continue;
@@ -68,6 +70,8 @@ void test_libbpf_probe_map_types(void)
if (map_type == BPF_MAP_TYPE_UNSPEC)
continue;
+ if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0)
+ continue;
if (!test__start_subtest(map_type_name))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
index eb34d612d6f8..62ea855ec4d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void)
const char *map_type_str;
char buf[256];
+ if (map_type == __MAX_BPF_MAP_TYPE)
+ continue;
+
map_type_name = btf__str_by_offset(btf, e->name_off);
map_type_str = libbpf_bpf_map_type_str(map_type);
ASSERT_OK_PTR(map_type_str, map_type_name);
@@ -186,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void)
const char *prog_type_str;
char buf[256];
+ if (prog_type == __MAX_BPF_PROG_TYPE)
+ continue;
+
prog_type_name = btf__str_by_offset(btf, e->name_off);
prog_type_str = libbpf_bpf_prog_type_str(prog_type);
ASSERT_OK_PTR(prog_type_str, prog_type_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
index 7a3fa2ff567b..90a98e23be61 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -169,9 +169,9 @@ void test_log_fixup(void)
if (test__start_subtest("bad_core_relo_trunc_none"))
bad_core_relo(0, TRUNC_NONE /* full buf */);
if (test__start_subtest("bad_core_relo_trunc_partial"))
- bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */);
+ bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
if (test__start_subtest("bad_core_relo_trunc_full"))
- bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */);
+ bad_core_relo(240, TRUNC_FULL /* truncate also libbpf's message patch */);
if (test__start_subtest("bad_core_relo_subprog"))
bad_core_relo_subprog();
if (test__start_subtest("missing_map"))
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
index e9190574e79f..fb1eb8c67361 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -27,8 +27,6 @@
} \
})
-#define NETNS "ns_lwt"
-
static inline int netns_create(void)
{
return system("ip netns add " NETNS);
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
index 59b38569f310..835a1d756c16 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
@@ -54,6 +54,7 @@
#include <stdbool.h>
#include <stdlib.h>
+#define NETNS "ns_lwt_redirect"
#include "lwt_helpers.h"
#include "test_progs.h"
#include "network_helpers.h"
@@ -85,7 +86,7 @@ static void ping_dev(const char *dev, bool is_ingress)
snprintf(ip, sizeof(ip), "20.0.0.%d", link_index);
/* We won't get a reply. Don't fail here */
- SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
ip, ICMP_PAYLOAD_SIZE);
}
@@ -203,6 +204,7 @@ static int setup_redirect_target(const char *target_dev, bool need_mac)
if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
goto fail;
+ SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1");
SYS(fail, "ip link add link_err type dummy");
SYS(fail, "ip link set lo up");
SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
index f4bb2d5fcae0..03825d2b45a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
@@ -48,6 +48,7 @@
* For case 2, force UDP packets to overflow fq limit. As long as kernel
* is not crashed, it is considered successful.
*/
+#define NETNS "ns_lwt_reroute"
#include "lwt_helpers.h"
#include "network_helpers.h"
#include <linux/net_tstamp.h>
@@ -63,7 +64,7 @@
static void ping_once(const char *ip)
{
/* We won't get a reply. Don't fail here */
- SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
ip, ICMP_PAYLOAD_SIZE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 7c0be7cf550b..8f8d792307c1 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -79,7 +79,7 @@ static void cleanup_netns(struct nstoken *nstoken)
if (nstoken)
close_netns(nstoken);
- SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST);
+ SYS_NOFAIL("ip netns del %s", NS_TEST);
}
static int verify_tsk(int map_fd, int client_fd)
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index 3f1f58d3a729..a1f7e7378a64 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -29,6 +29,10 @@ static void test_success(void)
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_lock, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_unlock, true);
err = rcu_read_lock__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
@@ -75,6 +79,8 @@ static const char * const inproper_region_tests[] = {
"inproper_sleepable_helper",
"inproper_sleepable_kfunc",
"nested_rcu_region",
+ "rcu_read_lock_global_subprog_lock",
+ "rcu_read_lock_global_subprog_unlock",
};
static void test_inproper_region(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index 820d0bcfc474..eb74363f9f70 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -840,7 +840,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op,
.log_level = 2,
.log_buf = log_buf,
.log_size = log_sz,
- .prog_flags = BPF_F_TEST_REG_INVARIANTS,
+ .prog_flags = testing_prog_flags(),
);
/* ; skip exit block below
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
index b0583309a94e..9c11938fe597 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
@@ -214,7 +214,7 @@ void test_sock_destroy(void)
cleanup:
if (nstoken)
close_netns(nstoken);
- SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+ SYS_NOFAIL("ip netns del " TEST_NS);
if (cgroup_fd >= 0)
close(cgroup_fd);
sock_destroy_prog__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index 0c365f36c73b..d56e18b25528 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -112,7 +112,7 @@ void test_sock_iter_batch(void)
{
struct nstoken *nstoken = NULL;
- SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+ SYS_NOFAIL("ip netns del " TEST_NS);
SYS(done, "ip netns add %s", TEST_NS);
SYS(done, "ip -net %s link set dev lo up", TEST_NS);
@@ -131,5 +131,5 @@ void test_sock_iter_batch(void)
close_netns(nstoken);
done:
- SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+ SYS_NOFAIL("ip netns del " TEST_NS);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index 18d451be57c8..2b0068742ef9 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -48,6 +48,8 @@ static struct {
{ "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" },
{ "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" },
{ "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
+ { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
};
static int match_regex(const char *pattern, const char *string)
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
new file mode 100644
index 000000000000..a5cc593c1e1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_autocreate.skel.h"
+#include "struct_ops_autocreate2.skel.h"
+
+static void cant_load_full_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ char *log = NULL;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+ /* The testmod_2 map BTF type (struct bpf_testmod_ops___v2) doesn't
+ * match the BTF of the actual struct bpf_testmod_ops defined in the
+ * kernel, so we should fail to load it if we don't disable autocreate
+ * for that map.
+ */
+ err = struct_ops_autocreate__load(skel);
+ log = stop_libbpf_log_capture();
+ if (!ASSERT_ERR(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log, "libbpf: struct_ops init_kern", "init_kern message");
+ ASSERT_EQ(err, -ENOTSUP, "errno should be ENOTSUP");
+
+cleanup:
+ free(log);
+ struct_ops_autocreate__destroy(skel);
+}
+
+static int check_test_1_link(struct struct_ops_autocreate *skel, struct bpf_map *map)
+{
+ struct bpf_link *link;
+ int err;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ return -1;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+ bpf_link__destroy(link);
+ return err;
+}
+
+static void can_load_partial_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 default autoload");
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_2), "test_2 default autoload");
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.test_2), "test_2 actual autoload");
+
+ check_test_1_link(skel, skel->maps.testmod_1);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+static void optional_maps(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_1), "testmod_1 autocreate");
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_2), "testmod_2 autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map), "optional_map autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map2), "optional_map2 autocreate");
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_1, false);
+ err |= bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ err |= bpf_map__set_autocreate(skel->maps.optional_map2, true);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ check_test_1_link(skel, skel->maps.optional_map2);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+/* Swap test_mod1->test_1 program from 'bar' to 'foo' using shadow vars.
+ * test_mod1 load should enable autoload for 'foo'.
+ */
+static void autoload_and_shadow_vars(void)
+{
+ struct struct_ops_autocreate2 *skel = NULL;
+ struct bpf_link *link = NULL;
+ int err;
+
+ skel = struct_ops_autocreate2__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.foo), "foo default autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar default autoload");
+
+ /* loading map testmod_1 would switch foo's autoload to true */
+ skel->struct_ops.testmod_1->test_1 = skel->progs.foo;
+
+ err = struct_ops_autocreate2__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar actual autoload");
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ goto cleanup;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ struct_ops_autocreate2__destroy(skel);
+}
+
+void test_struct_ops_autocreate(void)
+{
+ if (test__start_subtest("cant_load_full_object"))
+ cant_load_full_object();
+ if (test__start_subtest("can_load_partial_object"))
+ can_load_partial_object();
+ if (test__start_subtest("autoload_and_shadow_vars"))
+ autoload_and_shadow_vars();
+ if (test__start_subtest("optional_maps"))
+ optional_maps();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index ea8537c54413..c33c05161a9e 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -117,12 +117,6 @@ static void test_recursion(void)
ASSERT_OK(err, "lookup map_b");
ASSERT_EQ(value, 100, "map_b value");
- prog_fd = bpf_program__fd(skel->progs.on_lookup);
- memset(&info, 0, sizeof(info));
- err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
- ASSERT_OK(err, "get prog info");
- ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion");
-
prog_fd = bpf_program__fd(skel->progs.on_update);
memset(&info, 0, sizeof(info));
err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 518f143c5b0f..dbe06aeaa2b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -188,6 +188,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
struct nstoken *nstoken = NULL;
char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+ char src_addr[IFADDR_STR_LEN + 1] = {};
int err;
if (result->dev_mode == MODE_VETH) {
@@ -208,6 +209,9 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (get_ifaddr("src_fwd", src_fwd_addr))
goto fail;
+ if (get_ifaddr("src", src_addr))
+ goto fail;
+
result->ifindex_src = if_nametoindex("src");
if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
goto fail;
@@ -270,6 +274,13 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
+ SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
+ }
+
close_netns(nstoken);
/** setup in 'dst' namespace */
@@ -280,6 +291,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
SYS(fail, "ip link set dev dst up");
+ SYS(fail, "ip link set dev lo up");
SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
@@ -457,7 +469,7 @@ static int set_forwarding(bool enable)
return 0;
}
-static void rcv_tstamp(int fd, const char *expected, size_t s)
+static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
struct __kernel_timespec pkt_ts = {};
char ctl[CMSG_SPACE(sizeof(pkt_ts))];
@@ -478,7 +490,7 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
ret = recvmsg(fd, &msg, 0);
if (!ASSERT_EQ(ret, s, "recvmsg"))
- return;
+ return -1;
ASSERT_STRNEQ(data, expected, s, "expected rcv data");
cmsg = CMSG_FIRSTHDR(&msg);
@@ -487,6 +499,12 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
+ if (tstamp) {
+ /* caller will check the tstamp itself */
+ *tstamp = pkt_ns;
+ return 0;
+ }
+
ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
ret = clock_gettime(CLOCK_REALTIME, &now_ts);
@@ -496,6 +514,60 @@ static void rcv_tstamp(int fd, const char *expected, size_t s)
if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
"check rcv tstamp");
+ return 0;
+}
+
+static void rcv_tstamp(int fd, const char *expected, size_t s)
+{
+ __rcv_tstamp(fd, expected, s, NULL);
+}
+
+static int wait_netstamp_needed_key(void)
+{
+ int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
+ char buf[] = "testing testing";
+ struct nstoken *nstoken;
+ __u64 tstamp = 0;
+
+ nstoken = open_netns(NS_DST);
+ if (!nstoken)
+ return -1;
+
+ srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto done;
+
+ err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+ &opt, sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
+ goto done;
+
+again:
+ n = write(cli_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+ err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
+ if (!ASSERT_OK(err, "__rcv_tstamp"))
+ goto done;
+ if (!tstamp && nretries++ < 5) {
+ sleep(1);
+ printf("netstamp_needed_key retry#%d\n", nretries);
+ goto again;
+ }
+
+done:
+ if (!tstamp && srv_fd != -1) {
+ close(srv_fd);
+ srv_fd = -1;
+ }
+ if (cli_fd != -1)
+ close(cli_fd);
+ close_netns(nstoken);
+ return srv_fd;
}
static void snd_tstamp(int fd, char *b, size_t s)
@@ -832,11 +904,20 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
{
struct test_tc_dtime *skel;
struct nstoken *nstoken;
- int err;
+ int hold_tstamp_fd, err;
+
+ /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
+ * is no delay in the kernel net_enable_timestamp().
+ * This ensures the following tests must have
+ * non zero rcv tstamp in the recvmsg().
+ */
+ hold_tstamp_fd = wait_netstamp_needed_key();
+ if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
+ return;
skel = test_tc_dtime__open();
if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
@@ -881,6 +962,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
done:
test_tc_dtime__destroy(skel);
+ close(hold_tstamp_fd);
}
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
new file mode 100644
index 000000000000..eaf441dc7e79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_custom_syncookie.skel.h"
+
+static struct test_tcp_custom_syncookie_case {
+ int family, type;
+ char addr[16];
+ char name[10];
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .addr = "127.0.0.1",
+ },
+ {
+ .name = "IPv6 TCP",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .addr = "::1",
+ },
+};
+
+static int setup_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "ip"))
+ goto err;
+
+ if (!ASSERT_OK(write_sysctl("/proc/sys/net/ipv4/tcp_ecn", "1"),
+ "write_sysctl"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int setup_tc(struct test_tcp_custom_syncookie *skel)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach,
+ .prog_fd = bpf_program__fd(skel->progs.tcp_custom_syncookie));
+
+ qdisc_lo.ifindex = if_nametoindex("lo");
+ if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
+ "filter add dev lo ingress"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+#define msg "Hello World"
+#define msglen 11
+
+static void transfer_message(int sender, int receiver)
+{
+ char buf[msglen];
+ int ret;
+
+ ret = send(sender, msg, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "send"))
+ return;
+
+ memset(buf, 0, sizeof(buf));
+
+ ret = recv(receiver, buf, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "recv"))
+ return;
+
+ ret = strncmp(buf, msg, msglen);
+ if (!ASSERT_EQ(ret, 0, "strncmp"))
+ return;
+}
+
+static void create_connection(struct test_tcp_custom_syncookie_case *test_case)
+{
+ int server, client, child;
+
+ server = start_server(test_case->family, test_case->type, test_case->addr, 0, 0);
+ if (!ASSERT_NEQ(server, -1, "start_server"))
+ return;
+
+ client = connect_to_fd(server, 0);
+ if (!ASSERT_NEQ(client, -1, "connect_to_fd"))
+ goto close_server;
+
+ child = accept(server, NULL, 0);
+ if (!ASSERT_NEQ(child, -1, "accept"))
+ goto close_client;
+
+ transfer_message(client, child);
+ transfer_message(child, client);
+
+ close(child);
+close_client:
+ close(client);
+close_server:
+ close(server);
+}
+
+void test_tcp_custom_syncookie(void)
+{
+ struct test_tcp_custom_syncookie *skel;
+ int i;
+
+ if (setup_netns())
+ return;
+
+ skel = test_tcp_custom_syncookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (setup_tc(skel))
+ goto destroy_skel;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (!test__start_subtest(test_cases[i].name))
+ continue;
+
+ skel->bss->handled_syn = false;
+ skel->bss->handled_ack = false;
+
+ create_connection(&test_cases[i]);
+
+ ASSERT_EQ(skel->bss->handled_syn, true, "SYN is not handled at tc.");
+ ASSERT_EQ(skel->bss->handled_ack, true, "ACK is not handled at tc");
+ }
+
+destroy_skel:
+ system("tc qdisc del dev lo clsact");
+
+ test_tcp_custom_syncookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
new file mode 100644
index 000000000000..01dc2613c8a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_maybe_null.skel.h"
+#include "struct_ops_maybe_null_fail.skel.h"
+
+/* Test that the verifier accepts a program that access a nullable pointer
+ * with a proper check.
+ */
+static void maybe_null(void)
+{
+ struct struct_ops_maybe_null *skel;
+
+ skel = struct_ops_maybe_null__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_and_load"))
+ return;
+
+ struct_ops_maybe_null__destroy(skel);
+}
+
+/* Test that the verifier rejects a program that access a nullable pointer
+ * without a check beforehand.
+ */
+static void maybe_null_fail(void)
+{
+ struct struct_ops_maybe_null_fail *skel;
+
+ skel = struct_ops_maybe_null_fail__open_and_load();
+ if (ASSERT_ERR_PTR(skel, "struct_ops_module_fail__open_and_load"))
+ return;
+
+ struct_ops_maybe_null_fail__destroy(skel);
+}
+
+void test_struct_ops_maybe_null(void)
+{
+ /* The verifier verifies the programs at load time, so testing both
+ * programs in the same compile-unit is complicated. We run them in
+ * separate objects to simplify the testing.
+ */
+ if (test__start_subtest("maybe_null"))
+ maybe_null();
+ if (test__start_subtest("maybe_null_fail"))
+ maybe_null_fail();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
new file mode 100644
index 000000000000..ee5372c7f2c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <time.h>
+
+#include "struct_ops_module.skel.h"
+
+static void check_map_info(struct bpf_map_info *info)
+{
+ struct bpf_btf_info btf_info;
+ char btf_name[256];
+ u32 btf_info_len = sizeof(btf_info);
+ int err, fd;
+
+ fd = bpf_btf_get_fd_by_id(info->btf_vmlinux_id);
+ if (!ASSERT_GE(fd, 0, "get_value_type_btf_obj_fd"))
+ return;
+
+ memset(&btf_info, 0, sizeof(btf_info));
+ btf_info.name = ptr_to_u64(btf_name);
+ btf_info.name_len = sizeof(btf_name);
+ err = bpf_btf_get_info_by_fd(fd, &btf_info, &btf_info_len);
+ if (!ASSERT_OK(err, "get_value_type_btf_obj_info"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(strcmp(btf_name, "bpf_testmod"), 0, "get_value_type_btf_obj_name"))
+ goto cleanup;
+
+cleanup:
+ close(fd);
+}
+
+static int attach_ops_and_check(struct struct_ops_module *skel,
+ struct bpf_map *map,
+ int expected_test_2_result)
+{
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(map);
+ ASSERT_OK_PTR(link, "attach_test_mod_1");
+ if (!link)
+ return -1;
+
+ /* test_{1,2}() would be called from bpf_dummy_reg() in bpf_testmod.c */
+ ASSERT_EQ(skel->bss->test_1_result, 0xdeadbeef, "test_1_result");
+ ASSERT_EQ(skel->bss->test_2_result, expected_test_2_result, "test_2_result");
+
+ bpf_link__destroy(link);
+ return 0;
+}
+
+static void test_struct_ops_load(void)
+{
+ struct struct_ops_module *skel;
+ struct bpf_map_info info = {};
+ int err;
+ u32 len;
+
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ skel->struct_ops.testmod_1->data = 13;
+ skel->struct_ops.testmod_1->test_2 = skel->progs.test_3;
+ /* Since test_2() is not being used, it should be disabled from
+ * auto-loading, or it will fail to load.
+ */
+ bpf_program__set_autoload(skel->progs.test_2, false);
+
+ err = struct_ops_module__load(skel);
+ if (!ASSERT_OK(err, "struct_ops_module_load"))
+ goto cleanup;
+
+ len = sizeof(info);
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.testmod_1), &info,
+ &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto cleanup;
+
+ check_map_info(&info);
+ /* test_3() will be called from bpf_dummy_reg() in bpf_testmod.c
+ *
+ * In bpf_testmod.c it will pass 4 and 13 (the value of data) to
+ * .test_2. So, the value of test_2_result should be 20 (4 + 13 +
+ * 3).
+ */
+ if (!attach_ops_and_check(skel, skel->maps.testmod_1, 20))
+ goto cleanup;
+ if (!attach_ops_and_check(skel, skel->maps.testmod_2, 12))
+ goto cleanup;
+
+cleanup:
+ struct_ops_module__destroy(skel);
+}
+
+void serial_test_struct_ops_module(void)
+{
+ if (test__start_subtest("test_struct_ops_load"))
+ test_struct_ops_load();
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
new file mode 100644
index 000000000000..645d32b5160c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_multi_pages.skel.h"
+
+static void do_struct_ops_multi_pages(void)
+{
+ struct struct_ops_multi_pages *skel;
+ struct bpf_link *link;
+
+ /* The size of all trampolines of skel->maps.multi_pages should be
+ * over 1 page (at least for x86).
+ */
+ skel = struct_ops_multi_pages__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_multi_pages_open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.multi_pages);
+ ASSERT_OK_PTR(link, "attach_multi_pages");
+
+ bpf_link__destroy(link);
+ struct_ops_multi_pages__destroy(skel);
+}
+
+void test_struct_ops_multi_pages(void)
+{
+ if (test__start_subtest("multi_pages"))
+ do_struct_ops_multi_pages();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
new file mode 100644
index 000000000000..106ea447965a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <testing_helpers.h>
+
+static void load_bpf_test_no_cfi(void)
+{
+ int fd;
+ int err;
+
+ fd = open("bpf_test_no_cfi.ko", O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open"))
+ return;
+
+ /* The module will try to register a struct_ops type without
+ * cfi_stubs and with cfi_stubs.
+ *
+ * The one without cfi_stub should fail. The module will be loaded
+ * successfully only if the result of the registration is as
+ * expected, or it fails.
+ */
+ err = finit_module(fd, "", 0);
+ close(fd);
+ if (!ASSERT_OK(err, "finit_module"))
+ return;
+
+ err = delete_module("bpf_test_no_cfi", 0);
+ ASSERT_OK(err, "delete_module");
+}
+
+void test_struct_ops_no_cfi(void)
+{
+ if (test__start_subtest("load_bpf_test_no_cfi"))
+ load_bpf_test_no_cfi();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 2b3c6dd66259..5f1fb0a2ea56 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -118,9 +118,9 @@ fail:
static void cleanup(void)
{
SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0");
- SYS_NOFAIL("ip link del veth1 2> /dev/null");
- SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1);
- SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1);
+ SYS_NOFAIL("ip link del veth1");
+ SYS_NOFAIL("ip link del %s", VXLAN_TUNL_DEV1);
+ SYS_NOFAIL("ip link del %s", IP6VXLAN_TUNL_DEV1);
}
static int add_vxlan_tunnel(void)
@@ -265,9 +265,9 @@ fail:
static void delete_ipip_tunnel(void)
{
SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0);
- SYS_NOFAIL("ip -n at_ns0 fou del port 5555 2> /dev/null");
+ SYS_NOFAIL("ip -n at_ns0 fou del port 5555");
SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1);
- SYS_NOFAIL("ip fou del port 5555 2> /dev/null");
+ SYS_NOFAIL("ip fou del port 5555");
}
static int add_xfrm_tunnel(void)
@@ -346,13 +346,13 @@ fail:
static void delete_xfrm_tunnel(void)
{
- SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null",
+ SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32",
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0);
- SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null",
+ SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32",
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1);
- SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT);
- SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
new file mode 100644
index 000000000000..fc4a175d8d76
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -0,0 +1,1052 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "cap_helpers.h"
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+#include "priv_map.skel.h"
+#include "priv_prog.skel.h"
+#include "dummy_st_ops_success.skel.h"
+#include "token_lsm.skel.h"
+
+static inline int sys_mount(const char *dev_name, const char *dir_name,
+ const char *type, unsigned long flags,
+ const void *data)
+{
+ return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
+}
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fspick(int dfd, const char *path, unsigned flags)
+{
+ return syscall(__NR_fspick, dfd, path, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
+}
+
+static int drop_priv_caps(__u64 *old_caps)
+{
+ return cap_disable_effective((1ULL << CAP_BPF) |
+ (1ULL << CAP_PERFMON) |
+ (1ULL << CAP_NET_ADMIN) |
+ (1ULL << CAP_SYS_ADMIN), old_caps);
+}
+
+static int restore_priv_caps(__u64 old_caps)
+{
+ return cap_enable_effective(old_caps, NULL);
+}
+
+static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
+{
+ char buf[32];
+ int err;
+
+ if (!mask_str) {
+ if (mask == ~0ULL) {
+ mask_str = "any";
+ } else {
+ snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
+ mask_str = buf;
+ }
+ }
+
+ err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
+ mask_str, 0);
+ if (err < 0)
+ err = -errno;
+ return err;
+}
+
+#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)
+
+struct bpffs_opts {
+ __u64 cmds;
+ __u64 maps;
+ __u64 progs;
+ __u64 attachs;
+ const char *cmds_str;
+ const char *maps_str;
+ const char *progs_str;
+ const char *attachs_str;
+};
+
+static int create_bpffs_fd(void)
+{
+ int fs_fd;
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ ASSERT_GE(fs_fd, 0, "fs_fd");
+
+ return fs_fd;
+}
+
+static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
+{
+ int mnt_fd, err;
+
+ /* set up token delegation mount options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
+ if (!ASSERT_OK(err, "fs_cfg_cmds"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
+ if (!ASSERT_OK(err, "fs_cfg_maps"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
+ if (!ASSERT_OK(err, "fs_cfg_progs"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
+ if (!ASSERT_OK(err, "fs_cfg_attachs"))
+ return err;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (err < 0)
+ return -errno;
+
+ /* create O_PATH fd for detached mount */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (err < 0)
+ return -errno;
+
+ return mnt_fd;
+}
+
+/* send FD over Unix domain (AF_UNIX) socket */
+static int sendfd(int sockfd, int fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1] = { fd }, err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
+ err = sendmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "sendmsg"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* receive FD over Unix domain (AF_UNIX) socket */
+static int recvfd(int sockfd, int *fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1], err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+
+ err = recvmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "recvmsg"))
+ return -EINVAL;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
+ !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
+ !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
+ !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
+ return -EINVAL;
+
+ memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
+ *fd = fds[0];
+
+ return 0;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel);
+
+static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
+{
+ int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
+ struct token_lsm *lsm_skel = NULL;
+
+ /* load and attach LSM "policy" before we go into unpriv userns */
+ lsm_skel = token_lsm__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ lsm_skel->bss->my_pid = getpid();
+ err = token_lsm__attach(lsm_skel);
+ if (!ASSERT_OK(err, "lsm_skel_attach"))
+ goto cleanup;
+
+ /* setup userns with root mappings */
+ err = create_and_enter_userns();
+ if (!ASSERT_OK(err, "create_and_enter_userns"))
+ goto cleanup;
+
+ /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "create_mountns"))
+ goto cleanup;
+
+ err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+ if (!ASSERT_OK(err, "remount_root"))
+ goto cleanup;
+
+ fs_fd = create_bpffs_fd();
+ if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, fs_fd);
+ if (!ASSERT_OK(err, "send_fs_fd"))
+ goto cleanup;
+ zclose(fs_fd);
+
+ /* avoid mucking around with mount namespaces and mounting at
+ * well-known path, just get detach-mounted BPF FS fd back from parent
+ */
+ err = recvfd(sock_fd, &mnt_fd);
+ if (!ASSERT_OK(err, "recv_mnt_fd"))
+ goto cleanup;
+
+ /* try to fspick() BPF FS and try to add some delegation options */
+ fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
+ if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot reconfigure to set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR);
+ if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* create BPF token FD and pass it to parent for some extra checks */
+ token_fd = bpf_token_create(bpffs_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "child_token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = sendfd(sock_fd, token_fd);
+ if (!ASSERT_OK(err, "send_token_fd"))
+ goto cleanup;
+ zclose(token_fd);
+
+ /* do custom test logic with customly set up BPF FS instance */
+ err = callback(bpffs_fd, lsm_skel);
+ if (!ASSERT_OK(err, "test_callback"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ zclose(sock_fd);
+ zclose(mnt_fd);
+ zclose(fs_fd);
+ zclose(bpffs_fd);
+ zclose(token_fd);
+
+ lsm_skel->bss->my_pid = 0;
+ token_lsm__destroy(lsm_skel);
+
+ exit(-err);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
+{
+ int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;
+
+ err = recvfd(sock_fd, &fs_fd);
+ if (!ASSERT_OK(err, "recv_bpffs_fd"))
+ goto cleanup;
+
+ mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, mnt_fd);
+ if (!ASSERT_OK(err, "send_mnt_fd"))
+ goto cleanup;
+ zclose(mnt_fd);
+
+ /* receive BPF token FD back from child for some extra tests */
+ err = recvfd(sock_fd, &token_fd);
+ if (!ASSERT_OK(err, "recv_token_fd"))
+ goto cleanup;
+
+ err = wait_for_pid(child_pid);
+ ASSERT_OK(err, "waitpid_child");
+
+cleanup:
+ zclose(sock_fd);
+ zclose(fs_fd);
+ zclose(mnt_fd);
+ zclose(token_fd);
+
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static void subtest_userns(struct bpffs_opts *bpffs_opts,
+ child_callback_fn child_cb)
+{
+ int sock_fds[2] = { -1, -1 };
+ int child_pid = 0, err;
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
+ if (!ASSERT_OK(err, "socketpair"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GE(child_pid, 0, "fork"))
+ goto cleanup;
+
+ if (child_pid == 0) {
+ zclose(sock_fds[0]);
+ return child(sock_fds[1], bpffs_opts, child_cb);
+
+ } else {
+ zclose(sock_fds[1]);
+ return parent(child_pid, bpffs_opts, sock_fds[0]);
+ }
+
+cleanup:
+ zclose(sock_fds[0]);
+ zclose(sock_fds[1]);
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts);
+ int err, token_fd = -1, map_fd = -1;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no token, no CAP_BPF -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* token without CAP_BPF -> fail */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* CAP_BPF without token -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* finally, namespaced CAP_BPF + token -> success */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+cleanup:
+ zclose(token_fd);
+ zclose(map_fd);
+ return err;
+}
+
+static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
+ int err, token_fd = -1, btf_fd = -1;
+ const void *raw_btf_data;
+ struct btf *btf = NULL;
+ __u32 raw_btf_size;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* setup a trivial BTF data to load to the kernel */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ goto cleanup;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
+ goto cleanup;
+
+ /* no token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = 0;
+ btf_opts.token_fd = 0;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* token + CAP_BPF -> success */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ btf__free(btf);
+ zclose(btf_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
+ int err, token_fd = -1, prog_fd = -1;
+ struct bpf_insn insns[] = {
+ /* bpf_jiffies64() requires CAP_BPF */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ /* bpf_get_current_task() requires CAP_PERFMON */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
+ /* r0 = 0; exit; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* validate we can successfully load BPF program with token; this
+ * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
+ * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
+ * BPF token wired properly in a bunch of places in the kernel
+ */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_opts.expected_attach_type = BPF_XDP;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no token + caps -> failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no caps + token -> failure */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no caps + no token -> definitely a failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = 0;
+cleanup:
+ zclose(prog_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_map *skel;
+ int err;
+
+ skel = priv_map__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_map__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_map__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = priv_map__load(skel);
+ priv_map__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_prog *skel;
+ int err;
+
+ skel = priv_prog__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_prog__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_capable() with LSM */
+ lsm_skel->bss->reject_capable = true;
+ lsm_skel->bss->reject_cmd = false;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_cmd() with LSM */
+ lsm_skel->bss->reject_capable = false;
+ lsm_skel->bss->reject_cmd = true;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
+ * which should cause struct_ops application to fail, as BTF won't be uploaded
+ * into the kernel, even if STRUCT_OPS programs themselves are allowed
+ */
+static int validate_struct_ops_load(int mnt_fd, bool expect_success)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (expect_success) {
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+ } else /* expect failure */ {
+ if (!ASSERT_ERR(err, "obj_token_path_load"))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, false /* should fail */);
+}
+
+static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, true /* should succeed */);
+}
+
+#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
+#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
+
+static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
+ * token automatically and implicitly
+ */
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ return -EINVAL;
+
+ /* disable implicit BPF token creation by setting
+ * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
+ */
+ err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ return -EINVAL;
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
+ unsetenv(TOKEN_ENVVAR);
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+ unsetenv(TOKEN_ENVVAR);
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from /sys/fs/bpf mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ return -EINVAL;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, should fail */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over custom location, so libbpf can't create
+ * BPF token implicitly, unless pointed to it through
+ * LIBBPF_BPF_TOKEN_PATH envvar
+ */
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
+ goto err_out;
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ goto err_out;
+
+ /* even though we have BPF FS with delegation, it's not at default
+ * /sys/fs/bpf location, so we still fail to load until envvar is set up
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
+ dummy_st_ops_success__destroy(skel);
+ goto err_out;
+ }
+
+ err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ goto err_out;
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from custom mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ goto err_out;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, envvar
+ * will be ignored, should fail
+ */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ goto err_out;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ goto err_out;
+
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return 0;
+err_out:
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return -EINVAL;
+}
+
+#define bit(n) (1ULL << (n))
+
+void test_token(void)
+{
+ if (test__start_subtest("map_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "map_create",
+ .maps_str = "stack",
+ };
+
+ subtest_userns(&opts, userns_map_create);
+ }
+ if (test__start_subtest("btf_token")) {
+ struct bpffs_opts opts = {
+ .cmds = 1ULL << BPF_BTF_LOAD,
+ };
+
+ subtest_userns(&opts, userns_btf_load);
+ }
+ if (test__start_subtest("prog_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "PROG_LOAD",
+ .progs_str = "XDP",
+ .attachs_str = "xdp",
+ };
+
+ subtest_userns(&opts, userns_prog_load);
+ }
+ if (test__start_subtest("obj_priv_map")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .maps = bit(BPF_MAP_TYPE_QUEUE),
+ };
+
+ subtest_userns(&opts, userns_obj_priv_map);
+ }
+ if (test__start_subtest("obj_priv_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_PROG_LOAD),
+ .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_prog);
+ }
+ if (test__start_subtest("obj_priv_btf_fail")) {
+ struct bpffs_opts opts = {
+ /* disallow BTF loading */
+ .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_fail);
+ }
+ if (test__start_subtest("obj_priv_btf_success")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_success);
+ }
+ if (test__start_subtest("obj_priv_implicit_token")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token);
+ }
+ if (test__start_subtest("obj_priv_implicit_token_envvar")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
new file mode 100644
index 000000000000..a222df765bc3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "tracing_failure.skel.h"
+
+static void test_bpf_spin_lock(bool is_spin_lock)
+{
+ struct tracing_failure *skel;
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ if (is_spin_lock)
+ bpf_program__set_autoload(skel->progs.test_spin_lock, true);
+ else
+ bpf_program__set_autoload(skel->progs.test_spin_unlock, true);
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_OK(err, "tracing_failure__load"))
+ goto out;
+
+ err = tracing_failure__attach(skel);
+ ASSERT_ERR(err, "tracing_failure__attach");
+
+out:
+ tracing_failure__destroy(skel);
+}
+
+void test_tracing_failure(void)
+{
+ if (test__start_subtest("bpf_spin_lock"))
+ test_bpf_spin_lock(true);
+ if (test__start_subtest("bpf_spin_unlock"))
+ test_bpf_spin_lock(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index d62c5bf00e71..985273832f89 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -4,6 +4,7 @@
#include "cap_helpers.h"
#include "verifier_and.skel.h"
+#include "verifier_arena.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
@@ -28,6 +29,7 @@
#include "verifier_div0.skel.h"
#include "verifier_div_overflow.skel.h"
#include "verifier_global_subprogs.skel.h"
+#include "verifier_global_ptr_args.skel.h"
#include "verifier_gotol.skel.h"
#include "verifier_helper_access_var_len.skel.h"
#include "verifier_helper_packet_access.skel.h"
@@ -117,6 +119,7 @@ static void run_tests_aux(const char *skel_name,
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
void test_verifier_and(void) { RUN(verifier_and); }
+void test_verifier_arena(void) { RUN(verifier_arena); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
@@ -140,6 +143,7 @@ void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_st
void test_verifier_div0(void) { RUN(verifier_div0); }
void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
+void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
void test_verifier_gotol(void) { RUN(verifier_gotol); }
void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
index f3927829a55a..4599154c8e9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdpwall.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
@@ -9,7 +9,7 @@ void test_xdpwall(void)
struct xdpwall *skel;
skel = xdpwall__open_and_load();
- ASSERT_OK_PTR(skel, "Does LLMV have https://reviews.llvm.org/D109073?");
+ ASSERT_OK_PTR(skel, "Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5?");
xdpwall__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
new file mode 100644
index 000000000000..b7bb712cacfd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_htab.h"
+
+void __arena *htab_for_user;
+bool skip = false;
+
+int zero = 0;
+
+SEC("syscall")
+int arena_htab_llvm(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
+ struct htab __arena *htab;
+ __u64 i;
+
+ htab = bpf_alloc(sizeof(*htab));
+ cast_kern(htab);
+ htab_init(htab);
+
+ /* first run. No old elems in the table */
+ for (i = zero; i < 1000; i++)
+ htab_update_elem(htab, i, i);
+
+ /* should replace all elems with new ones */
+ for (i = zero; i < 1000; i++)
+ htab_update_elem(htab, i, i);
+ cast_user(htab);
+ htab_for_user = htab;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_htab_asm.c b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
new file mode 100644
index 000000000000..6cd70ea12f0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_ARENA_FORCE_ASM
+#define arena_htab_llvm arena_htab_asm
+#include "arena_htab.c"
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
new file mode 100644
index 000000000000..cd35b8448435
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+struct arena_list_head __arena *list_head;
+int list_sum;
+int cnt;
+bool skip = false;
+
+#ifdef __BPF_FEATURE_ARENA_CAST
+long __arena arena_sum;
+int __arena test_val = 1;
+struct arena_list_head __arena global_head;
+#else
+long arena_sum SEC(".arena.1");
+int test_val SEC(".arena.1");
+#endif
+
+int zero;
+
+SEC("syscall")
+int arena_list_add(void *ctx)
+{
+#ifdef __BPF_FEATURE_ARENA_CAST
+ __u64 i;
+
+ list_head = &global_head;
+
+ for (i = zero; i < cnt; cond_break, i++) {
+ struct elem __arena *n = bpf_alloc(sizeof(*n));
+
+ test_val++;
+ n->value = i;
+ arena_sum += i;
+ list_add_head(&n->node, list_head);
+ }
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+int arena_list_del(void *ctx)
+{
+#ifdef __BPF_FEATURE_ARENA_CAST
+ struct elem __arena *n;
+ int sum = 0;
+
+ arena_sum = 0;
+ list_for_each_entry(n, list_head, node) {
+ sum += n->value;
+ arena_sum += n->value;
+ list_del(&n->node);
+ bpf_free(n);
+ }
+ list_sum = sum;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c
index 3517c0e01206..36734683acbd 100644
--- a/tools/testing/selftests/bpf/progs/async_stack_depth.c
+++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c
@@ -30,7 +30,7 @@ static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer)
}
SEC("tc")
-__failure __msg("combined stack size of 2 calls is 576. Too large")
+__failure __msg("combined stack size of 2 calls is")
int pseudo_call_check(struct __sk_buff *ctx)
{
struct hmap_elem *elem;
@@ -45,7 +45,7 @@ int pseudo_call_check(struct __sk_buff *ctx)
}
SEC("tc")
-__failure __msg("combined stack size of 2 calls is 608. Too large")
+__failure __msg("combined stack size of 2 calls is")
int async_call_root_check(struct __sk_buff *ctx)
{
struct hmap_elem *elem;
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
new file mode 100644
index 000000000000..b7e175cd0af0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1) { return 0; }
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2) { return 0; }
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops2 testmod_2 = {
+ .test_1 = (void *)test_1
+};
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops2.c b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
new file mode 100644
index 000000000000..64a95f6be86d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* This is an unused struct_ops program, it lacks corresponding
+ * struct_ops map, which provides attachment information.
+ * W/o additional configuration attempt to load such
+ * BPF object file would fail.
+ */
+SEC("struct_ops/foo")
+void foo(void) {}
diff --git a/tools/testing/selftests/bpf/progs/bpf_compiler.h b/tools/testing/selftests/bpf/progs/bpf_compiler.h
new file mode 100644
index 000000000000..a7c343dc82e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_compiler.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_COMPILER_H__
+#define __BPF_COMPILER_H__
+
+#define DO_PRAGMA_(X) _Pragma(#X)
+
+#if __clang__
+#define __pragma_loop_unroll DO_PRAGMA_(clang loop unroll(enable))
+#else
+/* In GCC -funroll-loops, which is enabled with -O2, should have the
+ same impact than the loop-unroll-enable pragma above. */
+#define __pragma_loop_unroll
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(clang loop unroll_count(N))
+#else
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(GCC unroll N)
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_full DO_PRAGMA_(clang loop unroll(full))
+#else
+#define __pragma_loop_unroll_full DO_PRAGMA_(GCC unroll 65534)
+#endif
+
+#if __clang__
+#define __pragma_loop_no_unroll DO_PRAGMA_(clang loop unroll(disable))
+#else
+#define __pragma_loop_no_unroll DO_PRAGMA_(GCC unroll 1)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 2fd59970c43a..fb2f5513e29e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -80,7 +80,7 @@
#define __imm(name) [name]"i"(name)
#define __imm_const(name, expr) [name]"i"(expr)
#define __imm_addr(name) [name]"i"(&name)
-#define __imm_ptr(name) [name]"p"(&name)
+#define __imm_ptr(name) [name]"r"(&name)
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
/* Magic constants used with __retval() */
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index e8bd4b7b5ef7..7001965d1cc3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -51,9 +51,25 @@
#define ICSK_TIME_LOSS_PROBE 5
#define ICSK_TIME_REO_TIMEOUT 6
+#define ETH_ALEN 6
#define ETH_HLEN 14
+#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_TIMESTAMP 8
+#define TCPOPT_SACK_PERM 4
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_TIMESTAMP 10
+#define TCPOLEN_SACK_PERM 2
+
#define CHECKSUM_NONE 0
#define CHECKSUM_PARTIAL 3
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
index 610c2427fd93..3500e4b69ebe 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
@@ -27,32 +27,6 @@ bool is_cgroup1 = 0;
struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
-static void __on_lookup(struct cgroup *cgrp)
-{
- bpf_cgrp_storage_delete(&map_a, cgrp);
- bpf_cgrp_storage_delete(&map_b, cgrp);
-}
-
-SEC("fentry/bpf_local_storage_lookup")
-int BPF_PROG(on_lookup)
-{
- struct task_struct *task = bpf_get_current_task_btf();
- struct cgroup *cgrp;
-
- if (is_cgroup1) {
- cgrp = bpf_task_get_cgroup1(task, target_hid);
- if (!cgrp)
- return 0;
-
- __on_lookup(cgrp);
- bpf_cgroup_release(cgrp);
- return 0;
- }
-
- __on_lookup(task->cgroups->dfl_cgrp);
- return 0;
-}
-
static void __on_update(struct cgroup *cgrp)
{
long *ptr;
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
index ca8aa2f116b3..2ef0e0c46d17 100644
--- a/tools/testing/selftests/bpf/progs/connect_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -28,8 +28,7 @@ int connect_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 0;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 0cd4aebb97cf..c705d8112a35 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -23,41 +23,42 @@ struct array_map {
__uint(max_entries, 1);
} __cpumask_map SEC(".maps");
-struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
-void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
-struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
-u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
-u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+struct bpf_cpumask *bpf_cpumask_create(void) __ksym __weak;
+void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym __weak;
+struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym __weak;
u32 bpf_cpumask_first_and(const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
-void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
-bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
-void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym;
-void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym;
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym __weak;
bool bpf_cpumask_and(struct bpf_cpumask *cpumask,
const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
+ const struct cpumask *src2) __ksym __weak;
void bpf_cpumask_or(struct bpf_cpumask *cpumask,
const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
+ const struct cpumask *src2) __ksym __weak;
void bpf_cpumask_xor(struct bpf_cpumask *cpumask,
const struct cpumask *src1,
- const struct cpumask *src2) __ksym;
-bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;
-bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
-void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
-u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym;
-u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym;
-u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
-
-void bpf_rcu_read_lock(void) __ksym;
-void bpf_rcu_read_unlock(void) __ksym;
+ const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+
+void bpf_rcu_read_lock(void) __ksym __weak;
+void bpf_rcu_read_unlock(void) __ksym __weak;
static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
{
diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
index 9c078f34bbb2..5a76754f846b 100644
--- a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
@@ -27,8 +27,7 @@ int getpeername_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 1;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
index ac7145111497..7867113c696f 100644
--- a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
@@ -27,8 +27,7 @@ int getsockname_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 1;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index fe971992e635..3db416606f2f 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_compiler.h"
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
@@ -78,8 +79,8 @@ int iter_err_unsafe_asm_loop(const void *ctx)
"*(u32 *)(r1 + 0) = r6;" /* invalid */
:
: [it]"r"(&it),
- [small_arr]"p"(small_arr),
- [zero]"p"(zero),
+ [small_arr]"r"(small_arr),
+ [zero]"r"(zero),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy)
@@ -183,7 +184,7 @@ int iter_pragma_unroll_loop(const void *ctx)
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 2);
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
@@ -238,7 +239,7 @@ int iter_multiple_sequential_loops(const void *ctx)
bpf_iter_num_destroy(&it);
bpf_iter_num_new(&it, 0, 2);
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
diff --git a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
new file mode 100644
index 000000000000..2414ac20b6d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bin_data {
+ char blob[32];
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(kptr) struct bin_data __kptr * ptr;
+
+SEC("tc")
+__naked int kptr_xchg_inline(void)
+{
+ asm volatile (
+ "r1 = %[ptr] ll;"
+ "r2 = 0;"
+ "call %[bpf_kptr_xchg];"
+ "if r0 == 0 goto 1f;"
+ "r1 = r0;"
+ "r2 = 0;"
+ "call %[bpf_obj_drop_impl];"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(ptr),
+ __imm(bpf_kptr_xchg),
+ __imm(bpf_obj_drop_impl)
+ : __clobber_all
+ );
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __btf_root(void)
+{
+ bpf_obj_drop(NULL);
+}
diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c
index b35337926d66..0de0357f57cc 100644
--- a/tools/testing/selftests/bpf/progs/loop4.c
+++ b/tools/testing/selftests/bpf/progs/loop4.c
@@ -3,6 +3,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
SEC("socket")
@@ -10,7 +12,7 @@ int combinations(volatile struct __sk_buff* skb)
{
int ret = 0, i;
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 20; i++)
if (skb->len)
ret |= 1 << i;
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index 3325da17ec81..efaf622c28dd 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -316,7 +316,7 @@ struct lpm_trie {
} __attribute__((preserve_access_index));
struct lpm_key {
- struct bpf_lpm_trie_key trie_key;
+ struct bpf_lpm_trie_key_hdr trie_key;
__u32 data;
};
diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c
new file mode 100644
index 000000000000..9085be50f03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_map.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, 1);
+ __type(value, __u32);
+} priv_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
new file mode 100644
index 000000000000..3c7b2b618c8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe")
+int kprobe_prog(void *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index de3b6e4e4d0a..6957d9f2805e 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -8,6 +8,7 @@
#include "profiler.h"
#include "err.h"
#include "bpf_experimental.h"
+#include "bpf_compiler.h"
#ifndef NULL
#define NULL 0
@@ -169,7 +170,7 @@ static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
int spid)
{
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == spid)
@@ -185,7 +186,7 @@ static INLINE void populate_ancestors(struct task_struct* task,
ancestors_data->num_ancestors = 0;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
parent = BPF_CORE_READ(parent, real_parent);
@@ -212,7 +213,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
size_t filepart_length;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
filepart_length =
@@ -261,7 +262,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
pids_cgrp_id___local);
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys_state* subsys =
@@ -402,7 +403,7 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
if (kill_data == NULL)
return 0;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == 0) {
@@ -482,7 +483,7 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
struct dentry* parent_dentry;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
filepart_length =
@@ -508,7 +509,7 @@ is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
struct dentry* parent_dentry;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
@@ -629,7 +630,7 @@ int raw_tracepoint__sched_process_exit(void* ctx)
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 026d573ce179..86484f07e1d1 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -8,6 +8,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_compiler.h"
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
@@ -298,11 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#elif defined(UNROLL_COUNT)
-#pragma clang loop unroll_count(UNROLL_COUNT)
+ __pragma_loop_unroll_count(UNROLL_COUNT)
#else
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
#endif /* NO_UNROLL */
/* Unwind python stack */
#ifdef USE_ITER
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 14fb01437fb8..ab3a532b7dd6 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -319,3 +319,123 @@ int cross_rcu_region(void *ctx)
bpf_rcu_read_unlock();
return 0;
}
+
+__noinline
+static int static_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog(NULL);
+}
+
+__noinline
+static int static_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_lock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_lock(NULL);
+}
+
+__noinline
+static int static_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_unlock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_unlock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_unlock(NULL);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += static_subprog(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += global_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += static_subprog_lock(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += global_subprog_lock(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += static_subprog_unlock(ctx);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_unlock(ret);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
index 4dfbc8552558..1c7ab44bccfa 100644
--- a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
@@ -27,8 +27,7 @@ int recvmsg_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 1;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS,
sizeof(SERVUN_ADDRESS) - 1) != 0)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
index 1f67e832666e..d8869b03dda9 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -28,8 +28,7 @@ int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
if (sa_kern->uaddrlen != unaddrlen)
return 0;
- sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
- bpf_core_type_id_kernel(struct sockaddr_un));
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
index 3e745793b27a..46d6eb2a3b17 100644
--- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
+++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
@@ -12,8 +12,6 @@ int cookie_found = 0;
__u64 cookie = 0;
__u32 omem = 0;
-void *bpf_rdonly_cast(void *, __u32) __ksym;
-
struct {
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
@@ -29,7 +27,7 @@ int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
if (local_storage_ptr != local_storage)
return 0;
- sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock));
+ sk = bpf_core_cast(sk_ptr, struct sock);
if (sk->sk_cookie.counter != cookie)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index ffbbfe1fa1c1..96531b0d9d55 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -32,7 +32,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
if (!sk)
return 0;
- sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock));
+ sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != AF_INET6 ||
sk->sk_state != TCP_LISTEN ||
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
@@ -68,7 +68,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
if (!sk)
return 0;
- sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock));
+ sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != AF_INET6 ||
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
return 0;
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 40df2cc26eaf..f74459eead26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
typedef uint32_t pid_t;
struct task_struct {};
@@ -419,9 +421,9 @@ static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
if (i >= map.cnt)
@@ -560,25 +562,25 @@ static void *read_strobe_meta(struct task_struct *task,
payload_off = sizeof(data->payload);
#else
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
new file mode 100644
index 000000000000..ba10c3896213
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_2)
+{
+ return 0;
+}
+
+struct bpf_testmod_ops___v1 {
+ int (*test_1)(void);
+};
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ int (*does_not_exist)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v1 testmod_1 = {
+ .test_1 = (void *)test_1
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .does_not_exist = (void *)test_2
+};
+
+SEC("?.struct_ops")
+struct bpf_testmod_ops___v1 optional_map = {
+ .test_1 = (void *)test_1,
+};
+
+SEC("?.struct_ops.link")
+struct bpf_testmod_ops___v1 optional_map2 = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
new file mode 100644
index 000000000000..6049d9c902d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(foo)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(bar)
+{
+ test_1_result = 24;
+ return 0;
+}
+
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)bar
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
new file mode 100644
index 000000000000..b450f72e744a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+/* This is a test BPF program that uses struct_ops to access an argument
+ * that may be NULL. This is a test for the verifier to ensure that it can
+ * rip PTR_MAYBE_NULL correctly.
+ */
+SEC("struct_ops/test_maybe_null")
+int BPF_PROG(test_maybe_null, int dummy,
+ struct task_struct *task)
+{
+ if (task)
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_maybe_null = (void *)test_maybe_null,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
new file mode 100644
index 000000000000..6283099ec383
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+SEC("struct_ops/test_maybe_null_struct_ptr")
+int BPF_PROG(test_maybe_null_struct_ptr, int dummy,
+ struct task_struct *task)
+{
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_struct_ptr = {
+ .test_maybe_null = (void *)test_maybe_null_struct_ptr,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
new file mode 100644
index 000000000000..026cabfa7f1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+int test_2_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 0xdeadbeef;
+ return 0;
+}
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2, int a, int b)
+{
+ test_2_result = a + b;
+}
+
+SEC("struct_ops/test_3")
+int BPF_PROG(test_3, int a, int b)
+{
+ test_2_result = a + b + 3;
+ return a + b + 3;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .data = 0x1,
+};
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2_v2, int a, int b)
+{
+ test_2_result = a * b;
+}
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2_v2,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
new file mode 100644
index 000000000000..9efcc6e4d356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TRAMP(x) \
+ SEC("struct_ops/tramp_" #x) \
+ int BPF_PROG(tramp_ ## x, int a) \
+ { \
+ return a; \
+ }
+
+TRAMP(1)
+TRAMP(2)
+TRAMP(3)
+TRAMP(4)
+TRAMP(5)
+TRAMP(6)
+TRAMP(7)
+TRAMP(8)
+TRAMP(9)
+TRAMP(10)
+TRAMP(11)
+TRAMP(12)
+TRAMP(13)
+TRAMP(14)
+TRAMP(15)
+TRAMP(16)
+TRAMP(17)
+TRAMP(18)
+TRAMP(19)
+TRAMP(20)
+TRAMP(21)
+TRAMP(22)
+TRAMP(23)
+TRAMP(24)
+TRAMP(25)
+TRAMP(26)
+TRAMP(27)
+TRAMP(28)
+TRAMP(29)
+TRAMP(30)
+TRAMP(31)
+TRAMP(32)
+TRAMP(33)
+TRAMP(34)
+TRAMP(35)
+TRAMP(36)
+TRAMP(37)
+TRAMP(38)
+TRAMP(39)
+TRAMP(40)
+
+#define F_TRAMP(x) .tramp_ ## x = (void *)tramp_ ## x
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops multi_pages = {
+ F_TRAMP(1),
+ F_TRAMP(2),
+ F_TRAMP(3),
+ F_TRAMP(4),
+ F_TRAMP(5),
+ F_TRAMP(6),
+ F_TRAMP(7),
+ F_TRAMP(8),
+ F_TRAMP(9),
+ F_TRAMP(10),
+ F_TRAMP(11),
+ F_TRAMP(12),
+ F_TRAMP(13),
+ F_TRAMP(14),
+ F_TRAMP(15),
+ F_TRAMP(16),
+ F_TRAMP(17),
+ F_TRAMP(18),
+ F_TRAMP(19),
+ F_TRAMP(20),
+ F_TRAMP(21),
+ F_TRAMP(22),
+ F_TRAMP(23),
+ F_TRAMP(24),
+ F_TRAMP(25),
+ F_TRAMP(26),
+ F_TRAMP(27),
+ F_TRAMP(28),
+ F_TRAMP(29),
+ F_TRAMP(30),
+ F_TRAMP(31),
+ F_TRAMP(32),
+ F_TRAMP(33),
+ F_TRAMP(34),
+ F_TRAMP(35),
+ F_TRAMP(36),
+ F_TRAMP(37),
+ F_TRAMP(38),
+ F_TRAMP(39),
+ F_TRAMP(40),
+};
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
index 4542dc683b44..f1853c38aada 100644
--- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -27,23 +27,6 @@ struct {
__type(value, long);
} map_b SEC(".maps");
-SEC("fentry/bpf_local_storage_lookup")
-int BPF_PROG(on_lookup)
-{
- struct task_struct *task = bpf_get_current_task_btf();
-
- if (!test_pid || task->pid != test_pid)
- return 0;
-
- /* The bpf_task_storage_delete will call
- * bpf_local_storage_lookup. The prog->active will
- * stop the recursion.
- */
- bpf_task_storage_delete(&map_a, task);
- bpf_task_storage_delete(&map_b, task);
- return 0;
-}
-
SEC("fentry/bpf_local_storage_update")
int BPF_PROG(on_update)
{
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 66b304982245..683c8aaa63da 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -20,8 +20,11 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+
#ifdef SUBPROGS
#define INLINING __noinline
#else
@@ -267,7 +270,7 @@ static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
uint32_t acc = 0;
uint16_t *ipw = (uint16_t *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
acc += ipw[i];
}
@@ -294,7 +297,7 @@ bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
};
*is_fragment = false;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 6; i++) {
switch (exthdr.next) {
case IPPROTO_FRAGMENT:
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index f41c81212ee9..da54c09e9a15 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -23,6 +23,8 @@
#include "test_cls_redirect.h"
#include "bpf_kfuncs.h"
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
index 22aba3f6e344..6fc8b9d66e34 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -80,7 +80,7 @@ int test_core_type_id(void *ctx)
* to detect whether this test has to be executed, however strange
* that might look like.
*
- * [0] https://reviews.llvm.org/D85174
+ * [0] https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
*/
#if __has_builtin(__builtin_preserve_type_info)
struct core_reloc_type_id_output *out = (void *)&data.out;
diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
index 69509f8bb680..6afa834756e9 100644
--- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c
+++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
@@ -33,6 +33,12 @@ int BPF_PROG(tp_run)
return 0;
}
+SEC("perf_event")
+int event_run(void *ctx)
+{
+ return 0;
+}
+
SEC("kprobe.multi")
int BPF_PROG(kmulti_run)
{
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 17a9f59bf5f3..fc69ff18880d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -5,7 +5,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define MAX_STACK (512 - 3 * 32 + 8)
+#define MAX_STACK 260
static __attribute__ ((noinline))
int f0(int var, struct __sk_buff *skb)
@@ -30,6 +30,10 @@ int f3(int, struct __sk_buff *skb, int);
__attribute__ ((noinline))
int f2(int val, struct __sk_buff *skb)
{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
return f1(skb) + f3(val, skb, 1);
}
@@ -44,7 +48,7 @@ int f3(int val, struct __sk_buff *skb, int var)
}
SEC("tc")
-__failure __msg("combined stack size of 4 calls is 544")
+__failure __msg("combined stack size of 3 calls is")
int global_func1(struct __sk_buff *skb)
{
return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
index 9a06e5eb1fbe..143c8a4852bf 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
@@ -26,6 +26,23 @@ int kprobe_typedef_ctx(void *ctx)
return kprobe_typedef_ctx_subprog(ctx);
}
+/* s390x defines:
+ *
+ * typedef user_pt_regs bpf_user_pt_regs_t;
+ * typedef struct { ... } user_pt_regs;
+ *
+ * And so "canonical" underlying struct type is anonymous.
+ * So on s390x only valid ways to have PTR_TO_CTX argument in global subprogs
+ * are:
+ * - bpf_user_pt_regs_t *ctx (typedef);
+ * - struct bpf_user_pt_regs_t *ctx (backwards compatible struct hack);
+ * - void *ctx __arg_ctx (arg:ctx tag)
+ *
+ * Other architectures also allow using underlying struct types (e.g.,
+ * `struct pt_regs *ctx` for x86-64)
+ */
+#ifndef bpf_target_s390
+
#define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL)))
__weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx)
@@ -40,6 +57,8 @@ int kprobe_resolved_ctx(void *ctx)
return kprobe_struct_ctx_subprog(ctx);
}
+#endif
+
/* this is current hack to make this work on old kernels */
struct bpf_user_pt_regs_t {};
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index 48ff2b2ad5e7..fed66f36adb6 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -131,7 +133,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 10; i++) {
struct sr6_tlv_t tlv;
@@ -302,7 +304,7 @@ int __encap_srh(struct __sk_buff *skb)
seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (unsigned long long lo = 0; lo < 4; lo++) {
seg->lo = bpf_cpu_to_be64(4 - lo);
seg->hi = bpf_cpu_to_be64(hi);
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index f416032ba858..b295f9b721bf 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -21,6 +21,32 @@ struct {
__type(value, __u32);
} mim_hash SEC(".maps");
+/* The following three maps are used to test
+ * perf_event_array map can be an inner
+ * map of hash/array_of_maps.
+ */
+struct perf_event_array {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map0 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_array_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_hash_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
SEC("xdp")
int xdp_mimtest0(struct xdp_md *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
index 4bdd65b5aa2d..2fdc44e76624 100644
--- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -6,13 +6,13 @@
char tp_name[128];
-SEC("lsm/bpf")
+SEC("lsm.s/bpf")
int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
{
switch (cmd) {
case BPF_RAW_TRACEPOINT_OPEN:
- bpf_probe_read_user_str(tp_name, sizeof(tp_name) - 1,
- (void *)attr->raw_tracepoint.name);
+ bpf_copy_from_user(tp_name, sizeof(tp_name) - 1,
+ (void *)attr->raw_tracepoint.name);
break;
default:
break;
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index a7278f064368..5059050f74f6 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -134,7 +136,7 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
// we can only go as far as ~10 TLVs due to the BPF max stack size
// workaround: define induction variable "i" as "long" instead
// of "int" to prevent alu32 sub-register spilling.
- #pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (long i = 0; i < 100; i++) {
struct sr6_tlv_t tlv;
diff --git a/tools/testing/selftests/bpf/progs/test_siphash.h b/tools/testing/selftests/bpf/progs/test_siphash.h
new file mode 100644
index 000000000000..5d3a7ec36780
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_siphash.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_SIPHASH_H
+#define _TEST_SIPHASH_H
+
+/* include/linux/bitops.h */
+static inline u64 rol64(u64 word, unsigned int shift)
+{
+ return (word << (shift & 63)) | (word >> ((-shift) & 63));
+}
+
+/* include/linux/siphash.h */
+#define SIPHASH_PERMUTATION(a, b, c, d) ( \
+ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
+ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
+ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
+ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
+
+#define SIPHASH_CONST_0 0x736f6d6570736575ULL
+#define SIPHASH_CONST_1 0x646f72616e646f6dULL
+#define SIPHASH_CONST_2 0x6c7967656e657261ULL
+#define SIPHASH_CONST_3 0x7465646279746573ULL
+
+/* lib/siphash.c */
+#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3)
+
+#define PREAMBLE(len) \
+ u64 v0 = SIPHASH_CONST_0; \
+ u64 v1 = SIPHASH_CONST_1; \
+ u64 v2 = SIPHASH_CONST_2; \
+ u64 v3 = SIPHASH_CONST_3; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define POSTAMBLE \
+ v3 ^= b; \
+ SIPROUND; \
+ SIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+static inline u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+ PREAMBLE(16)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ POSTAMBLE
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index c482110cfc95..a724a70c6700 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -3,12 +3,14 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
SEC("tc")
int process(struct __sk_buff *skb)
{
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 5; i++) {
if (skb->cb[i] != i + 1)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index b2440a0ff422..d8d77bdffd3d 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -101,4 +101,69 @@ int bpf_spin_lock_test(struct __sk_buff *skb)
err:
return err;
}
+
+struct bpf_spin_lock lockA __hidden SEC(".data.A");
+
+__noinline
+static int static_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_lock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_lock(&lockA);
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_unlock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret + ctx->len;
+}
+
+SEC("tc")
+int lock_static_subprog_call(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_lock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ ret = static_subprog_lock(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_unlock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ ret = static_subprog_unlock(ctx);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
index 86cd183ef6dc..43f40c4fe241 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -201,4 +201,48 @@ CHECK(innermapval_mapval, &iv->lock, &v->lock);
#undef CHECK
+__noinline
+int global_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ ret += ctx->protocol;
+ return ret + ctx->mark;
+}
+
+__noinline
+static int static_subprog_call_global(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len + global_subprog(ctx);
+}
+
+SEC("?tc")
+int lock_global_subprog_call1(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?tc")
+int lock_global_subprog_call2(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog_call_global(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 553a282d816a..7f74077d6622 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -30,7 +32,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -59,7 +61,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index 2b64bc563a12..68a75436e8af 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -30,7 +32,7 @@ static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 5489823c83fc..efc3c61f7852 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
@@ -31,7 +33,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index e6e678aa9874..404124a93892 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -19,6 +19,9 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
static const int cfg_port = 8000;
@@ -81,7 +84,7 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
csum += *iph16++;
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
new file mode 100644
index 000000000000..c8e4553648bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+#include "bpf_kfuncs.h"
+#include "test_siphash.h"
+#include "test_tcp_custom_syncookie.h"
+
+#define MAX_PACKET_OFF 0xffff
+
+/* Hash is calculated for each client and split into ISN and TS.
+ *
+ * MSB LSB
+ * ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 |
+ * | Hash_1 | MSS | ECN | SACK | WScale |
+ *
+ * TS: | 31 ... 8 | 7 ... 0 |
+ * | Random | Hash_2 |
+ */
+#define COOKIE_BITS 8
+#define COOKIE_MASK (((__u32)1 << COOKIE_BITS) - 1)
+
+enum {
+ /* 0xf is invalid thus means that SYN did not have WScale. */
+ BPF_SYNCOOKIE_WSCALE_MASK = (1 << 4) - 1,
+ BPF_SYNCOOKIE_SACK = (1 << 4),
+ BPF_SYNCOOKIE_ECN = (1 << 5),
+};
+
+#define MSS_LOCAL_IPV4 65495
+#define MSS_LOCAL_IPV6 65476
+
+const __u16 msstab4[] = {
+ 536,
+ 1300,
+ 1460,
+ MSS_LOCAL_IPV4,
+};
+
+const __u16 msstab6[] = {
+ 1280 - 60, /* IPV6_MIN_MTU - 60 */
+ 1480 - 60,
+ 9000 - 60,
+ MSS_LOCAL_IPV6,
+};
+
+static siphash_key_t test_key_siphash = {
+ { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }
+};
+
+struct tcp_syncookie {
+ struct __sk_buff *skb;
+ void *data;
+ void *data_end;
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __be32 *ptr32;
+ struct bpf_tcp_req_attrs attrs;
+ u32 off;
+ u32 cookie;
+ u64 first;
+};
+
+bool handled_syn, handled_ack;
+
+static int tcp_load_headers(struct tcp_syncookie *ctx)
+{
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+
+ if (ctx->eth + 1 > ctx->data_end)
+ goto err;
+
+ switch (bpf_ntohs(ctx->eth->h_proto)) {
+ case ETH_P_IP:
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+
+ if (ctx->ipv4 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4)
+ goto err;
+
+ if (ctx->ipv4->version != 4)
+ goto err;
+
+ if (ctx->ipv4->protocol != IPPROTO_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ break;
+ case ETH_P_IPV6:
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+
+ if (ctx->ipv6 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv6->version != 6)
+ goto err;
+
+ if (ctx->ipv6->nexthdr != NEXTHDR_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ break;
+ default:
+ goto err;
+ }
+
+ if (ctx->tcp + 1 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_reload_headers(struct tcp_syncookie *ctx)
+{
+ /* Without volatile,
+ * R3 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 data_len = ctx->skb->data_end - ctx->skb->data;
+
+ if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4)
+ goto err;
+
+ /* Needed to calculate csum and parse TCP options. */
+ if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0))
+ goto err;
+
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+ if (ctx->ipv4) {
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+ ctx->ipv6 = NULL;
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ } else {
+ ctx->ipv4 = NULL;
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ }
+
+ if ((void *)ctx->tcp + 60 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static int tcp_validate_header(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_reload_headers(ctx))
+ goto err;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ /* check tcp_v4_csum(csum) is 0 if not on lo. */
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (csum_fold(csum) != 0)
+ goto err;
+ } else if (ctx->ipv6) {
+ /* check tcp_v6_csum(csum) is 0 if not on lo. */
+ }
+
+ return 0;
+err:
+ return -1;
+}
+
+static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz)
+{
+ __u64 off = ctx->off;
+ __u8 *data;
+
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
+
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
+
+ ctx->off += sz;
+ return data;
+}
+
+static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
+{
+ __u8 *opcode, *opsize, *wscale;
+ __u32 *tsval, *tsecr;
+ __u16 *mss;
+ __u32 off;
+
+ off = ctx->off;
+ opcode = next(ctx, 1);
+ if (!opcode)
+ goto stop;
+
+ if (*opcode == TCPOPT_EOL)
+ goto stop;
+
+ if (*opcode == TCPOPT_NOP)
+ goto next;
+
+ opsize = next(ctx, 1);
+ if (!opsize)
+ goto stop;
+
+ if (*opsize < 2)
+ goto stop;
+
+ switch (*opcode) {
+ case TCPOPT_MSS:
+ mss = next(ctx, 2);
+ if (*opsize == TCPOLEN_MSS && ctx->tcp->syn && mss)
+ ctx->attrs.mss = get_unaligned_be16(mss);
+ break;
+ case TCPOPT_WINDOW:
+ wscale = next(ctx, 1);
+ if (*opsize == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) {
+ ctx->attrs.wscale_ok = 1;
+ ctx->attrs.snd_wscale = *wscale;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ tsval = next(ctx, 4);
+ tsecr = next(ctx, 4);
+ if (*opsize == TCPOLEN_TIMESTAMP && tsval && tsecr) {
+ ctx->attrs.rcv_tsval = get_unaligned_be32(tsval);
+ ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr);
+
+ if (ctx->tcp->syn && ctx->attrs.rcv_tsecr)
+ ctx->attrs.tstamp_ok = 0;
+ else
+ ctx->attrs.tstamp_ok = 1;
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (*opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn)
+ ctx->attrs.sack_ok = 1;
+ break;
+ }
+
+ ctx->off = off + *opsize;
+next:
+ return 0;
+stop:
+ return 1;
+}
+
+static void tcp_parse_options(struct tcp_syncookie *ctx)
+{
+ ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data,
+
+ bpf_loop(40, tcp_parse_option, ctx, 0);
+}
+
+static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
+{
+ if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) ||
+ (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
+ goto err;
+
+ if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ goto err;
+
+ if (!ctx->attrs.tstamp_ok)
+ goto err;
+
+ if (!ctx->attrs.sack_ok)
+ goto err;
+
+ if (!ctx->tcp->ece || !ctx->tcp->cwr)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static void tcp_prepare_cookie(struct tcp_syncookie *ctx)
+{
+ u32 seq = bpf_ntohl(ctx->tcp->seq);
+ u64 first = 0, second;
+ int mssind = 0;
+ u32 hash;
+
+ if (ctx->ipv4) {
+ for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab4[mssind])
+ break;
+
+ ctx->attrs.mss = msstab4[mssind];
+
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ } else if (ctx->ipv6) {
+ for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab6[mssind])
+ break;
+
+ ctx->attrs.mss = msstab6[mssind];
+
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+ }
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok) {
+ ctx->attrs.rcv_tsecr = bpf_get_prandom_u32();
+ ctx->attrs.rcv_tsecr &= ~COOKIE_MASK;
+ ctx->attrs.rcv_tsecr |= hash & COOKIE_MASK;
+ }
+
+ hash &= ~COOKIE_MASK;
+ hash |= mssind << 6;
+
+ if (ctx->attrs.wscale_ok)
+ hash |= ctx->attrs.snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK;
+
+ if (ctx->attrs.sack_ok)
+ hash |= BPF_SYNCOOKIE_SACK;
+
+ if (ctx->attrs.tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr)
+ hash |= BPF_SYNCOOKIE_ECN;
+
+ ctx->cookie = hash;
+}
+
+static void tcp_write_options(struct tcp_syncookie *ctx)
+{
+ ctx->ptr32 = (__be32 *)(ctx->tcp + 1);
+
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 |
+ ctx->attrs.mss);
+
+ if (ctx->attrs.wscale_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_WINDOW << 16 |
+ TCPOLEN_WINDOW << 8 |
+ ctx->attrs.snd_wscale);
+
+ if (ctx->attrs.tstamp_ok) {
+ if (ctx->attrs.sack_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 |
+ TCPOLEN_SACK_PERM << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+ else
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsecr);
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsval);
+ } else if (ctx->attrs.sack_ok) {
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_SACK_PERM << 8 |
+ TCPOLEN_SACK_PERM);
+ }
+}
+
+static int tcp_handle_syn(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_sysctl(ctx))
+ goto err;
+
+ tcp_prepare_cookie(ctx);
+ tcp_write_options(ctx);
+
+ swap(ctx->tcp->source, ctx->tcp->dest);
+ ctx->tcp->check = 0;
+ ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1);
+ ctx->tcp->seq = bpf_htonl(ctx->cookie);
+ ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2;
+ ctx->tcp->ack = 1;
+ if (!ctx->attrs.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr)
+ ctx->tcp->ece = 0;
+ ctx->tcp->cwr = 0;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ swap(ctx->ipv4->saddr, ctx->ipv4->daddr);
+ ctx->tcp->check = tcp_v4_csum(ctx, csum);
+
+ ctx->ipv4->check = 0;
+ ctx->ipv4->tos = 0;
+ ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4);
+ ctx->ipv4->id = 0;
+ ctx->ipv4->ttl = 64;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0);
+ if (csum < 0)
+ goto err;
+
+ ctx->ipv4->check = csum_fold(csum);
+ } else if (ctx->ipv6) {
+ swap(ctx->ipv6->saddr, ctx->ipv6->daddr);
+ ctx->tcp->check = tcp_v6_csum(ctx, csum);
+
+ *(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000);
+ ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp);
+ ctx->ipv6->hop_limit = 64;
+ }
+
+ swap_array(ctx->eth->h_source, ctx->eth->h_dest);
+
+ if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0))
+ goto err;
+
+ return bpf_redirect(ctx->skb->ifindex, 0);
+err:
+ return TC_ACT_SHOT;
+}
+
+static int tcp_validate_cookie(struct tcp_syncookie *ctx)
+{
+ u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1;
+ u32 seq = bpf_ntohl(ctx->tcp->seq) - 1;
+ u64 first = 0, second;
+ int mssind;
+ u32 hash;
+
+ if (ctx->ipv4)
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ else if (ctx->ipv6)
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok)
+ hash -= ctx->attrs.rcv_tsecr & COOKIE_MASK;
+ else
+ hash &= ~COOKIE_MASK;
+
+ hash -= cookie & ~COOKIE_MASK;
+ if (hash)
+ goto err;
+
+ mssind = (cookie & (3 << 6)) >> 6;
+ if (ctx->ipv4) {
+ if (mssind > ARRAY_SIZE(msstab4))
+ goto err;
+
+ ctx->attrs.mss = msstab4[mssind];
+ } else {
+ if (mssind > ARRAY_SIZE(msstab6))
+ goto err;
+
+ ctx->attrs.mss = msstab6[mssind];
+ }
+
+ ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
+ ctx->attrs.wscale_ok = ctx->attrs.snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.sack_ok = cookie & BPF_SYNCOOKIE_SACK;
+ ctx->attrs.ecn_ok = cookie & BPF_SYNCOOKIE_ECN;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_handle_ack(struct tcp_syncookie *ctx)
+{
+ struct bpf_sock_tuple tuple;
+ struct bpf_sock *skc;
+ int ret = TC_ACT_OK;
+ struct sock *sk;
+ u32 tuple_size;
+
+ if (ctx->ipv4) {
+ tuple.ipv4.saddr = ctx->ipv4->saddr;
+ tuple.ipv4.daddr = ctx->ipv4->daddr;
+ tuple.ipv4.sport = ctx->tcp->source;
+ tuple.ipv4.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv4);
+ } else if (ctx->ipv6) {
+ __builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr));
+ __builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr));
+ tuple.ipv6.sport = ctx->tcp->source;
+ tuple.ipv6.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv6);
+ } else {
+ goto out;
+ }
+
+ skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, -1, 0);
+ if (!skc)
+ goto out;
+
+ if (skc->state != TCP_LISTEN)
+ goto release;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(skc);
+ if (!sk)
+ goto err;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_cookie(ctx))
+ goto err;
+
+ ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attrs, sizeof(ctx->attrs));
+ if (ret < 0)
+ goto err;
+
+release:
+ bpf_sk_release(skc);
+out:
+ return ret;
+
+err:
+ ret = TC_ACT_SHOT;
+ goto release;
+}
+
+SEC("tc")
+int tcp_custom_syncookie(struct __sk_buff *skb)
+{
+ struct tcp_syncookie ctx = {
+ .skb = skb,
+ };
+
+ if (tcp_load_headers(&ctx))
+ return TC_ACT_OK;
+
+ if (ctx.tcp->rst)
+ return TC_ACT_OK;
+
+ if (ctx.tcp->syn) {
+ if (ctx.tcp->ack)
+ return TC_ACT_OK;
+
+ handled_syn = true;
+
+ return tcp_handle_syn(&ctx);
+ }
+
+ handled_ack = true;
+
+ return tcp_handle_ack(&ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
new file mode 100644
index 000000000000..29a6a53cf229
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_TCP_SYNCOOKIE_H
+#define _TEST_TCP_SYNCOOKIE_H
+
+#define __packed __attribute__((__packed__))
+#define __force
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define swap(a, b) \
+ do { \
+ typeof(a) __tmp = (a); \
+ (a) = (b); \
+ (b) = __tmp; \
+ } while (0)
+
+#define swap_array(a, b) \
+ do { \
+ typeof(a) __tmp[sizeof(a)]; \
+ __builtin_memcpy(__tmp, a, sizeof(a)); \
+ __builtin_memcpy(a, b, sizeof(a)); \
+ __builtin_memcpy(b, __tmp, sizeof(a)); \
+ } while (0)
+
+/* asm-generic/unaligned.h */
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return bpf_ntohs(__get_unaligned_t(__be16, p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return bpf_ntohl(__get_unaligned_t(__be32, p));
+}
+
+/* lib/checksum.c */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum)from64to32(s);
+}
+
+/* asm-generic/checksum.h */
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/* net/ipv6/ip6_checksum.c */
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ int carry;
+ __u32 ulen;
+ __u32 uproto;
+ __u32 sum = (__force u32)csum;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ ulen = (__force u32)bpf_htonl((__u32)len);
+ sum += ulen;
+ carry = (sum < ulen);
+ sum += carry;
+
+ uproto = (__force u32)bpf_htonl(proto);
+ sum += uproto;
+ carry = (sum < uproto);
+ sum += carry;
+
+ return csum_fold((__force __wsum)sum);
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index cf7ed8cbb1fe..a3f3f43fc195 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -59,7 +59,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
asm volatile (
"%[op] = *(u32 *)(%[skops] +96)"
- : [op] "+r"(op)
+ : [op] "=r"(op)
: [skops] "r"(skops)
:);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index d7a9a74b7245..8caf58be5818 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -19,6 +19,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -137,7 +138,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
index 78c368e71797..67a77944ef29 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -18,11 +18,11 @@
#include "test_iptunnel_common.h"
#include "bpf_kfuncs.h"
-const size_t tcphdr_sz = sizeof(struct tcphdr);
-const size_t udphdr_sz = sizeof(struct udphdr);
-const size_t ethhdr_sz = sizeof(struct ethhdr);
-const size_t iphdr_sz = sizeof(struct iphdr);
-const size_t ipv6hdr_sz = sizeof(struct ipv6hdr);
+#define tcphdr_sz sizeof(struct tcphdr)
+#define udphdr_sz sizeof(struct udphdr)
+#define ethhdr_sz sizeof(struct ethhdr)
+#define iphdr_sz sizeof(struct iphdr)
+#define ipv6hdr_sz sizeof(struct ipv6hdr)
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index c98fb44156f0..93267a68825b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -133,7 +134,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 42c8f6ded0e4..5c7e4758a0ca 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -15,6 +15,7 @@
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
@@ -362,7 +363,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
iph->ttl = 4;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
@@ -409,7 +410,7 @@ int send_icmp_reply(void *data, void *data_end)
iph->saddr = tmp_addr;
iph->check = 0;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
diff --git a/tools/testing/selftests/bpf/progs/token_lsm.c b/tools/testing/selftests/bpf/progs/token_lsm.c
new file mode 100644
index 000000000000..e4d59b6ba743
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/token_lsm.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int my_pid;
+bool reject_capable;
+bool reject_cmd;
+
+SEC("lsm/bpf_token_capable")
+int BPF_PROG(token_capable, struct bpf_token *token, int cap)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_capable)
+ return -1;
+ return 0;
+}
+
+SEC("lsm/bpf_token_cmd")
+int BPF_PROG(token_cmd, struct bpf_token *token, enum bpf_cmd cmd)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_cmd)
+ return -1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c
new file mode 100644
index 000000000000..d41665d2ec8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_failure.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry/bpf_spin_lock")
+int BPF_PROG(test_spin_lock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_spin_unlock")
+int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 694e7cec1823..5fda43901033 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -33,6 +33,27 @@ int bench_trigger_kprobe(void *ctx)
return 0;
}
+SEC("kretprobe/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kretprobe(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kprobe_multi(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kretprobe_multi(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fentry(void *ctx)
{
@@ -40,6 +61,13 @@ int bench_trigger_fentry(void *ctx)
return 0;
}
+SEC("fexit/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_fexit(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fentry_sleep(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c
index a9629ac230fd..9d808b8f4ab0 100644
--- a/tools/testing/selftests/bpf/progs/type_cast.c
+++ b/tools/testing/selftests/bpf/progs/type_cast.c
@@ -4,6 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
@@ -19,9 +20,6 @@ char name[IFNAMSIZ];
unsigned int inum;
unsigned int meta_len, frag0_len, kskb_len, kskb2_len;
-void *bpf_cast_to_kern_ctx(void *) __ksym;
-void *bpf_rdonly_cast(void *, __u32) __ksym;
-
SEC("?xdp")
int md_xdp(struct xdp_md *ctx)
{
@@ -48,13 +46,12 @@ int md_skb(struct __sk_buff *skb)
/* Simulate the following kernel macro:
* #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
*/
- shared_info = bpf_rdonly_cast(kskb->head + kskb->end,
- bpf_core_type_id_kernel(struct skb_shared_info));
+ shared_info = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
meta_len = shared_info->meta_len;
frag0_len = shared_info->frag_list->len;
/* kskb2 should be equal to kskb */
- kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff));
+ kskb2 = bpf_core_cast(kskb, typeof(*kskb2));
kskb2_len = kskb2->len;
return 0;
}
@@ -65,7 +62,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
struct task_struct *task, *task_dup;
task = bpf_get_current_task_btf();
- task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct));
+ task_dup = bpf_core_cast(task, struct task_struct);
(void)bpf_task_storage_get(&enter_id, task_dup, 0, 0);
return 0;
}
@@ -73,7 +70,7 @@ int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
SEC("?tracepoint/syscalls/sys_enter_nanosleep")
int kctx_u64(void *ctx)
{
- u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64));
+ u64 *kctx = bpf_core_cast(ctx, u64);
(void)kctx;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
new file mode 100644
index 000000000000..5540b05ff9ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST)
+ volatile int __arena *page1, *page2, *no_page, *page3;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
+ if (*page1 != 1)
+ return 6;
+ if (*page2 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page2 != page3)
+ return 9;
+ if (*page1 != 1)
+ return 10;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST)
+ volatile char __arena *page1, *page2, *page3, *page4;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ page2 = page1 + __PAGE_SIZE;
+ page3 = page1 + __PAGE_SIZE * 2;
+ page4 = page1 - __PAGE_SIZE;
+ *page1 = 1;
+ *page2 = 2;
+ *page3 = 3;
+ *page4 = 4;
+ if (*page1 != 1)
+ return 1;
+ if (*page2 != 2)
+ return 2;
+ if (*page3 != 0)
+ return 3;
+ if (*page4 != 0)
+ return 4;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
+ if (*page1 != 0)
+ return 5;
+ if (*page2 != 0)
+ return 6;
+ if (*page3 != 0)
+ return 7;
+ if (*page4 != 0)
+ return 8;
+#endif
+ return 0;
+}
+
+struct bpf_arena___l {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+SEC("syscall")
+__success __retval(0) __log_level(2)
+int basic_alloc3(void *ctx)
+{
+ struct bpf_arena___l *ar = (struct bpf_arena___l *)&arena;
+ volatile char __arena *pages;
+
+ pages = bpf_arena_alloc_pages(&ar->map, NULL, ar->map.max_entries, NUMA_NO_NODE, 0);
+ if (!pages)
+ return 1;
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__success __log_level(2)
+int iter_maps1(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("expected pointer to STRUCT bpf_map")
+int iter_maps2(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+
+ bpf_arena_alloc_pages((void *)seq, NULL, 1, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("untrusted_ptr_bpf_map")
+int iter_maps3(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map->inner_map_meta, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index be95570ab382..28b602ac9cbe 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -568,7 +568,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("direct packet access: test23 (x += pkt_ptr, 4)")
-__failure __msg("invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)")
+__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)")
__flag(BPF_F_ANY_ALIGNMENT)
__naked void test23_x_pkt_ptr_4(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
new file mode 100644
index 000000000000..4ab0ef18d7eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "xdp_metadata.h"
+#include "bpf_kfuncs.h"
+
+extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+extern void bpf_task_release(struct task_struct *p) __ksym __weak;
+
+__weak int subprog_trusted_task_nullable(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ if (!task)
+ return 0;
+ return task->pid + task->tgid;
+}
+
+__weak int subprog_trusted_task_nullable_extra_layer(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ return subprog_trusted_task_nullable(task) + subprog_trusted_task_nullable(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nullable() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int trusted_task_arg_nullable(void *ctx)
+{
+ struct task_struct *t1 = bpf_get_current_task_btf();
+ struct task_struct *t2 = bpf_task_acquire(t1);
+ int res = 0;
+
+ /* known NULL */
+ res += subprog_trusted_task_nullable(NULL);
+
+ /* known non-NULL */
+ res += subprog_trusted_task_nullable(t1);
+ res += subprog_trusted_task_nullable_extra_layer(t1);
+
+ /* unknown if NULL or not */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ if (t2) {
+ /* known non-NULL after explicit NULL check, just in case */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ bpf_task_release(t2);
+ }
+
+ return res;
+}
+
+__weak int subprog_trusted_task_nonnull(struct task_struct *task __arg_trusted)
+{
+ return task->pid + task->tgid;
+}
+
+SEC("?kprobe")
+__failure __log_level(2)
+__msg("R1 type=scalar expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail1(void *ctx)
+{
+ return subprog_trusted_task_nonnull(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail2(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+ struct task_struct *nullable;
+ int res;
+
+ nullable = bpf_task_acquire(t);
+
+ /* should fail, PTR_TO_BTF_ID_OR_NULL */
+ res = subprog_trusted_task_nonnull(nullable);
+
+ if (nullable)
+ bpf_task_release(nullable);
+
+ return res;
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nonnull() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int trusted_task_arg_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_trusted_task_nonnull(t);
+}
+
+struct task_struct___local {} __attribute__((preserve_access_index));
+
+__weak int subprog_nullable_task_flavor(
+ struct task_struct___local *task __arg_trusted __arg_nullable)
+{
+ char buf[16];
+
+ if (!task)
+ return 0;
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nullable_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int flavor_ptr_nullable(void *ctx)
+{
+ struct task_struct___local *t = (void *)bpf_get_current_task_btf();
+
+ return subprog_nullable_task_flavor(t);
+}
+
+__weak int subprog_nonnull_task_flavor(struct task_struct___local *task __arg_trusted)
+{
+ char buf[16];
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nonnull_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int flavor_ptr_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_nonnull_task_flavor((void *)t);
+}
+
+__weak int subprog_trusted_destroy(struct task_struct *task __arg_trusted)
+{
+ bpf_task_release(task); /* should be rejected */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID")
+int BPF_PROG(trusted_destroy_fail, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_destroy(task);
+}
+
+__weak int subprog_trusted_acq_rel(struct task_struct *task __arg_trusted)
+{
+ struct task_struct *owned;
+
+ owned = bpf_task_acquire(task);
+ if (!owned)
+ return 0;
+
+ bpf_task_release(owned); /* this one is OK, we acquired it locally */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_acq_rel(task);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
index 67dddd941891..baff5ffe9405 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -115,6 +115,35 @@ int arg_tag_nullable_ptr_fail(void *ctx)
return subprog_nullable_ptr_bad(&x);
}
+typedef struct {
+ int x;
+} user_struct_t;
+
+__noinline __weak int subprog_user_anon_mem(user_struct_t *t)
+{
+ return t ? t->x : 0;
+}
+
+SEC("?tracepoint")
+__failure __log_level(2)
+__msg("invalid bpf_context access")
+__msg("Caller passes invalid args into func#1 ('subprog_user_anon_mem')")
+int anon_user_mem_invalid(void *ctx)
+{
+ /* can't pass PTR_TO_CTX as user memory */
+ return subprog_user_anon_mem(ctx);
+}
+
+SEC("?tracepoint")
+__success __log_level(2)
+__msg("Func#1 ('subprog_user_anon_mem') is safe for any args that match its prototype")
+int anon_user_mem_valid(void *ctx)
+{
+ user_struct_t t = { .x = 42 };
+
+ return subprog_user_anon_mem(&t);
+}
+
__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull)
{
return (*p1) * (*p2); /* good, no need for NULL checks */
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index a955a6358206..99e561f18f9b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_experimental.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -309,4 +307,103 @@ int iter_limit_bug(struct __sk_buff *skb)
return 0;
}
+#define ARR_SZ 1000000
+int zero;
+char arr[ARR_SZ];
+
+SEC("socket")
+__success __retval(0xd495cdc0)
+int cond_break1(const void *ctx)
+{
+ unsigned long i;
+ unsigned int sum = 0;
+
+ for (i = zero; i < ARR_SZ; cond_break, i++)
+ sum += i;
+ for (i = zero; i < ARR_SZ; i++) {
+ barrier_var(i);
+ sum += i + arr[i];
+ cond_break;
+ }
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(999000000)
+int cond_break2(const void *ctx)
+{
+ int i, j;
+ int sum = 0;
+
+ for (i = zero; i < 1000; cond_break, i++)
+ for (j = zero; j < 1000; j++) {
+ sum += i + j;
+ cond_break;
+ }
+
+ return sum;
+}
+
+static __noinline int loop(void)
+{
+ int i, sum = 0;
+
+ for (i = zero; i <= 1000000; i++, cond_break)
+ sum += i;
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(0x6a5a2920)
+int cond_break3(const void *ctx)
+{
+ return loop();
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break4(const void *ctx)
+{
+ int cnt = zero;
+
+ for (;;) {
+ /* should eventually break out of the loop */
+ cond_break;
+ cnt++;
+ }
+ /* if we looped a bit, it's a success */
+ return cnt > 1 ? 1 : 0;
+}
+
+static __noinline int static_subprog(void)
+{
+ int cnt = zero;
+
+ for (;;) {
+ cond_break;
+ cnt++;
+ }
+
+ return cnt;
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break5(const void *ctx)
+{
+ int cnt1 = zero, cnt2;
+
+ for (;;) {
+ cond_break;
+ cnt1++;
+ }
+
+ cnt2 = static_subprog();
+
+ /* main and subprog have to loop a bit */
+ return cnt1 > 1 && cnt2 > 1 ? 1 : 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index 71735dbf33d4..e07b43b78fd2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -259,4 +259,28 @@ l0_%=: r2 += r1; \
" ::: __clobber_all);
}
+SEC("xdp")
+__success
+__naked void not_an_inifinite_loop(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+loop_%=: \
+ r0 = *(u64 *)(r10 - 8); \
+ if r0 > 10 goto exit_%=; \
+ r0 += 1; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+ goto loop_%=; \
+exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 39fe3372e0e0..85e48069c9e6 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -217,7 +217,7 @@ __naked void uninit_u32_from_the_stack(void)
SEC("tc")
__description("Spill a u32 const scalar. Refill as u16. Offset to skb->data")
-__failure __msg("invalid access to packet")
+__success __retval(0)
__naked void u16_offset_to_skb_data(void)
{
asm volatile (" \
@@ -225,13 +225,19 @@ __naked void u16_offset_to_skb_data(void)
r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
w4 = 20; \
*(u32*)(r10 - 8) = r4; \
- r4 = *(u16*)(r10 - 8); \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 8);"
+#else
+ "r4 = *(u16*)(r10 - 6);"
+#endif
+ " \
r0 = r2; \
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
r0 += r4; \
- /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+ /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
if r0 > r3 goto l0_%=; \
- /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+ /* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
r0 = *(u32*)(r2 + 0); \
l0_%=: r0 = 0; \
exit; \
@@ -243,7 +249,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("Spill u32 const scalars. Refill as u64. Offset to skb->data")
-__failure __msg("invalid access to packet")
+__failure __msg("math between pkt pointer and register with unbounded min value is not allowed")
__naked void u64_offset_to_skb_data(void)
{
asm volatile (" \
@@ -253,13 +259,11 @@ __naked void u64_offset_to_skb_data(void)
w7 = 20; \
*(u32*)(r10 - 4) = r6; \
*(u32*)(r10 - 8) = r7; \
- r4 = *(u16*)(r10 - 8); \
+ r4 = *(u64*)(r10 - 8); \
r0 = r2; \
- /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4= */ \
r0 += r4; \
- /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
if r0 > r3 goto l0_%=; \
- /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
r0 = *(u32*)(r2 + 0); \
l0_%=: r0 = 0; \
exit; \
@@ -270,7 +274,7 @@ l0_%=: r0 = 0; \
}
SEC("tc")
-__description("Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data")
+__description("Spill a u32 const scalar. Refill as u16 from MSB. Offset to skb->data")
__failure __msg("invalid access to packet")
__naked void _6_offset_to_skb_data(void)
{
@@ -279,7 +283,13 @@ __naked void _6_offset_to_skb_data(void)
r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
w4 = 20; \
*(u32*)(r10 - 8) = r4; \
- r4 = *(u16*)(r10 - 6); \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 6);"
+#else
+ "r4 = *(u16*)(r10 - 8);"
+#endif
+ " \
r0 = r2; \
/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
r0 += r4; \
@@ -454,9 +464,9 @@ l0_%=: r1 >>= 16; \
SEC("raw_tp")
__log_level(2)
__success
-__msg("fp-8=0m??mmmm")
-__msg("fp-16=00mm??mm")
-__msg("fp-24=00mm???m")
+__msg("fp-8=0m??scalar()")
+__msg("fp-16=00mm??scalar()")
+__msg("fp-24=00mm???scalar()")
__naked void spill_subregs_preserve_stack_zero(void)
{
asm volatile (
@@ -495,14 +505,14 @@ char single_byte_buf[1] SEC(".data.single_byte_buf");
SEC("raw_tp")
__log_level(2)
__success
-/* make sure fp-8 is all STACK_ZERO */
-__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000")
+/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
/* but fp-16 is spilled IMPRECISE zero const reg */
__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
-/* validate that assigning R2 from STACK_ZERO doesn't mark register
+/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
* precise immediately; if necessary, it will be marked precise later
*/
-__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000")
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
/* similarly, when R2 is assigned from spilled register, it is initially
* imprecise, but will be marked precise later once it is used in precise context
*/
@@ -520,14 +530,14 @@ __msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0")
__naked void partial_stack_load_preserves_zeros(void)
{
asm volatile (
- /* fp-8 is all STACK_ZERO */
+ /* fp-8 is value zero (represented by a zero value fake reg) */
".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */
/* fp-16 is const zero register */
"r0 = 0;"
"*(u64 *)(r10 -16) = r0;"
- /* load single U8 from non-aligned STACK_ZERO slot */
+ /* load single U8 from non-aligned spilled value zero slot */
"r1 = %[single_byte_buf];"
"r2 = *(u8 *)(r10 -1);"
"r1 += r2;"
@@ -539,7 +549,7 @@ __naked void partial_stack_load_preserves_zeros(void)
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
- /* load single U16 from non-aligned STACK_ZERO slot */
+ /* load single U16 from non-aligned spilled value zero slot */
"r1 = %[single_byte_buf];"
"r2 = *(u16 *)(r10 -2);"
"r1 += r2;"
@@ -551,7 +561,7 @@ __naked void partial_stack_load_preserves_zeros(void)
"r1 += r2;"
"*(u8 *)(r1 + 0) = r2;" /* this should be fine */
- /* load single U32 from non-aligned STACK_ZERO slot */
+ /* load single U32 from non-aligned spilled value zero slot */
"r1 = %[single_byte_buf];"
"r2 = *(u32 *)(r10 -4);"
"r1 += r2;"
@@ -583,6 +593,47 @@ __naked void partial_stack_load_preserves_zeros(void)
: __clobber_common);
}
+SEC("raw_tp")
+__log_level(2)
+__success
+/* fp-4 is STACK_ZERO */
+__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????")
+__msg("5: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
+__naked void partial_stack_load_preserves_partial_zeros(void)
+{
+ asm volatile (
+ /* fp-4 is value zero */
+ ".8byte %[fp4_st_zero];" /* LLVM-18+: *(u32 *)(r10 -4) = 0; */
+
+ /* load single U8 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -1);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -2);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -4);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(single_byte_buf),
+ __imm_insn(fp4_st_zero, BPF_ST_MEM(BPF_W, BPF_REG_FP, -4, 0))
+ : __clobber_common);
+}
+
char two_byte_buf[2] SEC(".data.two_byte_buf");
SEC("raw_tp")
@@ -737,4 +788,460 @@ __naked void stack_load_preserves_const_precision_subreg(void)
: __clobber_common);
}
+SEC("xdp")
+__description("32-bit spilled reg range should be tracked")
+__success __retval(0)
+__naked void spill_32bit_range_track(void)
+{
+ asm volatile(" \
+ call %[bpf_ktime_get_ns]; \
+ /* Make r0 bounded. */ \
+ r0 &= 65535; \
+ /* Assign an ID to r0. */ \
+ r1 = r0; \
+ /* 32-bit spill r0 to stack. */ \
+ *(u32*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 < 1 goto l0_%=; \
+ /* 32-bit fill r1 from stack. */ \
+ r1 = *(u32*)(r10 - 8); \
+ /* r1 == r0 => r1 >= 1 always. */ \
+ if r1 >= 1 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. \
+ * Do an invalid memory access if the verifier \
+ * follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("64-bit spill of 64-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_64bit_of_64bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80000000; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 64-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u64*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit spill of 32-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_32bit_of_32bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0x80000000; \
+ /* 32-bit spill r0 to stack - should assign an ID. */\
+ *(u32*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u32*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("16-bit spill of 16-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_16bit_of_16bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8000; \
+ /* 16-bit spill r0 to stack - should assign an ID. */\
+ *(u16*)(r10 - 8) = r0; \
+ /* 16-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u16*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("8-bit spill of 8-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_8bit_of_8bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80; \
+ /* 8-bit spill r0 to stack - should assign an ID. */\
+ *(u8*)(r10 - 8) = r0; \
+ /* 8-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u8*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("spill unbounded reg, then range check src")
+__success __retval(0)
+__naked void spill_unbounded(void)
+{
+ asm volatile (" \
+ /* Produce an unbounded scalar. */ \
+ call %[bpf_get_prandom_u32]; \
+ /* Spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 > 16 goto l0_%=; \
+ /* Fill r0 from stack. */ \
+ r0 = *(u64*)(r10 - 8); \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 <= 16 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit(void)
+{
+ asm volatile(" \
+ /* Randomize the upper 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r0 from stack. */ \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(u32*)(r10 - 8);"
+#else
+ "r0 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 == 0 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill of 32-bit value should preserve ID")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit_preserve_id(void)
+{
+ asm volatile (" \
+ /* Randomize the lower 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xffffffff; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(u32*)(r10 - 8);"
+#else
+ "r1 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r1 with another register to trigger find_equal_scalars. */\
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill should clear ID")
+__failure __msg("math between ctx pointer and 4294967295 is not allowed")
+__naked void fill_32bit_after_spill_64bit_clear_id(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ r1 = 0xffffffff; \
+ r1 <<= 32; \
+ r1 += r0; \
+ /* 64-bit spill r1 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r1; \
+ /* 32-bit fill r2 from stack - should clear the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r2 = *(u32*)(r10 - 8);"
+#else
+ "r2 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r2 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on fill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 32; \
+ /* The verifier shouldn't propagate r2's range to r1, so it should\
+ * still remember r1 = 0xffffffff and reject the below.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if stack spill of an imprecise scalar in old state
+ * is considered equivalent to STACK_{MISC,INVALID} in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+/* STACK_INVALID should prevent verifier in unpriv mode from
+ * considering states equivalent and force an error on second
+ * verification path (entry - label 1 - label 2).
+ */
+__failure_unpriv
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("9: (95) exit")
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("invalid read from stack off -8+2 size 8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_imprecise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that stack spill of a precise scalar in old state
+ * is not considered equivalent to STACK_MISC in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should visit 'if r1 == 0x2a ...' two times:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("if r1 == 0x2a goto pc+0")
+__msg("if r1 == 0x2a goto pc+0")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_precise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ /* use r1 in precise context */
+ "if r1 == 42 goto +0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if STACK_MISC in old state is considered
+ * equivalent to stack spill of a scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r0 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_scalar(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r0 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that STACK_MISC in old state is not considered
+ * equivalent to stack spill of a non-scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should process exit instructions twice:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("r1 = *(u64 *)(r10 -8)")
+__msg("exit")
+__msg("r1 = *(u64 *)(r10 -8)")
+__msg("exit")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_ctx_ptr(void)
+{
+ asm volatile(
+ /* remember context pointer in r9 */
+ "r9 = r1;"
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure context pointer in fp-8 */
+ "*(u64*)(r10 - 8) = r9;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
index 9c1aa69650f8..fb316c080c84 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
@@ -330,7 +330,7 @@ l1_%=: r7 = r0; \
SEC("cgroup/skb")
__description("spin_lock: test10 lock in subprog without unlock")
-__failure __msg("unlock is missing")
+__success
__failure_unpriv __msg_unpriv("")
__naked void lock_in_subprog_without_unlock(void)
{
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 518329c666e9..7ea9785738b5 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -7,6 +7,8 @@
#include <bpf/bpf_endian.h>
#include <asm/errno.h>
+#include "bpf_compiler.h"
+
#define TC_ACT_OK 0
#define TC_ACT_SHOT 2
@@ -151,11 +153,11 @@ static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
__u64 sum = csum;
int i;
-#pragma unroll
+ __pragma_loop_unroll
for (i = 0; i < 4; i++)
sum += (__u32)saddr->in6_u.u6_addr32[i];
-#pragma unroll
+ __pragma_loop_unroll
for (i = 0; i < 4; i++)
sum += (__u32)daddr->in6_u.u6_addr32[i];
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 54cf1765118b..44e2b0ef23ae 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "xdping.h"
struct {
@@ -116,7 +117,7 @@ int xdping_client(struct xdp_md *ctx)
return XDP_PASS;
if (pinginfo->start) {
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < XDPING_MAX_COUNT; i++) {
if (pinginfo->times[i] == 0)
break;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index f01391021218..524c38e9cde4 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -181,7 +181,7 @@ static int parse_test_spec(struct test_loader *tester,
memset(spec, 0, sizeof(*spec));
spec->prog_name = bpf_program__name(prog);
- spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */
+ spec->prog_flags = testing_prog_flags();
btf = bpf_object__btf(obj);
if (!btf) {
@@ -501,7 +501,7 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
}
}
-static int do_prog_test_run(int fd_prog, int *retval)
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
{
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
__u8 tmp_in[TEST_DATA_LEN] = {};
@@ -514,6 +514,10 @@ static int do_prog_test_run(int fd_prog, int *retval)
.repeat = 1,
);
+ if (empty_opts) {
+ memset(&topts, 0, sizeof(struct bpf_test_run_opts));
+ topts.sz = sizeof(struct bpf_test_run_opts);
+ }
err = bpf_prog_test_run_opts(fd_prog, &topts);
saved_errno = errno;
@@ -649,7 +653,8 @@ void run_subtest(struct test_loader *tester,
}
}
- do_prog_test_run(bpf_program__fd(tprog), &retval);
+ do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
@@ -688,7 +693,7 @@ static void process_subtest(struct test_loader *tester,
++nr_progs;
specs = calloc(nr_progs, sizeof(struct test_spec));
- if (!ASSERT_OK_PTR(specs, "Can't alloc specs array"))
+ if (!ASSERT_OK_PTR(specs, "specs_alloc"))
return;
i = 0;
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index c028d621c744..d98c72dc563e 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -211,7 +211,7 @@ static void test_lpm_map(int keysize)
volatile size_t n_matches, n_matches_after_delete;
size_t i, j, n_nodes, n_lookups;
struct tlpm_node *t, *list = NULL;
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
uint8_t *data, *value;
int r, map;
@@ -331,8 +331,8 @@ static void test_lpm_map(int keysize)
static void test_lpm_ipaddr(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
- struct bpf_lpm_trie_key *key_ipv4;
- struct bpf_lpm_trie_key *key_ipv6;
+ struct bpf_lpm_trie_key_u8 *key_ipv4;
+ struct bpf_lpm_trie_key_u8 *key_ipv6;
size_t key_size_ipv4;
size_t key_size_ipv6;
int map_fd_ipv4;
@@ -423,7 +423,7 @@ static void test_lpm_ipaddr(void)
static void test_lpm_delete(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
size_t key_size;
int map_fd;
__u64 value;
@@ -532,7 +532,7 @@ static void test_lpm_delete(void)
static void test_lpm_get_next_key(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
- struct bpf_lpm_trie_key *key_p, *next_key_p;
+ struct bpf_lpm_trie_key_u8 *key_p, *next_key_p;
size_t key_size;
__u32 value = 0;
int map_fd;
@@ -693,9 +693,9 @@ static void *lpm_test_command(void *arg)
{
int i, j, ret, iter, key_size;
struct lpm_mt_test_info *info = arg;
- struct bpf_lpm_trie_key *key_p;
+ struct bpf_lpm_trie_key_u8 *key_p;
- key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_size = sizeof(*key_p) + sizeof(__u32);
key_p = alloca(key_size);
for (iter = 0; iter < info->iter; iter++)
for (i = 0; i < MAX_TEST_KEYS; i++) {
@@ -717,7 +717,7 @@ static void *lpm_test_command(void *arg)
ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
assert(ret == 0 || errno == ENOENT);
} else {
- struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size);
ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
}
@@ -752,7 +752,7 @@ static void test_lpm_multi_thread(void)
/* create a trie */
value_size = sizeof(__u32);
- key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+ key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size;
map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
/* create 4 threads to test update, delete, lookup and get_next_key */
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 767e0693df10..dfbab214f4d1 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1190,7 +1190,11 @@ static void test_map_in_map(void)
goto out_map_in_map;
}
- bpf_object__load(obj);
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("Failed to load test prog\n");
+ goto out_map_in_map;
+ }
map = bpf_object__find_map_by_name(obj, "mim_array");
if (!map) {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 1b9387890148..89ff704e9dad 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -547,24 +547,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
return bpf_map__fd(map);
}
-static bool is_jit_enabled(void)
-{
- const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
- bool enabled = false;
- int sysctl_fd;
-
- sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
- if (sysctl_fd != -1) {
- char tmpc;
-
- if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
- enabled = (tmpc != '0');
- close(sysctl_fd);
- }
-
- return enabled;
-}
-
int compare_map_keys(int map1_fd, int map2_fd)
{
__u32 key, next_key;
@@ -701,11 +683,69 @@ static const struct argp_option opts[] = {
{},
};
+static FILE *libbpf_capture_stream;
+
+static struct {
+ char *buf;
+ size_t buf_sz;
+} libbpf_output_capture;
+
+/* Creates a global memstream capturing INFO and WARN level output
+ * passed to libbpf_print_fn.
+ * Returns 0 on success, negative value on failure.
+ * On failure the description is printed using PRINT_FAIL and
+ * current test case is marked as fail.
+ */
+int start_libbpf_log_capture(void)
+{
+ if (libbpf_capture_stream) {
+ PRINT_FAIL("%s: libbpf_capture_stream != NULL\n", __func__);
+ return -EINVAL;
+ }
+
+ libbpf_capture_stream = open_memstream(&libbpf_output_capture.buf,
+ &libbpf_output_capture.buf_sz);
+ if (!libbpf_capture_stream) {
+ PRINT_FAIL("%s: open_memstream failed errno=%d\n", __func__, errno);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Destroys global memstream created by start_libbpf_log_capture().
+ * Returns a pointer to captured data which has to be freed.
+ * Returned buffer is null terminated.
+ */
+char *stop_libbpf_log_capture(void)
+{
+ char *buf;
+
+ if (!libbpf_capture_stream)
+ return NULL;
+
+ fputc(0, libbpf_capture_stream);
+ fclose(libbpf_capture_stream);
+ libbpf_capture_stream = NULL;
+ /* get 'buf' after fclose(), see open_memstream() documentation */
+ buf = libbpf_output_capture.buf;
+ memset(&libbpf_output_capture, 0, sizeof(libbpf_output_capture));
+ return buf;
+}
+
static int libbpf_print_fn(enum libbpf_print_level level,
const char *format, va_list args)
{
+ if (libbpf_capture_stream && level != LIBBPF_DEBUG) {
+ va_list args2;
+
+ va_copy(args2, args);
+ vfprintf(libbpf_capture_stream, format, args2);
+ }
+
if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
return 0;
+
vfprintf(stdout, format, args);
return 0;
}
@@ -1099,6 +1139,7 @@ static void run_one_test(int test_num)
cleanup_cgroup_environment();
stdio_restore();
+ free(stop_libbpf_log_capture());
dump_test_log(test, state, false, false, NULL);
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 2f9f6f250f17..0ba5a20b19ba 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -385,13 +385,21 @@ int test__join_cgroup(const char *path);
goto goto_label; \
})
+#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
+
#define SYS_NOFAIL(fmt, ...) \
({ \
char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ int n; \
+ n = snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (n < sizeof(cmd) && sizeof(cmd) - n >= sizeof(ALL_TO_DEV_NULL)) \
+ strcat(cmd, ALL_TO_DEV_NULL); \
system(cmd); \
})
+int start_libbpf_log_capture(void);
+char *stop_libbpf_log_capture(void);
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index b0068a9d2cfe..80c42583f597 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -19,6 +19,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "bpf_util.h"
#ifndef ENOTSUPP
@@ -679,7 +680,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
+ bpf_program__set_flags(prog, testing_prog_flags());
err = bpf_object__load(obj);
if (err) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index f36e41435be7..df04bda1c927 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -67,6 +67,7 @@
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define F_NEEDS_JIT_ENABLED (1 << 2)
/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
#define ADMIN_CAPS (1ULL << CAP_NET_ADMIN | \
@@ -74,6 +75,7 @@
1ULL << CAP_BPF)
#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
static bool unpriv_disabled = false;
+static bool jit_disabled;
static int skips;
static bool verbose = false;
static int verif_log_level = 0;
@@ -1341,48 +1343,6 @@ static bool cmp_str_seq(const char *log, const char *exp)
return true;
}
-static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt)
-{
- __u32 buf_element_size = sizeof(struct bpf_insn);
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 xlated_prog_len;
- struct bpf_insn *buf;
-
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("bpf_prog_get_info_by_fd failed");
- return NULL;
- }
-
- xlated_prog_len = info.xlated_prog_len;
- if (xlated_prog_len % buf_element_size) {
- printf("Program length %d is not multiple of %d\n",
- xlated_prog_len, buf_element_size);
- return NULL;
- }
-
- *cnt = xlated_prog_len / buf_element_size;
- buf = calloc(*cnt, buf_element_size);
- if (!buf) {
- perror("can't allocate xlated program buffer");
- return NULL;
- }
-
- bzero(&info, sizeof(info));
- info.xlated_prog_len = xlated_prog_len;
- info.xlated_prog_insns = (__u64)(unsigned long)buf;
- if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
- perror("second bpf_prog_get_info_by_fd failed");
- goto out_free_buf;
- }
-
- return buf;
-
-out_free_buf:
- free(buf);
- return NULL;
-}
-
static bool is_null_insn(struct bpf_insn *insn)
{
struct bpf_insn null_insn = {};
@@ -1505,7 +1465,7 @@ static void print_insn(struct bpf_insn *buf, int cnt)
static bool check_xlated_program(struct bpf_test *test, int fd_prog)
{
struct bpf_insn *buf;
- int cnt;
+ unsigned int cnt;
bool result = true;
bool check_expected = !is_null_insn(test->expected_insns);
bool check_unexpected = !is_null_insn(test->unexpected_insns);
@@ -1513,8 +1473,7 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog)
if (!check_expected && !check_unexpected)
goto out;
- buf = get_xlated_program(fd_prog, &cnt);
- if (!buf) {
+ if (get_xlated_program(fd_prog, &buf, &cnt)) {
printf("FAIL: can't get xlated program\n");
result = false;
goto out;
@@ -1567,6 +1526,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
__u32 pflags;
int i, err;
+ if ((test->flags & F_NEEDS_JIT_ENABLED) && jit_disabled) {
+ printf("SKIP (requires BPF JIT)\n");
+ skips++;
+ sched_yield();
+ return;
+ }
+
fd_prog = -1;
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
@@ -1588,7 +1554,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (fixup_skips != skips)
return;
- pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
+ pflags = testing_prog_flags();
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
@@ -1887,6 +1853,8 @@ int main(int argc, char **argv)
return EXIT_FAILURE;
}
+ jit_disabled = !is_jit_enabled();
+
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index d2458c1b1671..28b6646662af 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -252,6 +252,34 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
int extra_prog_load_log_flags = 0;
+int testing_prog_flags(void)
+{
+ static int cached_flags = -1;
+ static int prog_flags[] = { BPF_F_TEST_RND_HI32, BPF_F_TEST_REG_INVARIANTS };
+ static struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int insn_cnt = ARRAY_SIZE(insns), i, fd, flags = 0;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ if (cached_flags >= 0)
+ return cached_flags;
+
+ for (i = 0; i < ARRAY_SIZE(prog_flags); i++) {
+ opts.prog_flags = prog_flags[i];
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "flag-test", "GPL",
+ insns, insn_cnt, &opts);
+ if (fd >= 0) {
+ flags |= prog_flags[i];
+ close(fd);
+ }
+ }
+
+ cached_flags = flags;
+ return cached_flags;
+}
+
int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd)
{
@@ -276,7 +304,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
bpf_program__set_type(prog, type);
- flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
+ flags = bpf_program__flags(prog) | testing_prog_flags();
bpf_program__set_flags(prog, flags);
err = bpf_object__load(obj);
@@ -299,7 +327,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
.kern_version = kern_version,
- .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS,
+ .prog_flags = testing_prog_flags(),
.log_level = extra_prog_load_log_flags,
.log_buf = log_buf,
.log_size = log_buf_sz,
@@ -328,12 +356,12 @@ __u64 read_perf_max_sample_freq(void)
return sample_freq;
}
-static int finit_module(int fd, const char *param_values, int flags)
+int finit_module(int fd, const char *param_values, int flags)
{
return syscall(__NR_finit_module, fd, param_values, flags);
}
-static int delete_module(const char *name, int flags)
+int delete_module(const char *name, int flags)
{
return syscall(__NR_delete_module, name, flags);
}
@@ -387,3 +415,63 @@ int kern_sync_rcu(void)
{
return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
}
+
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
+{
+ __u32 buf_element_size = sizeof(struct bpf_insn);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 xlated_prog_len;
+
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("bpf_prog_get_info_by_fd failed");
+ return -1;
+ }
+
+ xlated_prog_len = info.xlated_prog_len;
+ if (xlated_prog_len % buf_element_size) {
+ printf("Program length %u is not multiple of %u\n",
+ xlated_prog_len, buf_element_size);
+ return -1;
+ }
+
+ *cnt = xlated_prog_len / buf_element_size;
+ *buf = calloc(*cnt, buf_element_size);
+ if (!buf) {
+ perror("can't allocate xlated program buffer");
+ return -ENOMEM;
+ }
+
+ bzero(&info, sizeof(info));
+ info.xlated_prog_len = xlated_prog_len;
+ info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("second bpf_prog_get_info_by_fd failed");
+ goto out_free_buf;
+ }
+
+ return 0;
+
+out_free_buf:
+ free(*buf);
+ *buf = NULL;
+ return -1;
+}
+
+bool is_jit_enabled(void)
+{
+ const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+ bool enabled = false;
+ int sysctl_fd;
+
+ sysctl_fd = open(jit_sysctl, O_RDONLY);
+ if (sysctl_fd != -1) {
+ char tmpc;
+
+ if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+ enabled = (tmpc != '0');
+ close(sysctl_fd);
+ }
+
+ return enabled;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 35284faff4f2..d55f6ab12433 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -36,6 +36,8 @@ __u64 read_perf_max_sample_freq(void);
int load_bpf_testmod(bool verbose);
int unload_bpf_testmod(bool verbose);
int kern_sync_rcu(void);
+int finit_module(int fd, const char *param_values, int flags);
+int delete_module(const char *name, int flags);
static inline __u64 get_time_ns(void)
{
@@ -46,4 +48,12 @@ static inline __u64 get_time_ns(void)
return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
}
+struct bpf_insn;
+/* Request BPF program instructions after all rewrites are applied,
+ * e.g. verifier.c:convert_ctx_access() is done.
+ */
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt);
+int testing_prog_flags(void);
+bool is_jit_enabled(void);
+
#endif /* __TESTING_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4faa898ff7fc..27fd7ed3e4b0 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -271,7 +271,7 @@ ssize_t get_uprobe_offset(const void *addr)
* addi r2,r2,XXXX
*/
{
- const u32 *insn = (const u32 *)(uintptr_t)addr;
+ const __u32 *insn = (const __u32 *)(uintptr_t)addr;
if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
index a535d41dc20d..59125b22ae39 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
@@ -57,6 +57,7 @@
.expected_insns = { PSEUDO_CALL_INSN() },
.unexpected_insns = { HELPER_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
@@ -90,6 +91,7 @@
.expected_insns = { HELPER_CALL_INSN() },
.unexpected_insns = { PSEUDO_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
@@ -127,6 +129,7 @@
.expected_insns = { HELPER_CALL_INSN() },
.unexpected_insns = { PSEUDO_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = {
@@ -165,6 +168,7 @@
.expected_insns = { PSEUDO_CALL_INSN() },
.unexpected_insns = { HELPER_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.runs = 0,
.func_info = {
@@ -235,6 +239,7 @@
},
.unexpected_insns = { HELPER_CALL_INSN() },
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.result = ACCEPT,
.func_info = {
{ 0, MAIN_TYPE },
@@ -252,6 +257,7 @@
.unexpected_insns = { HELPER_CALL_INSN() },
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
.func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
.func_info_cnt = 2,
BTF_TYPES
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 8a2ff81d8350..0a9293a57211 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -183,10 +183,10 @@
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
.errstr = "mark_precise: frame0: last_idx 7 first_idx 7\
- mark_precise: frame0: parent state regs=r4 stack=:\
+ mark_precise: frame0: parent state regs=r4 stack=-8:\
mark_precise: frame0: last_idx 6 first_idx 4\
- mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\
- mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\
+ mark_precise: frame0: regs=r4 stack=-8 before 6: (b7) r0 = -1\
+ mark_precise: frame0: regs=r4 stack=-8 before 5: (79) r4 = *(u64 *)(r10 -8)\
mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\
mark_precise: frame0: parent state regs=r0 stack=:\
mark_precise: frame0: last_idx 3 first_idx 3\
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 878d68db0325..bdf5d8180067 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -480,7 +480,7 @@ peek:
for (int j = 0; j < 500; j++) {
if (complete_tx(xsk, clock_id))
break;
- usleep(10*1000);
+ usleep(10);
}
}
}
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 8a72bb7de70f..03a089165d3f 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -15,7 +15,10 @@ TEST_PROGS := \
TEST_FILES := \
lag_lib.sh \
bond_topo_2d1c.sh \
- bond_topo_3d1c.sh \
- net_forwarding_lib.sh
+ bond_topo_3d1c.sh
+
+TEST_INCLUDES := \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
index 6358df5752f9..1ec7f59db7f4 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -20,21 +20,21 @@
# +------+ +------+
#
# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
set -e
-tmp=$(mktemp -q dump.XXXXXX)
cleanup() {
ip link del fab-br0 >/dev/null 2>&1 || :
ip link del fbond >/dev/null 2>&1 || :
ip link del veth1-bond >/dev/null 2>&1 || :
ip link del veth2-bond >/dev/null 2>&1 || :
- modprobe -r bonding >/dev/null 2>&1 || :
- rm -f -- ${tmp}
}
trap cleanup 0 1 2
cleanup
-sleep 1
# create the bridge
ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
@@ -67,13 +67,12 @@ ip link set fab-br0 up
ip link set fbond up
ip addr add dev fab-br0 10.0.0.3
-tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 &
-sleep 15
-pkill tcpdump >/dev/null 2>&1
rc=0
-num=$(grep "packets captured" ${tmp} | awk '{print $1}')
-if test "$num" -gt 0; then
- echo "PASS, captured ${num}"
+tc qdisc add dev veth1-end clsact
+tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass
+if slowwait_for_counter 15 2 \
+ tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then
+ echo "PASS, captured 2"
else
echo "FAIL"
rc=1
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
index 862e947e17c7..8293dbc7c18f 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
@@ -11,7 +11,7 @@ ALL_TESTS="
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
bond_check_flags()
{
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
index 89af402fabbe..78d3e0fe6604 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
@@ -17,6 +17,11 @@
# +----------------+
#
# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
sw="sw-$(mktemp -u XXXXXX)"
host="ns-$(mktemp -u XXXXXX)"
@@ -26,6 +31,16 @@ cleanup()
ip netns del $host
}
+wait_lladdr_dad()
+{
+ $@ | grep fe80 | grep -qv tentative
+}
+
+wait_bond_up()
+{
+ $@ | grep -q 'state UP'
+}
+
trap cleanup 0 1 2
ip netns add $sw
@@ -37,8 +52,8 @@ ip -n $host link add veth1 type veth peer name veth1 netns $sw
ip -n $sw link add br0 type bridge
ip -n $sw link set br0 up
sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1)
-# sleep some time to make sure bridge lladdr pass DAD
-sleep 2
+# wait some time to make sure bridge lladdr pass DAD
+slowwait 2 wait_lladdr_dad ip -n $sw addr show br0
ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \
arp_validate 3 arp_interval 1000
@@ -53,7 +68,7 @@ ip -n $sw link set veth1 master br0
ip -n $sw link set veth0 up
ip -n $sw link set veth1 up
-sleep 5
+slowwait 5 wait_bond_up ip -n $host link show bond0
rc=0
if ip -n $host link show bond0 | grep -q LOWER_UP; then
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 9a3d3c389dad..41d0859feb7d 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -45,15 +45,23 @@ skip_ns()
}
active_slave=""
+active_slave_changed()
+{
+ local old_active_slave=$1
+ local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \
+ ".[].linkinfo.info_data.active_slave")
+ [ "$new_active_slave" != "$old_active_slave" -a "$new_active_slave" != "null" ]
+}
+
check_active_slave()
{
local target_active_slave=$1
+ slowwait 5 active_slave_changed $active_slave
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
test "$active_slave" = "$target_active_slave"
check_err $? "Current active slave is $active_slave but not $target_active_slave"
}
-
# Test bonding prio option
prio_test()
{
@@ -86,13 +94,13 @@ prio_test()
# active slave should be the higher prio slave
ip -n ${s_ns} link set $active_slave down
- bond_check_connection "fail over"
check_active_slave eth2
+ bond_check_connection "fail over"
# when only 1 slave is up
ip -n ${s_ns} link set $active_slave down
- bond_check_connection "only 1 slave up"
check_active_slave eth0
+ bond_check_connection "only 1 slave up"
# when a higher prio slave change to up
ip -n ${s_ns} link set eth2 up
@@ -142,8 +150,8 @@ prio_test()
check_active_slave "eth1"
ip -n ${s_ns} link set $active_slave down
- bond_check_connection "change slave prio"
check_active_slave "eth0"
+ bond_check_connection "change slave prio"
fi
}
@@ -201,6 +209,15 @@ prio()
prio_ns "active-backup"
}
+wait_mii_up()
+{
+ for i in $(seq 0 2); do
+ mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+ [ ${mii_status} != "UP" ] && return 1
+ done
+ return 0
+}
+
arp_validate_test()
{
local param="$1"
@@ -213,7 +230,7 @@ arp_validate_test()
[ $RET -ne 0 ] && log_test "arp_validate" "$retmsg"
# wait for a while to make sure the mii status stable
- sleep 5
+ slowwait 5 wait_mii_up
for i in $(seq 0 2); do
mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
if [ ${mii_status} != "UP" ]; then
@@ -278,10 +295,13 @@ garp_test()
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
ip -n ${s_ns} link set ${active_slave} down
- exp_num=$(echo "${param}" | cut -f6 -d ' ')
- sleep $((exp_num + 2))
+ # wait for active link change
+ slowwait 2 active_slave_changed $active_slave
+ exp_num=$(echo "${param}" | cut -f6 -d ' ')
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ slowwait_for_counter $((exp_num + 5)) $exp_num \
+ tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}"
# check result
real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
@@ -298,8 +318,8 @@ garp_test()
num_grat_arp()
{
local val
- for val in 10 20 30 50; do
- garp_test "mode active-backup miimon 100 num_grat_arp $val peer_notify_delay 1000"
+ for val in 10 20 30; do
+ garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100"
log_test "num_grat_arp" "active-backup miimon num_grat_arp $val"
done
}
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
index a509ef949dcf..195ef83cfbf1 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -28,7 +28,7 @@
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source ${lib_dir}/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
s_ns="s-$(mktemp -u XXXXXX)"
c_ns="c-$(mktemp -u XXXXXX)"
@@ -73,7 +73,6 @@ server_create()
ip -n ${s_ns} link set bond0 up
ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
}
# Reset bond with new mode and options
@@ -96,7 +95,8 @@ bond_reset()
ip -n ${s_ns} link set bond0 up
ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
+ # Wait for IPv6 address ready as it needs DAD
+ slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null
}
server_destroy()
@@ -150,7 +150,7 @@ bond_check_connection()
{
local msg=${1:-"check connection"}
- sleep 2
+ slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null
ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
check_err $? "${msg}: ping failed"
ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
index 5cfe7d8ebc25..e6fa24eded5b 100755
--- a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
+++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
@@ -14,7 +14,7 @@ ALL_TESTS="
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
source "$lib_dir"/lag_lib.sh
diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
index dbdd736a41d3..bf9bcd1b5ec0 100644
--- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
+++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
@@ -107,13 +107,12 @@ lag_setup2x2()
NAMESPACES="${namespaces}"
}
-# cleanup all lag related namespaces and remove the bonding module
+# cleanup all lag related namespaces
lag_cleanup()
{
for n in ${NAMESPACES}; do
ip netns delete ${n} >/dev/null 2>&1 || true
done
- modprobe -r bonding
}
SWITCH="lag_node1"
@@ -159,7 +158,7 @@ test_bond_recovery()
create_bond $@
# verify connectivity
- ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
check_err $? "No connectivity"
# force the links of the bond down
@@ -169,7 +168,7 @@ test_bond_recovery()
ip netns exec ${SWITCH} ip link set eth1 down
# re-verify connectivity
- ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
local rc=$?
check_err $rc "Bond failed to recover"
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
index b76bf5030952..9d26ab4cad0b 100755
--- a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
+++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
@@ -23,7 +23,7 @@ REQUIRE_MZ=no
REQUIRE_JQ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
source "$lib_dir"/lag_lib.sh
cleanup()
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
index 8c2619002147..2d275b3e47dd 100755
--- a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
+++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
@@ -23,7 +23,7 @@ REQUIRE_MZ=no
REQUIRE_JQ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
source "$lib_dir"/lag_lib.sh
cleanup()
diff --git a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh
deleted file mode 120000
index 39c96828c5ef..000000000000
--- a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
index c393e7b73805..cd6817fe5be6 100644
--- a/tools/testing/selftests/drivers/net/dsa/Makefile
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -11,8 +11,22 @@ TEST_PROGS = bridge_locked_port.sh \
tc_actions.sh \
test_bridge_fdb_stress.sh
-TEST_PROGS_EXTENDED := lib.sh tc_common.sh
+TEST_FILES := \
+ run_net_forwarding_test.sh \
+ forwarding.config
-TEST_FILES := forwarding.config
+TEST_INCLUDES := \
+ ../../../net/forwarding/bridge_locked_port.sh \
+ ../../../net/forwarding/bridge_mdb.sh \
+ ../../../net/forwarding/bridge_mld.sh \
+ ../../../net/forwarding/bridge_vlan_aware.sh \
+ ../../../net/forwarding/bridge_vlan_mcast.sh \
+ ../../../net/forwarding/bridge_vlan_unaware.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/local_termination.sh \
+ ../../../net/forwarding/no_forwarding.sh \
+ ../../../net/forwarding/tc_actions.sh \
+ ../../../net/forwarding/tc_common.sh \
+ ../../../net/lib.sh
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
index f5eb940c4c7c..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_locked_port.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
index 76492da525f7..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_mdb.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
index 81a7e0df0474..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_mld.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
index 9831ed74376a..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_vlan_aware.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
index 7f3c3f0bf719..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_vlan_mcast.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
index bf1a57e6bde1..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -1 +1 @@
-../../../net/forwarding/bridge_vlan_unaware.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh
deleted file mode 120000
index 39c96828c5ef..000000000000
--- a/tools/testing/selftests/drivers/net/dsa/lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
index c08166f84501..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/local_termination.sh
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -1 +1 @@
-../../../net/forwarding/local_termination.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
index b9757466bc97..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -1 +1 @@
-../../../net/forwarding/no_forwarding.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
new file mode 100755
index 000000000000..4106c0a102ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
+testname=$(basename "${BASH_SOURCE[0]}")
+
+source "$libdir"/forwarding.config
+cd "$libdir"/../../../net/forwarding/ || exit 1
+source "./$testname" "$@"
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
index 306213d9430e..d16a65e7595d 120000
--- a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
+++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
@@ -1 +1 @@
-../../../net/forwarding/tc_actions.sh \ No newline at end of file
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_common.sh b/tools/testing/selftests/drivers/net/dsa/tc_common.sh
deleted file mode 120000
index bc3465bdc36b..000000000000
--- a/tools/testing/selftests/drivers/net/dsa/tc_common.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/tc_common.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
index 92acab83fbe2..74682151d04d 100755
--- a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -19,7 +19,7 @@ REQUIRE_JQ="no"
REQUIRE_MZ="no"
NETIF_CREATE="no"
lib_dir=$(dirname "$0")
-source "$lib_dir"/lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
cleanup() {
echo "Cleaning up"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index 616d3581419c..31252bc8775e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -869,7 +869,7 @@ bloom_simple_test()
bloom_complex_test()
{
# Bloom filter index computation is affected from region ID, eRP
- # ID and from the region key size. In order to excercise those parts
+ # ID and from the region key size. In order to exercise those parts
# of the Bloom filter code, use a series of regions, each with a
# different key size and send packet that should hit all of them.
local index
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
new file mode 100644
index 000000000000..5bace0b7fb57
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = devlink.sh \
+ devlink_in_netns.sh \
+ devlink_trap.sh \
+ ethtool-coalesce.sh \
+ ethtool-fec.sh \
+ ethtool-pause.sh \
+ ethtool-ring.sh \
+ fib.sh \
+ hw_stats_l3.sh \
+ nexthop.sh \
+ peer.sh \
+ psample.sh \
+ tc-mq-visibility.sh \
+ udp_tunnel_nic.sh \
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 46e20b13473c..b5ea2526f23c 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -31,7 +31,7 @@ devlink_wait()
fw_flash_test()
{
- DUMMYFILE=$(find /lib/firmware -maxdepth 1 -type f -printf '%f\n' |head -1)
+ DUMMYFILE=$(find /lib/firmware -type f -printf '%P\n' | head -1)
RET=0
if [ -z "$DUMMYFILE" ]
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
index 7d7829f57550..6c52ce1b0450 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -49,7 +49,7 @@ for o in llrs rs; do
Active FEC encoding: ${o^^}"
done
-# Test mutliple bits
+# Test multiple bits
$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
check $?
s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..aed62d9e6c0a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ../../../net/net_helper.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+ NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_DEV_1_NAME netns nssv
+ ip link set $NSIM_DEV_2_NAME netns nscl
+
+ ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+ ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+ ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+socat_check || exit 4
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netdevsim should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netnsid should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with self should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "invalid arg should fail"
+ cleanup_ns
+ exit 1
+fi
+
+# send/recv packets
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+res=0
+
+wait_local_port_listen nssv 1234 tcp
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+ echo "expected 3 bytes, got $count"
+ res=1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $res
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index f98435c502f6..384cfa3d38a6 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -270,7 +270,7 @@ for port in 0 1; do
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
msg="new NIC device created"
exp0=( 0 0 0 0 )
@@ -284,8 +284,8 @@ for port in 0 1; do
msg="VxLAN v4 devices go down"
exp0=( 0 0 0 0 )
- ifconfig vxlan1 down
- ifconfig vxlan0 down
+ ip link set dev vxlan1 down
+ ip link set dev vxlan0 down
check_tables
msg="VxLAN v6 devices"
@@ -293,7 +293,7 @@ for port in 0 1; do
new_vxlan vxlanA 4789 $NSIM_NETDEV 6
for ifc in vxlan0 vxlan1; do
- ifconfig $ifc up
+ ip link set dev $ifc up
done
new_vxlan vxlanB 4789 $NSIM_NETDEV 6
@@ -307,14 +307,14 @@ for port in 0 1; do
new_geneve gnv0 6081
msg="NIC device goes down"
- ifconfig $NSIM_NETDEV down
+ ip link set dev $NSIM_NETDEV down
if [ $port -eq 1 ]; then
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
fi
check_tables
msg="NIC device goes up again"
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
exp1=( `mke 6081 2` 0 0 0 )
check_tables
@@ -433,7 +433,7 @@ for port in 0 1; do
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "overflow NIC table"
overflow_table1 "overflow NIC table"
@@ -491,7 +491,7 @@ for port in 0 1; do
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "overflow NIC table"
overflow_table1 "overflow NIC table"
@@ -548,7 +548,7 @@ for port in 0 1; do
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "destroy NIC"
overflow_table1 "destroy NIC"
@@ -578,7 +578,7 @@ for port in 0 1; do
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
@@ -639,7 +639,7 @@ for port in 0 1; do
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
@@ -695,7 +695,7 @@ for port in 0 1; do
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
exp0=( `mke 10000 1` 0 0 0 )
@@ -755,7 +755,7 @@ for port in 0 1; do
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
exp0=( `mke 10000 1` 0 0 0 )
@@ -768,7 +768,7 @@ for port in 0 1; do
check_tables
msg="NIC device goes down"
- ifconfig $NSIM_NETDEV down
+ ip link set dev $NSIM_NETDEV down
if [ $port -eq 1 ]; then
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
@@ -779,7 +779,7 @@ for port in 0 1; do
check_tables
msg="NIC device goes up again"
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
exp0=( `mke 10000 1` 0 0 0 )
check_tables
@@ -827,12 +827,12 @@ new_vxlan vxlan1 4789 $NSIM_NETDEV2
msg="VxLAN v4 devices go down"
exp0=( 0 0 0 0 )
-ifconfig vxlan1 down
-ifconfig vxlan0 down
+ip link set dev vxlan1 down
+ip link set dev vxlan0 down
check_tables
for ifc in vxlan0 vxlan1; do
- ifconfig $ifc up
+ ip link set dev $ifc up
done
msg="VxLAN v6 device"
@@ -844,11 +844,11 @@ exp1=( `mke 6081 2` 0 0 0 )
new_geneve gnv0 6081
msg="NIC device goes down"
-ifconfig $NSIM_NETDEV down
+ip link set dev $NSIM_NETDEV down
check_tables
msg="NIC device goes up again"
-ifconfig $NSIM_NETDEV up
+ip link set dev $NSIM_NETDEV up
check_tables
for i in `seq 2`; do
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 6a86e61e8bfe..2d5a76d99181 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -3,8 +3,9 @@
TEST_PROGS := dev_addr_lists.sh
-TEST_FILES := \
- lag_lib.sh \
- net_forwarding_lib.sh
+TEST_INCLUDES := \
+ ../bonding/lag_lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
index 33913112d5ca..b1ec7755b783 100755
--- a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
+++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
@@ -11,9 +11,9 @@ ALL_TESTS="
REQUIRE_MZ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
-source "$lib_dir"/lag_lib.sh
+source "$lib_dir"/../bonding/lag_lib.sh
destroy()
diff --git a/tools/testing/selftests/drivers/net/team/lag_lib.sh b/tools/testing/selftests/drivers/net/team/lag_lib.sh
deleted file mode 120000
index e1347a10afde..000000000000
--- a/tools/testing/selftests/drivers/net/team/lag_lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../bonding/lag_lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh
deleted file mode 120000
index 39c96828c5ef..000000000000
--- a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../../net/forwarding/lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index a781e6311810..541bf192e30e 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -25,6 +25,7 @@
* ksft_test_result_skip(fmt, ...);
* ksft_test_result_xfail(fmt, ...);
* ksft_test_result_error(fmt, ...);
+ * ksft_test_result_code(exit_code, test_name, fmt, ...);
*
* When all tests are finished, clean up and exit the program with one of:
*
@@ -254,6 +255,50 @@ static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...)
va_end(args);
}
+static inline __printf(3, 4)
+void ksft_test_result_code(int exit_code, const char *test_name,
+ const char *msg, ...)
+{
+ const char *tap_code = "ok";
+ const char *directive = "";
+ int saved_errno = errno;
+ va_list args;
+
+ switch (exit_code) {
+ case KSFT_PASS:
+ ksft_cnt.ksft_pass++;
+ break;
+ case KSFT_XFAIL:
+ directive = " # XFAIL ";
+ ksft_cnt.ksft_xfail++;
+ break;
+ case KSFT_XPASS:
+ directive = " # XPASS ";
+ ksft_cnt.ksft_xpass++;
+ break;
+ case KSFT_SKIP:
+ directive = " # SKIP ";
+ ksft_cnt.ksft_xskip++;
+ break;
+ case KSFT_FAIL:
+ default:
+ tap_code = "not ok";
+ ksft_cnt.ksft_fail++;
+ break;
+ }
+
+ /* Docs seem to call for double space if directive is absent */
+ if (!directive[0] && msg[0])
+ directive = " # ";
+
+ va_start(args, msg);
+ printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
+ errno = saved_errno;
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+}
+
static inline int ksft_exit_pass(void)
{
ksft_print_cnts();
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index e05ac8261046..4fd735e48ee7 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -56,6 +56,7 @@
#include <asm/types.h>
#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -95,14 +96,6 @@
* E.g., #define TH_LOG_ENABLED 1
*
* If no definition is provided, logging is enabled by default.
- *
- * If there is no way to print an error message for the process running the
- * test (e.g. not allowed to write to stderr), it is still possible to get the
- * ASSERT_* number for which the test failed. This behavior can be enabled by
- * writing `_metadata->no_print = true;` before the check sequence that is
- * unable to print. When an error occur, instead of printing an error message
- * and calling `abort(3)`, the test process call `_exit(2)` with the assert
- * number as argument, which is then printed by the parent process.
*/
#define TH_LOG(fmt, ...) do { \
if (TH_LOG_ENABLED) \
@@ -135,8 +128,7 @@
fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
_metadata->results->reason); \
} \
- _metadata->passed = 1; \
- _metadata->skip = 1; \
+ _metadata->exit_code = KSFT_SKIP; \
_metadata->trigger = 0; \
statement; \
} while (0)
@@ -363,6 +355,11 @@
* Defines a test that depends on a fixture (e.g., is part of a test case).
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
+ *
+ * The @test_name code is run in a separate process sharing the same memory
+ * (i.e. vfork), which means that the test process can update its privileges
+ * without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from
+ * a directory where write access was dropped).
*/
#define TEST_F(fixture_name, test_name) \
__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
@@ -384,17 +381,34 @@
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
FIXTURE_DATA(fixture_name) self; \
+ pid_t child = 1; \
+ int status = 0; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
if (setjmp(_metadata->env) == 0) { \
- fixture_name##_setup(_metadata, &self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (!_metadata->passed || _metadata->skip) \
- return; \
- _metadata->setup_completed = true; \
- fixture_name##_##test_name(_metadata, &self, variant->data); \
+ /* Use the same _metadata. */ \
+ child = vfork(); \
+ if (child == 0) { \
+ fixture_name##_setup(_metadata, &self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (_metadata->exit_code) \
+ _exit(0); \
+ _metadata->setup_completed = true; \
+ fixture_name##_##test_name(_metadata, &self, variant->data); \
+ } else if (child < 0 || child != waitpid(child, &status, 0)) { \
+ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
+ _metadata->exit_code = KSFT_FAIL; \
+ } \
+ } \
+ if (child == 0) { \
+ if (_metadata->setup_completed && !_metadata->teardown_parent) \
+ fixture_name##_teardown(_metadata, &self, variant->data); \
+ _exit(0); \
} \
- if (_metadata->setup_completed) \
+ if (_metadata->setup_completed && _metadata->teardown_parent) \
fixture_name##_teardown(_metadata, &self, variant->data); \
+ if (!WIFEXITED(status) && WIFSIGNALED(status)) \
+ /* Forward signal to __wait_for_test(). */ \
+ kill(getpid(), WTERMSIG(status)); \
__test_check_assert(_metadata); \
} \
static struct __test_metadata \
@@ -404,6 +418,7 @@
.fixture = &_##fixture_name##_fixture_object, \
.termsig = signal, \
.timeout = tmout, \
+ .teardown_parent = false, \
}; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##test_name(void) \
@@ -694,18 +709,12 @@
for (; _metadata->trigger; _metadata->trigger = \
__bail(_assert, _metadata))
-#define __INC_STEP(_metadata) \
- /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \
- if (_metadata->passed && _metadata->step < 253) \
- _metadata->step++;
-
#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
__typeof__(_expected) __exp = (_expected); \
__typeof__(_seen) __seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(__exp _t __seen)) { \
/* Report with actual signedness to avoid weird output. */ \
switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
@@ -742,7 +751,7 @@
break; \
} \
} \
- _metadata->passed = 0; \
+ _metadata->exit_code = KSFT_FAIL; \
/* Ensure the optional handler is triggered */ \
_metadata->trigger = 1; \
} \
@@ -751,10 +760,9 @@
#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
const char *__exp = (_expected); \
const char *__seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(strcmp(__exp, __seen) _t 0)) { \
__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
- _metadata->passed = 0; \
+ _metadata->exit_code = KSFT_FAIL; \
_metadata->trigger = 1; \
} \
} while (0); OPTIONAL_HANDLER(_assert)
@@ -800,6 +808,37 @@ struct __fixture_metadata {
.prev = &_fixture_global,
};
+struct __test_xfail {
+ struct __fixture_metadata *fixture;
+ struct __fixture_variant_metadata *variant;
+ struct __test_metadata *test;
+ struct __test_xfail *prev, *next;
+};
+
+/**
+ * XFAIL_ADD() - mark variant + test case combination as expected to fail
+ * @fixture_name: name of the fixture
+ * @variant_name: name of the variant
+ * @test_name: name of the test case
+ *
+ * Mark a combination of variant + test case for a given fixture as expected
+ * to fail. Tests marked this way will report XPASS / XFAIL return codes,
+ * instead of PASS / FAIL,and use respective counters.
+ */
+#define XFAIL_ADD(fixture_name, variant_name, test_name) \
+ static struct __test_xfail \
+ _##fixture_name##_##variant_name##_##test_name##_xfail = \
+ { \
+ .fixture = &_##fixture_name##_fixture_object, \
+ .variant = &_##fixture_name##_##variant_name##_object, \
+ .test = &_##fixture_name##_##test_name##_object, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
+ { \
+ __register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \
+ }
+
static struct __fixture_metadata *__fixture_list = &_fixture_global;
static int __constructor_order;
@@ -814,6 +853,7 @@ static inline void __register_fixture(struct __fixture_metadata *f)
struct __fixture_variant_metadata {
const char *name;
const void *data;
+ struct __test_xfail *xfails;
struct __fixture_variant_metadata *prev, *next;
};
@@ -832,20 +872,24 @@ struct __test_metadata {
pid_t pid; /* pid of test when being run */
struct __fixture_metadata *fixture;
int termsig;
- int passed;
- int skip; /* did SKIP get used? */
+ int exit_code;
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
bool timed_out; /* did this test timeout instead of exiting? */
- __u8 step;
- bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
bool aborted; /* stopped test due to failed ASSERT */
bool setup_completed; /* did setup finish? */
+ bool teardown_parent; /* run teardown in a parent process */
jmp_buf env; /* for exiting out of test early */
struct __test_results *results;
struct __test_metadata *prev, *next;
};
+static inline bool __test_passed(struct __test_metadata *metadata)
+{
+ return metadata->exit_code != KSFT_FAIL &&
+ metadata->exit_code <= KSFT_SKIP;
+}
+
/*
* Since constructors are called in reverse order, reverse the test
* list so tests are run in source declaration order.
@@ -860,6 +904,11 @@ static inline void __register_test(struct __test_metadata *t)
__LIST_APPEND(t->fixture->tests, t);
}
+static inline void __register_xfail(struct __test_xfail *xf)
+{
+ __LIST_APPEND(xf->variant->xfails, xf);
+}
+
static inline int __bail(int for_realz, struct __test_metadata *t)
{
/* if this is ASSERT, return immediately. */
@@ -873,11 +922,8 @@ static inline int __bail(int for_realz, struct __test_metadata *t)
static inline void __test_check_assert(struct __test_metadata *t)
{
- if (t->aborted) {
- if (t->no_print)
- _exit(t->step);
+ if (t->aborted)
abort();
- }
}
struct __test_metadata *__active_test;
@@ -913,7 +959,7 @@ void __wait_for_test(struct __test_metadata *t)
int status;
if (sigaction(SIGALRM, &action, &saved_action)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: unable to install SIGALRM handler\n",
t->name);
@@ -925,7 +971,7 @@ void __wait_for_test(struct __test_metadata *t)
waitpid(t->pid, &status, 0);
alarm(0);
if (sigaction(SIGALRM, &saved_action, NULL)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: unable to uninstall SIGALRM handler\n",
t->name);
@@ -934,16 +980,16 @@ void __wait_for_test(struct __test_metadata *t)
__active_test = NULL;
if (t->timed_out) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by timeout\n", t->name);
} else if (WIFEXITED(status)) {
- if (WEXITSTATUS(status) == 255) {
- /* SKIP */
- t->passed = 1;
- t->skip = 1;
+ if (WEXITSTATUS(status) == KSFT_SKIP ||
+ WEXITSTATUS(status) == KSFT_XPASS ||
+ WEXITSTATUS(status) == KSFT_XFAIL) {
+ t->exit_code = WEXITSTATUS(status);
} else if (t->termsig != -1) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test exited normally instead of by signal (code: %d)\n",
t->name,
@@ -951,26 +997,25 @@ void __wait_for_test(struct __test_metadata *t)
} else {
switch (WEXITSTATUS(status)) {
/* Success */
- case 0:
- t->passed = 1;
+ case KSFT_PASS:
+ t->exit_code = KSFT_PASS;
break;
- /* Other failure, assume step report. */
+ /* Failure */
default:
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: Test failed at step #%d\n",
- t->name,
- WEXITSTATUS(status));
+ "# %s: Test failed\n",
+ t->name);
}
}
} else if (WIFSIGNALED(status)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
if (WTERMSIG(status) == SIGABRT) {
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by assertion\n",
t->name);
} else if (WTERMSIG(status) == t->termsig) {
- t->passed = 1;
+ t->exit_code = KSFT_PASS;
} else {
fprintf(TH_LOG_STREAM,
"# %s: Test terminated unexpectedly by signal %d\n",
@@ -1110,16 +1155,19 @@ void __run_test(struct __fixture_metadata *f,
struct __fixture_variant_metadata *variant,
struct __test_metadata *t)
{
+ struct __test_xfail *xfail;
+ char test_name[LINE_MAX];
+ const char *diagnostic;
+
/* reset test struct */
- t->passed = 1;
- t->skip = 0;
+ t->exit_code = KSFT_PASS;
t->trigger = 0;
- t->step = 1;
- t->no_print = 0;
memset(t->results->reason, 0, sizeof(t->results->reason));
- ksft_print_msg(" RUN %s%s%s.%s ...\n",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ ksft_print_msg(" RUN %s ...\n", test_name);
/* Make sure output buffers are flushed before fork */
fflush(stdout);
@@ -1128,29 +1176,33 @@ void __run_test(struct __fixture_metadata *f,
t->pid = fork();
if (t->pid < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
} else if (t->pid == 0) {
setpgrp();
t->fn(t, variant);
- if (t->skip)
- _exit(255);
- /* Pass is exit 0 */
- if (t->passed)
- _exit(0);
- /* Something else happened, report the step. */
- _exit(t->step);
+ _exit(t->exit_code);
} else {
__wait_for_test(t);
}
- ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ ksft_print_msg(" %4s %s\n",
+ __test_passed(t) ? "OK" : "FAIL", test_name);
- if (t->skip)
- ksft_test_result_skip("%s\n", t->results->reason[0] ?
- t->results->reason : "unknown");
+ /* Check if we're expecting this test to fail */
+ for (xfail = variant->xfails; xfail; xfail = xfail->next)
+ if (xfail->test == t)
+ break;
+ if (xfail)
+ t->exit_code = __test_passed(t) ? KSFT_XPASS : KSFT_XFAIL;
+
+ if (t->results->reason[0])
+ diagnostic = t->results->reason;
+ else if (t->exit_code == KSFT_PASS || t->exit_code == KSFT_FAIL)
+ diagnostic = NULL;
else
- ksft_test_result(t->passed, "%s%s%s.%s\n",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ diagnostic = "unknown";
+
+ ksft_test_result_code(t->exit_code, test_name,
+ diagnostic ? "%s" : "", diagnostic);
}
static int test_harness_run(int argc, char **argv)
@@ -1198,7 +1250,7 @@ static int test_harness_run(int argc, char **argv)
t->results = results;
__run_test(f, v, t);
t->results = NULL;
- if (t->passed)
+ if (__test_passed(t))
pass_count++;
else
ret = 1;
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 646f778dfb1e..a6f89aaea77d 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -307,7 +307,7 @@ TEST(ruleset_fd_transfer)
dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
ASSERT_LE(0, dir_fd);
ASSERT_EQ(0, close(dir_fd));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index e64bbdf0e86e..401e2eb092a3 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -23,62 +23,8 @@
#define __maybe_unused __attribute__((__unused__))
#endif
-/*
- * TEST_F_FORK() is useful when a test drop privileges but the corresponding
- * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory
- * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is
- * also called when the test failed, but not when FIXTURE_SETUP() failed. For
- * this to be possible, we must not call abort() but instead exit smoothly
- * (hence the step print).
- */
-/* clang-format off */
-#define TEST_F_FORK(fixture_name, test_name) \
- static void fixture_name##_##test_name##_child( \
- struct __test_metadata *_metadata, \
- FIXTURE_DATA(fixture_name) *self, \
- const FIXTURE_VARIANT(fixture_name) *variant); \
- TEST_F(fixture_name, test_name) \
- { \
- int status; \
- const pid_t child = fork(); \
- if (child < 0) \
- abort(); \
- if (child == 0) { \
- _metadata->no_print = 1; \
- fixture_name##_##test_name##_child(_metadata, self, variant); \
- if (_metadata->skip) \
- _exit(255); \
- if (_metadata->passed) \
- _exit(0); \
- _exit(_metadata->step); \
- } \
- if (child != waitpid(child, &status, 0)) \
- abort(); \
- if (WIFSIGNALED(status) || !WIFEXITED(status)) { \
- _metadata->passed = 0; \
- _metadata->step = 1; \
- return; \
- } \
- switch (WEXITSTATUS(status)) { \
- case 0: \
- _metadata->passed = 1; \
- break; \
- case 255: \
- _metadata->passed = 1; \
- _metadata->skip = 1; \
- break; \
- default: \
- _metadata->passed = 0; \
- _metadata->step = WEXITSTATUS(status); \
- break; \
- } \
- } \
- static void fixture_name##_##test_name##_child( \
- struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
- const FIXTURE_VARIANT(fixture_name) \
- __attribute__((unused)) *variant)
-/* clang-format on */
+/* TEST_F_FORK() should not be used for new tests. */
+#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
#ifndef landlock_create_ruleset
static inline int
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 2d6d9b43d958..9a6036fbf289 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -285,6 +285,8 @@ static void prepare_layout_opt(struct __test_metadata *const _metadata,
static void prepare_layout(struct __test_metadata *const _metadata)
{
+ _metadata->teardown_parent = true;
+
prepare_layout_opt(_metadata, &mnt_tmp);
}
@@ -1964,7 +1966,7 @@ static void test_execute(struct __test_metadata *const _metadata, const int err,
strerror(errno));
};
ASSERT_EQ(err, errno);
- _exit(_metadata->passed ? 2 : 1);
+ _exit(__test_passed(_metadata) ? 2 : 1);
return;
}
ASSERT_EQ(child, waitpid(child, &status, 0));
@@ -3807,7 +3809,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes)
ASSERT_EQ(0, close(socket_fds[0]));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
@@ -3861,9 +3863,7 @@ FIXTURE_SETUP(layout1_bind)
FIXTURE_TEARDOWN(layout1_bind)
{
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(dir_s2d2));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+ /* umount(dir_s2d2)) is handled by namespace lifetime. */
remove_layout1(_metadata);
@@ -4276,9 +4276,8 @@ FIXTURE_TEARDOWN(layout2_overlay)
EXPECT_EQ(0, remove_path(lower_fl1));
EXPECT_EQ(0, remove_path(lower_do1_fo2));
EXPECT_EQ(0, remove_path(lower_fo1));
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(LOWER_BASE));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* umount(LOWER_BASE)) is handled by namespace lifetime. */
EXPECT_EQ(0, remove_path(LOWER_BASE));
EXPECT_EQ(0, remove_path(upper_do1_fu3));
@@ -4287,14 +4286,11 @@ FIXTURE_TEARDOWN(layout2_overlay)
EXPECT_EQ(0, remove_path(upper_do1_fo2));
EXPECT_EQ(0, remove_path(upper_fo1));
EXPECT_EQ(0, remove_path(UPPER_WORK "/work"));
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(UPPER_BASE));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* umount(UPPER_BASE)) is handled by namespace lifetime. */
EXPECT_EQ(0, remove_path(UPPER_BASE));
- set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(MERGE_DATA));
- clear_cap(_metadata, CAP_SYS_ADMIN);
+ /* umount(MERGE_DATA)) is handled by namespace lifetime. */
EXPECT_EQ(0, remove_path(MERGE_DATA));
cleanup_layout(_metadata);
@@ -4691,6 +4687,8 @@ FIXTURE_SETUP(layout3_fs)
SKIP(return, "this filesystem is not supported (setup)");
}
+ _metadata->teardown_parent = true;
+
slash = strrchr(variant->file_path, '/');
ASSERT_NE(slash, NULL);
dir_len = (size_t)slash - (size_t)variant->file_path;
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 936cfc879f1d..f21cfbbc3638 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -539,7 +539,7 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata,
}
EXPECT_EQ(0, close(connect_fd));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
@@ -834,7 +834,7 @@ TEST_F(protocol, connect_unspec)
}
EXPECT_EQ(0, close(connect_fd));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 55e7871631a1..a19db4d0b3bd 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -314,7 +314,7 @@ TEST_F(hierarchy, trace)
ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
if (variant->domain_both) {
create_domain(_metadata);
- if (!_metadata->passed)
+ if (!__test_passed(_metadata))
/* Aborts before forking. */
return;
}
@@ -375,7 +375,7 @@ TEST_F(hierarchy, trace)
/* Waits for the parent PTRACE_ATTACH test. */
ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
- _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ _exit(_metadata->exit_code);
return;
}
@@ -430,9 +430,10 @@ TEST_F(hierarchy, trace)
/* Signals that the parent PTRACE_ATTACH test is done. */
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
ASSERT_EQ(child, waitpid(child, &status, 0));
+
if (WIFSIGNALED(status) || !WIFEXITED(status) ||
WEXITSTATUS(status) != EXIT_SUCCESS)
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 286ce0ee102b..da2cade3bab0 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -70,11 +70,29 @@ define RUN_TESTS
run_many $(1)
endef
+define INSTALL_INCLUDES
+ $(if $(TEST_INCLUDES), \
+ relative_files=""; \
+ for entry in $(TEST_INCLUDES); do \
+ entry_dir=$$(readlink -e "$$(dirname "$$entry")"); \
+ entry_name=$$(basename "$$entry"); \
+ relative_dir=$${entry_dir#"$$SRC_PATH"/}; \
+ if [ "$$relative_dir" = "$$entry_dir" ]; then \
+ echo "Error: TEST_INCLUDES entry \"$$entry\" not located inside selftests directory ($$SRC_PATH)" >&2; \
+ exit 1; \
+ fi; \
+ relative_files="$$relative_files $$relative_dir/$$entry_name"; \
+ done; \
+ cd $(SRC_PATH) && rsync -aR $$relative_files $(OBJ_PATH)/ \
+ )
+endef
+
run_tests: all
ifdef building_out_of_srctree
@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)$(TEST_GEN_MODS_DIR)" != "X" ]; then \
rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_MODS_DIR) $(OUTPUT); \
fi
+ @$(INSTALL_INCLUDES)
@if [ "X$(TEST_PROGS)" != "X" ]; then \
$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \
$(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \
@@ -116,6 +134,7 @@ endef
install: all
ifdef INSTALL_PATH
$(INSTALL_RULE)
+ $(INSTALL_INCLUDES)
else
$(error Error: set INSTALL_PATH to use install)
endif
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 20294553a5dd..d2cfc9b494a0 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -138,7 +138,7 @@ FIXTURE_SETUP(hmm)
self->fd = hmm_open(variant->device_number);
if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
- SKIP(exit(0), "DEVICE_COHERENT not available");
+ SKIP(return, "DEVICE_COHERENT not available");
ASSERT_GE(self->fd, 0);
}
@@ -149,7 +149,7 @@ FIXTURE_SETUP(hmm2)
self->fd0 = hmm_open(variant->device_number0);
if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
- SKIP(exit(0), "DEVICE_COHERENT not available");
+ SKIP(return, "DEVICE_COHERENT not available");
ASSERT_GE(self->fd0, 0);
self->fd1 = hmm_open(variant->device_number1);
ASSERT_GE(self->fd1, 0);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 211753756bde..7b6918d5f4af 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -97,6 +97,8 @@ TEST_PROGS += vlan_hw_filter.sh
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_INCLUDES := forwarding/lib.sh
+
include ../lib.mk
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 0d4f252427e2..386ebd829df5 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -38,6 +38,9 @@
# server / client nomenclature relative to ns-A
source lib.sh
+
+PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
+
VERBOSE=0
NSA_DEV=eth1
@@ -97,6 +100,7 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local ans
[ "${VERBOSE}" = "1" ] && echo
@@ -106,19 +110,20 @@ log_test()
else
nfail=$((nfail+1))
printf "TEST: %-70s [FAIL]\n" "${msg}"
+ echo " expected rc $expected; actual rc $rc"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
fi
if [ "${PAUSE}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
kill_procs
@@ -187,6 +192,15 @@ kill_procs()
sleep 1
}
+set_ping_group()
+{
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'"
+ fi
+
+ ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'
+}
+
do_run_cmd()
{
local cmd="$*"
@@ -835,14 +849,14 @@ ipv4_ping()
set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
ipv4_ping_novrf
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv4_ping_vrf
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_ping_vrf
}
@@ -2053,12 +2067,12 @@ ipv4_addr_bind()
log_subsection "No VRF"
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_addr_bind_novrf
log_subsection "With VRF"
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_addr_bind_vrf
}
@@ -2521,14 +2535,14 @@ ipv6_ping()
setup
ipv6_ping_novrf
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv6_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv6_ping_vrf
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv6_ping_vrf
}
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d5a281aadbac..ac0b2c6a5761 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -2066,6 +2066,12 @@ basic()
run_cmd "$IP nexthop get id 1"
log_test $? 2 "Nexthop get on non-existent id"
+ run_cmd "$IP nexthop del id 1"
+ log_test $? 2 "Nexthop del with non-existent id"
+
+ run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8"
+ log_test $? 2 "Nexthop del with non-existent id and extra attributes"
+
# attempt to create nh without a device or gw - fails
run_cmd "$IP nexthop add id 1"
log_test $? 2 "Nexthop with no device or gateway"
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index b3ecccbbfcd2..73895711cdf4 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -743,6 +743,43 @@ fib_notify_test()
cleanup &> /dev/null
}
+# Create a new dummy_10 to remove all associated routes.
+reset_dummy_10()
+{
+ $IP link del dev dummy_10
+
+ $IP link add dummy_10 type dummy
+ $IP link set dev dummy_10 up
+ $IP -6 address add 2001:10::1/64 dev dummy_10
+}
+
+check_rt_num()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ echo "FAIL: Expected $expected routes, got $num"
+ ret=1
+ else
+ ret=0
+ fi
+}
+
+check_rt_num_clean()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ log_test 1 0 "expected $expected routes, got $num"
+ set +e
+ cleanup &> /dev/null
+ return 1
+ fi
+ return 0
+}
+
fib6_gc_test()
{
setup
@@ -751,7 +788,8 @@ fib6_gc_test()
echo "Fib6 garbage collection test"
set -e
- EXPIRE=3
+ EXPIRE=5
+ GC_WAIT_TIME=$((EXPIRE * 2 + 2))
# Check expiration of routes every $EXPIRE seconds (GC)
$NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE
@@ -763,44 +801,110 @@ fib6_gc_test()
$NS_EXEC sysctl -wq net.ipv6.route.flush=1
# Temporary routes
- for i in $(seq 1 1000); do
+ for i in $(seq 1 5); do
# Expire route after $EXPIRE seconds
$IP -6 route add 2001:20::$i \
via 2001:10::2 dev dummy_10 expires $EXPIRE
done
- sleep $(($EXPIRE * 2))
- N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l)
- if [ $N_EXP_SLEEP -ne 0 ]; then
- echo "FAIL: expected 0 routes with expires, got $N_EXP_SLEEP"
- ret=1
- else
- ret=0
- fi
+ sleep $GC_WAIT_TIME
+ $NS_EXEC sysctl -wq net.ipv6.route.flush=1
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection"
+
+ reset_dummy_10
# Permanent routes
- for i in $(seq 1 5000); do
+ for i in $(seq 1 5); do
$IP -6 route add 2001:30::$i \
via 2001:10::2 dev dummy_10
done
# Temporary routes
- for i in $(seq 1 1000); do
+ for i in $(seq 1 5); do
# Expire route after $EXPIRE seconds
$IP -6 route add 2001:20::$i \
via 2001:10::2 dev dummy_10 expires $EXPIRE
done
- sleep $(($EXPIRE * 2))
- N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l)
- if [ $N_EXP_SLEEP -ne 0 ]; then
- echo "FAIL: expected 0 routes with expires," \
- "got $N_EXP_SLEEP (5000 permanent routes)"
- ret=1
- else
- ret=0
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (with permanent routes)"
+
+ reset_dummy_10
+
+ # Permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ # Replace with temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with expires)"
+
+ reset_dummy_10
+
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Replace with permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
+
+ # ra6 is required for the next test. (ipv6toolkit)
+ if [ ! -x "$(command -v ra6)" ]; then
+ echo "SKIP: ra6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
fi
- set +e
+ # Delete dummy_10 and remove all routes
+ $IP link del dev dummy_10
- log_test $ret 0 "ipv6 route garbage collection"
+ # Create a pair of veth devices to send a RA message from one
+ # device to another.
+ $IP link add veth1 type veth peer name veth2
+ $IP link set dev veth1 up
+ $IP link set dev veth2 up
+ $IP -6 address add 2001:10::1/64 dev veth1 nodad
+ $IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+ # Make veth1 ready to receive RA messages.
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+ # Send a RA message with a route from veth2 to veth1.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE
+
+ # Wait for the RA message.
+ sleep 1
+
+ # systemd may mess up the test. You syould make sure that
+ # systemd-networkd.service and systemd-networkd.socket are stopped.
+ check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (RA message)"
+
+ set +e
cleanup &> /dev/null
}
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 4de92632f483..535865b3d1d6 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -123,10 +123,14 @@ TEST_FILES := devlink_lib.sh \
mirror_gre_topo_lib.sh \
mirror_lib.sh \
mirror_topo_lib.sh \
+ router_mpath_nh_lib.sh \
sch_ets_core.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
sch_tbf_etsprio.sh \
tc_common.sh
+TEST_INCLUDES := \
+ ../lib.sh
+
include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 697994a9278b..8d7a1a004b7c 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,14 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_VRF=m
CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_MACVLAN=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_VLAN=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_ACT_GACT=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
CONFIG_VETH=m
CONFIG_NAMESPACES=y
CONFIG_NET_NS=y
+CONFIG_VXLAN=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 56eb83d1a3bd..1783c10215e5 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -183,42 +183,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
@@ -234,14 +234,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:4::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:4::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index 4a546509de90..1fc4f0242fc5 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -28,6 +28,8 @@ PING=ping
PING6=ping6
# Packet generator. Some distributions use 'mz'.
MZ=mausezahn
+# mausezahn delay between transmissions in microseconds.
+MZ_DELAY=0
# Time to wait after interfaces participating in the test are all UP
WAIT_TIME=5
# Whether to pause on failure or not.
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index 0446db9c6f74..9788bd0f6e8b 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -278,42 +278,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
@@ -329,14 +329,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
index e4009f658003..efca6114a3ce 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
@@ -267,7 +267,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
index e449475c4d3e..a71ad39fc0c3 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
@@ -266,9 +266,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
index a8d8e8b3dc81..57531c1d884d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -220,7 +220,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
index d03aa2cab9fd..7d5b2b9cc133 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -64,7 +64,6 @@ ALL_TESTS="
ping_ipv6
multipath_ipv4
multipath_ipv6
- multipath_ipv6_l4
"
NUM_NETIFS=6
@@ -245,7 +244,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -264,34 +263,6 @@ multipath6_test()
local weight1=$1; shift
local weight2=$1; shift
- sysctl_set net.ipv6.fib_multipath_hash_policy 0
- ip nexthop replace id 103 group 101,$weight1/102,$weight2
-
- local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- # Generate 16384 echo requests, each with a random flow label.
- for ((i=0; i < 16384; ++i)); do
- ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- local d111=$((t1_111 - t0_111))
- local d222=$((t1_222 - t0_222))
- multipath_eval "$what" $weight1 $weight2 $d111 $d222
-
- ip nexthop replace id 103 group 101/102
- sysctl_restore net.ipv6.fib_multipath_hash_policy
-}
-
-multipath6_l4_test()
-{
- local what=$1; shift
- local weight1=$1; shift
- local weight2=$1; shift
-
sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip nexthop replace id 103 group 101,$weight1/102,$weight2
@@ -300,7 +271,7 @@ multipath6_l4_test()
ip vrf exec v$h1 \
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -339,14 +310,6 @@ multipath_ipv6()
multipath6_test "Weighted MP 11:45" 11 45
}
-multipath_ipv6_l4()
-{
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
-}
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
index 088b65e64d66..370f9925302d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -64,7 +64,6 @@ ALL_TESTS="
ping_ipv6
multipath_ipv4
multipath_ipv6
- multipath_ipv6_l4
"
NUM_NETIFS=6
@@ -248,7 +247,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -267,35 +266,6 @@ multipath6_test()
local weight1=$1; shift
local weight2=$1; shift
- sysctl_set net.ipv6.fib_multipath_hash_policy 0
- ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
- type resilient
-
- local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- # Generate 16384 echo requests, each with a random flow label.
- for ((i=0; i < 16384; ++i)); do
- ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- local d111=$((t1_111 - t0_111))
- local d222=$((t1_222 - t0_222))
- multipath_eval "$what" $weight1 $weight2 $d111 $d222
-
- ip nexthop replace id 103 group 101/102 type resilient
- sysctl_restore net.ipv6.fib_multipath_hash_policy
-}
-
-multipath6_l4_test()
-{
- local what=$1; shift
- local weight1=$1; shift
- local weight2=$1; shift
-
sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
type resilient
@@ -305,7 +275,7 @@ multipath6_l4_test()
ip vrf exec v$h1 \
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -344,14 +314,6 @@ multipath_ipv6()
multipath6_test "Weighted MP 11:45" 11 45
}
-multipath_ipv6_l4()
-{
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
-}
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index d40183b4eccc..2ab9eaaa5532 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -280,42 +280,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
@@ -331,14 +331,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
index a257979d3fc5..32d1461f37b7 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
@@ -266,7 +266,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
index d208f5243ade..e1a4b50505f5 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
@@ -265,9 +265,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
index 58a3597037b1..24f4ab328bd2 100644
--- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
@@ -356,7 +356,7 @@ test_traffic_ip4ip6()
flower $TC_FLAG dst_ip 203.0.113.1 action pass
$MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \
- -B 203.0.113.1 -t ip -q -d 1msec
+ -B 203.0.113.1 -t ip -q -d $MZ_DELAY
# Check ports after encap and after decap.
tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
@@ -389,7 +389,7 @@ test_traffic_ip6ip6()
flower $TC_FLAG dst_ip 2001:db8:2::1 action pass
$MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \
- -B 2001:db8:2::1 -t ip -q -d 1msec
+ -B 2001:db8:2::1 -t ip -q -d $MZ_DELAY
# Check ports after encap and after decap.
tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 8a61464ab6eb..e579c2e0c462 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -8,6 +8,7 @@
PING=${PING:=ping}
PING6=${PING6:=ping6}
MZ=${MZ:=mausezahn}
+MZ_DELAY=${MZ_DELAY:=0}
ARPING=${ARPING:=arping}
TEAMD=${TEAMD:=teamd}
WAIT_TIME=${WAIT_TIME:=5}
@@ -29,23 +30,20 @@ STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
TROUTE6=${TROUTE6:=traceroute6}
-relative_path="${BASH_SOURCE%/*}"
-if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
- relative_path="."
-fi
+net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-if [[ -f $relative_path/forwarding.config ]]; then
- source "$relative_path/forwarding.config"
+if [[ -f $net_forwarding_dir/forwarding.config ]]; then
+ source "$net_forwarding_dir/forwarding.config"
fi
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source "$net_forwarding_dir/../lib.sh"
-busywait()
+# timeout in seconds
+slowwait()
{
local timeout=$1; shift
- local start_time="$(date -u +%s%3N)"
+ local start_time="$(date -u +%s)"
while true
do
local out
@@ -56,11 +54,13 @@ busywait()
return 0
fi
- local current_time="$(date -u +%s%3N)"
+ local current_time="$(date -u +%s)"
if ((current_time - start_time > timeout)); then
echo -n "$out"
return 1
fi
+
+ sleep 0.1
done
}
@@ -505,6 +505,15 @@ busywait_for_counter()
busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
+slowwait_for_counter()
+{
+ local timeout=$1; shift
+ local delta=$1; shift
+
+ local base=$("$@")
+ slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
+}
+
setup_wait_dev()
{
local dev=$1; shift
@@ -891,6 +900,33 @@ hw_stats_get()
jq ".[0].stats64.$dir.$stat"
}
+__nh_stats_get()
+{
+ local key=$1; shift
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ ip -j -s -s nexthop show id $group_id |
+ jq --argjson member_id "$member_id" --arg key "$key" \
+ '.[].group_stats[] | select(.id == $member_id) | .[$key]'
+}
+
+nh_stats_get()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets "$group_id" "$member_id"
+}
+
+nh_stats_get_hw()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets_hw "$group_id" "$member_id"
+}
+
humanize()
{
local speed=$1; shift
@@ -2001,3 +2037,10 @@ bail_on_lldpad()
fi
fi
}
+
+absval()
+{
+ local v=$1; shift
+
+ echo $((v > 0 ? v : -v))
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index fac486178ef7..0c36546e131e 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-source "$relative_path/mirror_lib.sh"
+source "$net_forwarding_dir/mirror_lib.sh"
quick_test_span_gre_dir_ips()
{
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index 39c03e2867f4..6e615fffa4ef 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -33,7 +33,7 @@
# | |
# +-------------------------------------------------------------------------+
-source "$relative_path/mirror_topo_lib.sh"
+source "$net_forwarding_dir/mirror_topo_lib.sh"
mirror_gre_topo_h3_create()
{
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index a0d612e04990..3f0f5dc95542 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -7,9 +7,12 @@ ALL_TESTS="
multipath_test
ping_ipv4_blackhole
ping_ipv6_blackhole
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -204,7 +207,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -218,7 +221,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -237,7 +240,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -251,34 +254,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip nexthop replace id 106 group 104/105
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -301,11 +276,6 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
-
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
}
ping_ipv4_blackhole()
@@ -358,6 +328,16 @@ ping_ipv6_blackhole()
ip -6 nexthop del id 1001
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 mpath
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 mpath
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
new file mode 100644
index 000000000000..7e7d62161c34
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+nh_stats_do_test()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local stats_get=$1; shift
+ local mz="$@"
+
+ local dp
+
+ RET=0
+
+ sleep 2
+ for ((dp=0; dp < 60000; dp += 10000)); do
+ local dd
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+ local t0_rp13=$(link_stats_tx_packets_get $rp13)
+ local t0_nh1=$($stats_get $group_id $nh1_id)
+ local t0_nh2=$($stats_get $group_id $nh2_id)
+
+ ip vrf exec vrf-h1 \
+ $mz -q -p 64 -d 0 -t udp \
+ "sp=1024,dp=$((dp))-$((dp + 10000))"
+ sleep 2
+
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+ local t1_rp13=$(link_stats_tx_packets_get $rp13)
+ local t1_nh1=$($stats_get $group_id $nh1_id)
+ local t1_nh2=$($stats_get $group_id $nh2_id)
+
+ local d_rp12=$((t1_rp12 - t0_rp12))
+ local d_rp13=$((t1_rp13 - t0_rp13))
+ local d_nh1=$((t1_nh1 - t0_nh1))
+ local d_nh2=$((t1_nh2 - t0_nh2))
+
+ dd=$(absval $((d_rp12 - d_nh1)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1"
+
+ dd=$(absval $((d_rp13 - d_nh2)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2"
+ done
+
+ log_test "NH stats test $what"
+}
+
+nh_stats_test_dispatch_swhw()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local used
+
+ nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get "${mz[@]}"
+
+ used=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.used')
+ kind=$(ip -j -d link show dev $rp11 |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $used == true ]]; then
+ nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get_hw "${mz[@]}"
+ elif [[ $kind == veth ]]; then
+ log_test_skip "HW stats not offloaded on veth topology"
+ fi
+}
+
+nh_stats_test_dispatch()
+{
+ local nhgtype=$1; shift
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local enabled
+ local kind
+
+ if ! ip nexthop help 2>&1 | grep -q hw_stats; then
+ log_test_skip "NH stats test: ip doesn't support HW stats"
+ return
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats on type $nhgtype
+ enabled=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.enabled')
+ if [[ $enabled == true ]]; then
+ nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \
+ "$group_id" "${mz[@]}"
+ elif [[ $enabled == false ]]; then
+ check_err 1 "HW stats still disabled after enabling"
+ log_test "NH stats test"
+ else
+ log_test_skip "NH stats test: ip doesn't report hw_stats info"
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats off type $nhgtype
+}
+
+__nh_stats_test_v4()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \
+ $MZ $h1 -A 192.0.2.2 -B 198.51.100.2
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+__nh_stats_test_v6()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \
+ $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index cb08ffe2356a..4b483d24ad00 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -5,9 +5,12 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -205,7 +208,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -235,7 +238,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -333,6 +336,16 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 resilient
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 resilient
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 464821c587a5..e2be354167a1 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -179,7 +179,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -195,7 +195,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -216,7 +216,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -232,38 +232,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
- nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 \
- nexthop via fe80:3::23 dev $rp13
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -275,11 +243,6 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
-
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
}
setup_prepare()
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 0a51eef21b9e..5103f64a71d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -140,7 +140,7 @@ police_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -157,7 +157,7 @@ police_rx_test()
# Rule to police traffic destined to $h2 on ingress of $rp1
tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok
+ action police rate 10mbit burst 16k conform-exceed drop/ok
police_common_test "police on rx"
@@ -169,7 +169,7 @@ police_tx_test()
# Rule to police traffic destined to $h2 on egress of $rp2
tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok
+ action police rate 10mbit burst 16k conform-exceed drop/ok
police_common_test "police on tx"
@@ -190,7 +190,7 @@ police_shared_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -211,7 +211,7 @@ police_shared_test()
# Rule to police traffic destined to $h2 on ingress of $rp1
tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok \
+ action police rate 10mbit burst 16k conform-exceed drop/ok \
index 10
# Rule to police a different flow destined to $h2 on egress of $rp2
@@ -250,7 +250,7 @@ police_mirror_common_test()
# Rule to police traffic destined to $h2 and mirror to $h3
tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/pipe \
+ action police rate 10mbit burst 16k conform-exceed drop/pipe \
action mirred egress mirror dev $rp3
mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
@@ -260,7 +260,7 @@ police_mirror_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -270,7 +270,7 @@ police_mirror_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h3 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index eb307ca37bfa..6f0a2e452ba1 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -495,7 +495,7 @@ vxlan_ping_test()
local delta=$((t1 - t0))
# Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((expect <= delta && delta <= expect + 5))
check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
}
@@ -532,7 +532,7 @@ __test_ecn_encap()
RET=0
tc filter add dev v1 egress pref 77 prot ip \
- flower ip_tos $tos action pass
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
sleep 1
vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10
tc filter del dev v1 egress pref 77 prot ip
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
index ac97f07e5ce8..a0bb4524e1e9 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -616,7 +616,7 @@ vxlan_ping_test()
local delta=$((t1 - t0))
# Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((expect <= delta && delta <= expect + 5))
check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
}
@@ -653,7 +653,7 @@ __test_ecn_encap()
RET=0
tc filter add dev v1 egress pref 77 protocol ipv6 \
- flower ip_tos $tos action pass
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
sleep 1
vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10
tc filter del dev v1 egress pref 77 protocol ipv6
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index a596bbf3ed6a..fb9a34cb50c6 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -750,7 +750,7 @@ __test_learning()
expects[0]=0; expects[$idx1]=10; expects[$idx2]=0
vxlan_flood_test $mac $dst $vid "${expects[@]}"
- sleep 20
+ sleep 60
bridge fdb show brport $vx | grep $mac | grep -q self
check_fail $?
@@ -796,11 +796,11 @@ test_learning()
local dst=192.0.2.100
local vid=10
- # Enable learning on the VxLAN devices and set ageing time to 10 seconds
- ip link set dev br1 type bridge ageing_time 1000
- ip link set dev vx10 type vxlan ageing 10
+ # Enable learning on the VxLAN devices and set ageing time to 30 seconds
+ ip link set dev br1 type bridge ageing_time 3000
+ ip link set dev vx10 type vxlan ageing 30
ip link set dev vx10 type vxlan learning
- ip link set dev vx20 type vxlan ageing 10
+ ip link set dev vx20 type vxlan ageing 30
ip link set dev vx20 type vxlan learning
reapply_config
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
index 24b77bdf41ff..977070ed42b3 100755
--- a/tools/testing/selftests/net/fq_band_pktlimit.sh
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -8,7 +8,7 @@
# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
#
-# Send packets with a 100ms delay to ensure that previously sent
+# Send packets with a delay to ensure that previously sent
# packets are still queued when later ones are sent.
# Use SO_TXTIME for this.
@@ -29,19 +29,21 @@ ip -6 addr add fdaa::1/128 dev dummy0
ip -6 route add fdaa::/64 dev dummy0
tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
-./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+DELAY=400000
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
-./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
-./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000
OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
# Initial stats will report zero sent, as all packets are still
-# queued in FQ. Sleep for the delay period (100ms) and see that
+# queued in FQ. Sleep for at least the delay period and see that
# twenty are now sent.
-sleep 0.1
+sleep 0.6
OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
# Log the output after the test
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 6ebd58869a63..193b82745fd8 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -365,9 +365,6 @@ TEST_F(ip_local_port_range, late_bind)
__u32 range;
__u16 port;
- if (variant->so_protocol == IPPROTO_SCTP)
- SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT");
-
fd = socket(variant->so_domain, variant->so_type, 0);
ASSERT_GE(fd, 0) TH_LOG("socket failed");
@@ -414,6 +411,9 @@ TEST_F(ip_local_port_range, late_bind)
ASSERT_TRUE(!err) TH_LOG("close failed");
}
+XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind);
+XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind);
+
TEST_F(ip_local_port_range, get_port_range)
{
__u16 lo, hi;
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 75fc95675e2d..bc97ab33a00e 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -1,14 +1,15 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns="ns1-$rndh"
-ksft_skip=4
-test_cnt=1
-timeout_poll=100
+ns=""
+timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
ret=0
@@ -26,25 +27,17 @@ flush_pids()
done
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
- ip netns del $ns
+ mptcp_lib_ns_exit "${ns}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-ss -h | grep -q MPTCP
-if [ $? -ne 0 ];then
- echo "SKIP: ss tool does not support MPTCP"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip ss
get_msk_inuse()
{
@@ -61,21 +54,20 @@ __chk_nr()
nr=$(eval $command)
- printf "%-50s" "$msg"
+ mptcp_lib_print_title "$msg"
if [ "$nr" != "$expected" ]; then
if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then
- echo "[ skip ] Feature probably not supported"
+ mptcp_lib_pr_skip "Feature probably not supported"
mptcp_lib_result_skip "${msg}"
else
- echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_pr_fail "expected $expected found $nr"
mptcp_lib_result_fail "${msg}"
ret=${KSFT_FAIL}
fi
else
- echo "[ ok ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${msg}"
fi
- test_cnt=$((test_cnt+1))
}
__chk_msk_nr()
@@ -120,20 +112,19 @@ wait_msk_nr()
sleep 1
done
- printf "%-50s" "$msg"
+ mptcp_lib_print_title "$msg"
if [ $i -ge $timeout ]; then
- echo "[ fail ] timeout while expecting $expected max $max last $nr"
+ mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr"
mptcp_lib_result_fail "${msg} # timeout"
ret=${KSFT_FAIL}
elif [ $nr != $expected ]; then
- echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_pr_fail "expected $expected found $nr"
mptcp_lib_result_fail "${msg} # unexpected result"
ret=${KSFT_FAIL}
else
- echo "[ ok ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${msg}"
fi
- test_cnt=$((test_cnt+1))
}
chk_msk_fallback_nr()
@@ -186,7 +177,7 @@ chk_msk_inuse()
expected=$((expected + listen_nr))
for _ in $(seq 10); do
- if [ $(get_msk_inuse) -eq $expected ];then
+ if [ "$(get_msk_inuse)" -eq $expected ]; then
break
fi
sleep 0.1
@@ -224,8 +215,7 @@ wait_connected()
}
trap cleanup EXIT
-ip netns add $ns
-ip -n $ns link set dev lo up
+mptcp_lib_ns_init ns
echo "a" | \
timeout ${timeout_test} \
@@ -273,7 +263,7 @@ chk_msk_inuse 0 "1->0"
chk_msk_cestab 0 "1->0"
NR_CLIENTS=100
-for I in `seq 1 $NR_CLIENTS`; do
+for I in $(seq 1 $NR_CLIENTS); do
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
@@ -282,7 +272,7 @@ for I in `seq 1 $NR_CLIENTS`; do
done
mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001))
-for I in `seq 1 $NR_CLIENTS`; do
+for I in $(seq 1 $NR_CLIENTS); do
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 7898d62fce0b..4c4248554826 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
time_start=$(date +%s)
@@ -13,7 +18,6 @@ sout=""
cin_disconnect=""
cin=""
cout=""
-ksft_skip=4
capture=false
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
@@ -29,6 +33,7 @@ do_tcp=0
checksum=false
filesize=0
connect_per_transfer=1
+port=$((10000 - 1))
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -60,14 +65,14 @@ while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"d")
if [ $OPTARG -ge 0 ];then
tc_delay="$OPTARG"
else
echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"e")
@@ -91,7 +96,7 @@ while getopts "$optstring" option;do
sndbuf="$OPTARG"
else
echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"R")
@@ -99,7 +104,7 @@ while getopts "$optstring" option;do
rcvbuf="$OPTARG"
else
echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"m")
@@ -116,21 +121,20 @@ while getopts "$optstring" option;do
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-ns4="ns4-$rndh"
+ns1=""
+ns2=""
+ns3=""
+ns4=""
-TEST_COUNT=0
TEST_GROUP=""
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f "$cin_disconnect" "$cout_disconnect"
@@ -138,21 +142,12 @@ cleanup()
rm -f "$sin" "$sout"
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns del $netns
- rm -f /tmp/$netns.{nstat,out}
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}"
}
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip
sin=$(mktemp)
sout=$(mktemp)
@@ -163,10 +158,7 @@ cin_disconnect="$cin".disconnect
cout_disconnect="$cout".disconnect
trap cleanup EXIT
-for i in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+mptcp_lib_ns_init ns1 ns2 ns3 ns4
# "$ns1" ns2 ns3 ns4
# ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3
@@ -225,8 +217,9 @@ set_ethtool_flags() {
local dev="$2"
local flags="$3"
- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
- [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
+ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
+ mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags"
+ fi
}
set_random_ethtool_flags() {
@@ -254,16 +247,23 @@ else
set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
fi
+print_larger_title() {
+ # here we don't have the time, a bit longer for the alignment
+ MPTCP_LIB_TEST_FORMAT="%02u %-69s" \
+ mptcp_lib_print_title "${@}"
+}
+
check_mptcp_disabled()
{
- local disabled_ns="ns_disabled-$rndh"
- ip netns add ${disabled_ns} || exit $ksft_skip
+ local disabled_ns
+ mptcp_lib_ns_init disabled_ns
+ print_larger_title "New MPTCP socket can be blocked via sysctl"
# net.mptcp.enabled should be enabled by default
if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
- echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
+ mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default"
mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
@@ -271,16 +271,16 @@ check_mptcp_disabled()
local err=0
LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
- ip netns delete ${disabled_ns}
+ mptcp_lib_ns_exit "${disabled_ns}"
if [ ${err} -eq 0 ]; then
- echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
+ mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl"
mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
- echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl"
return 0
}
@@ -301,8 +301,8 @@ do_ping()
ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1
if [ $rc -ne 0 ] ; then
- echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
+ mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity"
+ ret=${KSFT_FAIL}
return 1
fi
@@ -320,24 +320,22 @@ do_transfer()
local local_addr="$6"
local extra_args="$7"
- local port
- port=$((10000+$TEST_COUNT))
- TEST_COUNT=$((TEST_COUNT+1))
+ port=$((port + 1))
if [ "$rcvbuf" -gt 0 ]; then
- extra_args="$extra_args -R $rcvbuf"
+ extra_args+=" -R $rcvbuf"
fi
if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -S $sndbuf"
+ extra_args+=" -S $sndbuf"
fi
if [ -n "$testmode" ]; then
- extra_args="$extra_args -m $testmode"
+ extra_args+=" -m $testmode"
fi
if [ -n "$extra_args" ] && $options_log; then
- echo "INFO: extra options: $extra_args"
+ mptcp_lib_pr_info "extra options: $extra_args"
fi
options_log=false
@@ -349,10 +347,11 @@ do_transfer()
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
local result_msg
result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
- printf "%s\t" "${result_msg}"
+ mptcp_lib_print_title "${result_msg}"
if $capture; then
local capuser
+ local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
@@ -378,12 +377,18 @@ do_transfer()
nstat -n
fi
- local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local stat_synrx_last_l
+ local stat_ackrx_last_l
+ local stat_cookietx_last
+ local stat_cookierx_last
+ local stat_csum_err_s
+ local stat_csum_err_c
+ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -427,7 +432,7 @@ do_transfer()
result_msg+=" # time=${duration}ms"
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo "[ FAIL ] client exit code $retc, server $rets" 1>&2
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
cat /tmp/${listener_ns}.out
@@ -446,11 +451,17 @@ do_transfer()
mptcp_lib_check_transfer $cin $sout "file received by server"
rets=$?
- local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ local extra=""
+ local stat_synrx_now_l
+ local stat_ackrx_now_l
+ local stat_cookietx_now
+ local stat_cookierx_now
+ local stat_ooo_now
+ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -459,75 +470,79 @@ do_transfer()
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
- expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
+ expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
+ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
fi
if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
- printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \
- "${stat_synrx_now_l}" "${expect_synrx}" 1>&2
+ mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+ "than expected (${expect_synrx})"
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
if [ ${stat_ooo_now} -eq 0 ]; then
- printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
- "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+ mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+ "than expected (${expect_ackrx})"
rets=1
else
- printf "[ Note ] fallback due to TCP OoO"
+ extra+=" [ Note ] fallback due to TCP OoO"
fi
fi
if $checksum; then
- local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_s
+ local csum_err_c
+ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
if [ $csum_err_s_nr -gt 0 ]; then
- printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
+ mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
rets=1
fi
local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
if [ $csum_err_c_nr -gt 0 ]; then
- printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
+ mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
retc=1
fi
fi
- if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
- printf "[ OK ]"
- mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
- else
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
- fi
-
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: did not advance"
+ extra+=" WARN: CookieSent: did not advance"
fi
if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: did not advance"
+ extra+=" WARN: CookieRecv: did not advance"
fi
else
if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: changed"
+ extra+=" WARN: CookieSent: changed"
fi
if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: changed"
+ extra+=" WARN: CookieRecv: changed"
fi
fi
if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
- printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \
- "${expect_synrx}" "${stat_synrx_now_l}"
+ extra+=" WARN: SYNRX: expect ${expect_synrx},"
+ extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
fi
if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
- printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \
- "${expect_ackrx}" "${stat_ackrx_now_l}"
+ extra+=" WARN: ACKRX: expect ${expect_ackrx},"
+ extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ fi
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
+ mptcp_lib_pr_ok "${extra:1}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ else
+ if [ -n "${extra}" ]; then
+ mptcp_lib_print_warn "${extra:1}"
+ fi
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
fi
- echo
cat "$capout"
[ $retc -eq 0 ] && [ $rets -eq 0 ]
}
@@ -653,12 +668,12 @@ run_test_transparent()
# following function has been exported (T). Not great but better than
# checking for a specific kernel version.
if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
- echo "INFO: ${msg} not supported by the kernel: SKIP"
+ mptcp_lib_pr_skip "${msg} not supported by the kernel"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
-ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+ if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
chain divert {
@@ -669,8 +684,8 @@ table inet mangle {
}
}
EOF
- if [ $? -ne 0 ]; then
- echo "SKIP: $msg, could not load nft ruleset"
+ then
+ mptcp_lib_pr_skip "$msg, could not load nft ruleset"
mptcp_lib_fail_if_expected_feature "nft rules"
mptcp_lib_result_skip "${TEST_GROUP}"
return
@@ -684,28 +699,26 @@ EOF
local_addr="0.0.0.0"
fi
- ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
ip netns exec "$listener_ns" nft flush ruleset
- echo "SKIP: $msg, ip $r6flag rule failed"
+ mptcp_lib_pr_skip "$msg, ip $r6flag rule failed"
mptcp_lib_fail_if_expected_feature "ip rule"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
ip netns exec "$listener_ns" nft flush ruleset
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
- echo "SKIP: $msg, ip route add local $local_addr failed"
+ mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed"
mptcp_lib_fail_if_expected_feature "ip route"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- echo "INFO: test $msg"
+ mptcp_lib_pr_info "test $msg"
- TEST_COUNT=10000
+ port=$((20000 - 1))
local extra_args="-o TRANSPARENT"
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
${connect_addr} ${local_addr} "${extra_args}"
@@ -716,12 +729,12 @@ EOF
ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
if [ $lret -ne 0 ]; then
- echo "FAIL: $msg, mptcp connection error" 1>&2
+ mptcp_lib_pr_fail "$msg, mptcp connection error"
ret=$lret
return 1
fi
- echo "PASS: $msg"
+ mptcp_lib_pr_info "$msg pass"
return 0
}
@@ -730,7 +743,7 @@ run_tests_peekmode()
local peekmode="$1"
TEST_GROUP="peek mode: ${peekmode}"
- echo "INFO: with peek mode: ${peekmode}"
+ mptcp_lib_pr_info "with peek mode: ${peekmode}"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
@@ -740,12 +753,12 @@ run_tests_mptfo()
TEST_GROUP="MPTFO"
if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
- echo "INFO: TFO not supported by the kernel: SKIP"
+ mptcp_lib_pr_skip "TFO not supported by the kernel"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- echo "INFO: with MPTFO start"
+ mptcp_lib_pr_info "with MPTFO start"
ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1
@@ -757,7 +770,7 @@ run_tests_mptfo()
ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
- echo "INFO: with MPTFO end"
+ mptcp_lib_pr_info "with MPTFO end"
}
run_tests_disconnect()
@@ -768,7 +781,7 @@ run_tests_disconnect()
TEST_GROUP="full disconnect"
if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
- echo "INFO: Full disconnect not supported: SKIP"
+ mptcp_lib_pr_skip "Full disconnect not supported"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -781,7 +794,7 @@ run_tests_disconnect()
cin_disconnect="$old_cin"
connect_per_transfer=3
- echo "INFO: disconnect"
+ mptcp_lib_pr_info "disconnect"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
@@ -805,10 +818,10 @@ log_if_error()
local msg="$1"
if [ ${ret} -ne 0 ]; then
- echo "FAIL: ${msg}" 1>&2
+ mptcp_lib_pr_fail "${msg}"
final_ret=${ret}
- ret=0
+ ret=${KSFT_PASS}
return ${final_ret}
fi
@@ -830,7 +843,7 @@ check_mptcp_disabled
stop_if_error "The kernel configuration is not valid for MPTCP"
-echo "INFO: validating network environment with pings"
+print_larger_title "Validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns1" $sender 10.0.1.1
do_ping "$ns1" $sender dead:beef:1::1
@@ -852,12 +865,13 @@ done
mptcp_lib_result_code "${ret}" "ping tests"
stop_if_error "Could not even run ping tests"
+mptcp_lib_pr_ok
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
-echo -n "INFO: Using loss of $tc_loss "
-test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+tc_info="loss of $tc_loss "
+test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms "
-reorder_delay=$(($tc_delay / 4))
+reorder_delay=$((tc_delay / 4))
if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
@@ -866,17 +880,17 @@ if [ -z "${tc_reorder}" ]; then
if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
-echo "on ns3eth4"
+mptcp_lib_pr_info "Using ${tc_info}on ns3eth4"
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index e4581b0dfb96..5e9211e89825 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -21,19 +21,19 @@ cinfail=""
cinsent=""
tmpfile=""
cout=""
+err=""
capout=""
ns1=""
ns2=""
-ksft_skip=4
iptables="iptables"
ip6tables="ip6tables"
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
-capture=0
-checksum=0
+capture=false
+checksum=false
ip_mptcp=0
check_invert=0
-validate_checksum=0
+validate_checksum=false
init=0
evts_ns1=""
evts_ns2=""
@@ -47,7 +47,7 @@ declare -A all_tests
declare -a only_tests_ids
declare -a only_tests_names
declare -A failed_tests
-TEST_COUNT=0
+MPTCP_LIB_TEST_FORMAT="%03u %s\n"
TEST_NAME=""
nr_blank=6
@@ -85,22 +85,12 @@ init_partial()
{
capout=$(mktemp)
- local sec rndh
- sec=$(date +%s)
- rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-
- ns1="ns1-$rndh"
- ns2="ns2-$rndh"
+ mptcp_lib_ns_init ns1 ns2
local netns
for netns in "$ns1" "$ns2"; do
- ip netns add $netns || exit $ksft_skip
- ip -net $netns link set lo up
- ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true
- ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
- if [ $checksum -eq 1 ]; then
+ if $checksum; then
ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
fi
done
@@ -144,51 +134,22 @@ cleanup_partial()
{
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2"; do
- ip netns del $netns
- rm -f /tmp/$netns.{nstat,out}
- done
-}
-
-check_tools()
-{
- mptcp_lib_check_mptcp
- mptcp_lib_check_kallsyms
-
- if ! ip -Version &> /dev/null; then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
- fi
-
- if ! ss -h | grep -q MPTCP; then
- echo "SKIP: ss tool does not support MPTCP"
- exit $ksft_skip
- fi
-
- # Use the legacy version if available to support old kernel versions
- if iptables-legacy -V &> /dev/null; then
- iptables="iptables-legacy"
- ip6tables="ip6tables-legacy"
- elif ! iptables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without iptables tool"
- exit $ksft_skip
- elif ! ip6tables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without ip6tables tool"
- exit $ksft_skip
- fi
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
}
init() {
init=1
- check_tools
+ mptcp_lib_check_mptcp
+ mptcp_lib_check_kallsyms
+ mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}"
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cinsent=$(mktemp)
cout=$(mktemp)
+ err=$(mktemp)
evts_ns1=$(mktemp)
evts_ns2=$(mktemp)
@@ -204,14 +165,10 @@ cleanup()
rm -f "$sin" "$sout" "$cinsent" "$cinfail"
rm -f "$tmpfile"
rm -rf $evts_ns1 $evts_ns2
+ rm -f "$err"
cleanup_partial
}
-print_title()
-{
- printf "%03u %s\n" "${TEST_COUNT}" "${TEST_NAME}"
-}
-
print_check()
{
printf "%-${nr_blank}s%-36s" " " "${*}"
@@ -227,17 +184,17 @@ print_info()
print_ok()
{
- mptcp_lib_print_ok "[ ok ]${1:+ ${*}}"
+ mptcp_lib_pr_ok "${@}"
}
print_fail()
{
- mptcp_lib_print_err "[fail]${1:+ ${*}}"
+ mptcp_lib_pr_fail "${@}"
}
print_skip()
{
- mptcp_lib_print_warn "[skip]${1:+ ${*}}"
+ mptcp_lib_pr_skip "${@}"
}
# [ $1: fail msg ]
@@ -270,7 +227,7 @@ skip_test()
local i
for i in "${only_tests_ids[@]}"; do
- if [ "${TEST_COUNT}" -eq "${i}" ]; then
+ if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then
return 1
fi
done
@@ -305,14 +262,13 @@ reset()
TEST_NAME="${1}"
- TEST_COUNT=$((TEST_COUNT+1))
-
if skip_test; then
+ MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
last_test_ignored=1
return 1
fi
- print_title
+ mptcp_lib_print_title "${TEST_NAME}"
if [ "${init}" != "1" ]; then
init
@@ -385,7 +341,7 @@ reset_with_checksum()
ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
- validate_checksum=1
+ validate_checksum=true
}
reset_with_allow_join_id0()
@@ -418,7 +374,7 @@ reset_with_allow_join_id0()
setup_fail_rules()
{
check_invert=1
- validate_checksum=1
+ validate_checksum=true
local i="$1"
local ip="${2:-4}"
local tables
@@ -435,15 +391,15 @@ setup_fail_rules()
-p tcp \
-m length --length 150:9999 \
-m statistic --mode nth --packet 1 --every 99999 \
- -j MARK --set-mark 42 || return ${ksft_skip}
+ -j MARK --set-mark 42 || return ${KSFT_SKIP}
- tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${ksft_skip}
+ tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP}
tc -n $ns2 filter add dev ns2eth$i egress \
protocol ip prio 1000 \
handle 42 fw \
action pedit munge offset 148 u8 invert \
pipe csum tcp \
- index 100 || return ${ksft_skip}
+ index 100 || return ${KSFT_SKIP}
}
reset_with_fail()
@@ -457,7 +413,7 @@ reset_with_fail()
local rc=0
setup_fail_rules "${@}" || rc=$?
- if [ ${rc} -eq ${ksft_skip} ]; then
+ if [ ${rc} -eq ${KSFT_SKIP} ]; then
mark_as_skipped "unable to set the 'fail' rules"
return 1
fi
@@ -467,12 +423,8 @@ reset_with_events()
{
reset "${1}" || return 1
- :> "$evts_ns1"
- :> "$evts_ns2"
- ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 &
- evts_ns1_pid=$!
- ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 &
- evts_ns2_pid=$!
+ mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+ mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
}
reset_with_tcp_filter()
@@ -497,13 +449,15 @@ reset_with_tcp_filter()
# $1: err msg
fail_test()
{
- ret=1
+ ret=${KSFT_FAIL}
- print_fail "${@}"
+ if [ ${#} -gt 0 ]; then
+ print_fail "${@}"
+ fi
# just in case a test is marked twice as failed
if [ ${last_test_failed} -eq 0 ]; then
- failed_tests[${TEST_COUNT}]="${TEST_NAME}"
+ failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}"
dump_stats
last_test_failed=1
fi
@@ -645,7 +599,9 @@ wait_mpj()
kill_events_pids()
{
mptcp_lib_kill_wait $evts_ns1_pid
+ evts_ns1_pid=0
mptcp_lib_kill_wait $evts_ns2_pid
+ evts_ns2_pid=0
}
pm_nl_set_limits()
@@ -799,18 +755,18 @@ pm_nl_check_endpoint()
line="${line% }"
# the dump order is: address id flags port dev
[ -n "$addr" ] && expected_line="$addr"
- expected_line="$expected_line $id"
- [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$port" ] && expected_line="$expected_line $port"
+ expected_line+=" $id"
+ [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$port" ] && expected_line+=" $port"
else
line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
# the dump order is: id flags dev address port
expected_line="$id"
- [ -n "$flags" ] && expected_line="$expected_line $flags"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$addr" ] && expected_line="$expected_line $addr"
- [ -n "$_port" ] && expected_line="$expected_line $_port"
+ [ -n "$flags" ] && expected_line+=" $flags"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$addr" ] && expected_line+=" $addr"
+ [ -n "$_port" ] && expected_line+=" $_port"
fi
if [ "$line" = "$expected_line" ]; then
print_ok
@@ -1012,7 +968,7 @@ do_transfer()
local srv_proto="$4"
local connect_addr="$5"
- local port=$((10000 + TEST_COUNT - 1))
+ local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1))
local cappid
local FAILING_LINKS=${FAILING_LINKS:-""}
local fastclose=${fastclose:-""}
@@ -1022,7 +978,7 @@ do_transfer()
:> "$sout"
:> "$capout"
- if [ $capture -eq 1 ]; then
+ if $capture; then
local capuser
if [ -z $SUDO_USER ] ; then
capuser=""
@@ -1030,9 +986,9 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}")
- echo "Capturing traffic for test $TEST_COUNT into $capfile"
+ echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile"
ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
cappid=$!
@@ -1124,7 +1080,7 @@ do_transfer()
wait $spid
local rets=$?
- if [ $capture -eq 1 ]; then
+ if $capture; then
sleep 1
kill $cappid
fi
@@ -1261,7 +1217,7 @@ chk_csum_nr()
print_check "sum"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns1" ]; then
- extra_msg="$extra_msg ns1=$count"
+ extra_msg+=" ns1=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1274,7 +1230,7 @@ chk_csum_nr()
print_check "csum"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns2" ]; then
- extra_msg="$extra_msg ns2=$count"
+ extra_msg+=" ns2=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1318,7 +1274,7 @@ chk_fail_nr()
print_check "ftx"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
if [ "$count" != "$fail_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1332,7 +1288,7 @@ chk_fail_nr()
print_check "failrx"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
if [ "$count" != "$fail_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1367,7 +1323,7 @@ chk_fclose_nr()
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
else
print_ok
@@ -1378,7 +1334,7 @@ chk_fclose_nr()
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
else
print_ok
@@ -1512,7 +1468,7 @@ chk_join_nr()
else
print_ok
fi
- if [ $validate_checksum -eq 1 ]; then
+ if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
chk_rst_nr $rst_nr $rst_nr
@@ -1747,7 +1703,7 @@ chk_rm_nr()
count=$((count + cnt))
if [ "$count" != "$rm_subflow_nr" ]; then
suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
- extra_msg="$extra_msg simult"
+ extra_msg+=" simult"
fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
@@ -2828,29 +2784,16 @@ backup_tests()
fi
}
-SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
-LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
-LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED
-
-AF_INET=2
-AF_INET6=10
-
verify_listener_events()
{
- local evt=$1
local e_type=$2
- local e_family=$3
local e_saddr=$4
local e_sport=$5
- local type
- local family
- local saddr
- local sport
local name
- if [ $e_type = $LISTENER_CREATED ]; then
+ if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then
name="LISTENER_CREATED"
- elif [ $e_type = $LISTENER_CLOSED ]; then
+ elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then
name="LISTENER_CLOSED "
else
name="$e_type"
@@ -2863,23 +2806,11 @@ verify_listener_events()
return
fi
- type=$(mptcp_lib_evts_get_info type "$evt" "$e_type")
- family=$(mptcp_lib_evts_get_info family "$evt" "$e_type")
- sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type")
- if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type")
- else
- saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type")
- fi
-
- if [ $type ] && [ $type = $e_type ] &&
- [ $family ] && [ $family = $e_family ] &&
- [ $saddr ] && [ $saddr = $e_saddr ] &&
- [ $sport ] && [ $sport = $e_sport ]; then
+ if mptcp_lib_verify_listener_events "${@}"; then
print_ok
return 0
fi
- fail_test "$e_type:$type $e_family:$family $e_saddr:$saddr $e_sport:$sport"
+ fail_test
}
add_addr_ports_tests()
@@ -2917,8 +2848,10 @@ add_addr_ports_tests()
chk_add_nr 1 1 1
chk_rm_nr 1 1 invert
- verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100
- verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
kill_events_pids
fi
@@ -3356,6 +3289,77 @@ userspace_pm_rm_sf()
wait_rm_sf $1 "${cnt}"
}
+check_output()
+{
+ local cmd="$1"
+ local expected="$2"
+ local msg="$3"
+ local rc=0
+
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ fail_test "fail to check output # error ${rc}"
+ elif [ ${rc} -eq 0 ]; then
+ print_ok
+ elif [ ${rc} -eq 1 ]; then
+ fail_test "fail to check output # different output"
+ fi
+}
+
+# $1: ns
+userspace_pm_dump()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl dump token $tk
+}
+
+# $1: ns ; $2: id
+userspace_pm_get_addr()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl get $2 token $tk
+}
+
+userspace_pm_chk_dump_addr()
+{
+ local ns="${1}"
+ local exp="${2}"
+ local check="${3}"
+
+ print_check "dump addrs ${check}"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then
+ check_output "userspace_pm_dump ${ns}" "${exp}"
+ else
+ print_skip
+ fi
+}
+
+userspace_pm_chk_get_addr()
+{
+ local ns="${1}"
+ local id="${2}"
+ local exp="${3}"
+
+ print_check "get id ${id} addr"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then
+ check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}"
+ else
+ print_skip
+ fi
+}
+
userspace_tests()
{
# userspace pm type prevents add_addr
@@ -3447,10 +3451,18 @@ userspace_tests()
chk_mptcp_info subflows 2 subflows 2
chk_subflows_total 3 3
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+ userspace_pm_chk_dump_addr "${ns1}" \
+ $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \
+ "signal"
+ userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
+ userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
userspace_pm_rm_addr $ns1 10
- userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED
+ userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" \
+ "id 20 flags signal 10.0.3.1" "after rm_addr 10"
userspace_pm_rm_addr $ns1 20
- userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED
+ userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
chk_rm_nr 2 2 invert
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
@@ -3471,8 +3483,15 @@ userspace_tests()
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 20 flags subflow 10.0.3.2" \
+ "subflow"
+ userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
userspace_pm_rm_addr $ns2 20
- userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED
+ userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "" \
+ "after rm_addr 20"
chk_rm_nr 1 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
@@ -3492,6 +3511,8 @@ userspace_tests()
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
userspace_pm_add_sf $ns2 10.0.3.2 0
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 0 flags subflow 10.0.3.2" "id 0 subflow"
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -3610,7 +3631,7 @@ usage()
{
if [ -n "${1}" ]; then
echo "${1}"
- ret=1
+ ret=${KSFT_FAIL}
fi
echo "mptcp_join usage:"
@@ -3673,10 +3694,10 @@ while getopts "${all_tests_args}cCih" opt; do
tests+=("${all_tests[${opt}]}")
;;
c)
- capture=1
+ capture=true
;;
C)
- checksum=1
+ checksum=true
;;
i)
ip_mptcp=1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 3777d66fc56d..d529b4b37af8 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -8,7 +8,21 @@ readonly KSFT_SKIP=4
# shellcheck disable=SC2155 # declare and assign separately
readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
+# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
+declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED
+declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED
+
+declare -rx MPTCP_LIB_AF_INET=2
+declare -rx MPTCP_LIB_AF_INET6=10
+
MPTCP_LIB_SUBTESTS=()
+MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_TEST_COUNTER=0
+MPTCP_LIB_TEST_FORMAT="%02u %-50s"
# only if supported (or forced) and not disabled, see no-color.org
if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
@@ -47,6 +61,23 @@ mptcp_lib_print_err() {
mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}"
}
+# shellcheck disable=SC2120 # parameters are optional
+mptcp_lib_pr_ok() {
+ mptcp_lib_print_ok "[ OK ]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_skip() {
+ mptcp_lib_print_warn "[SKIP]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_fail() {
+ mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_info() {
+ mptcp_lib_print_info "INFO: ${*}"
+}
+
# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
# a test is not being skipped by mistake.
@@ -77,14 +108,14 @@ mptcp_lib_has_file() {
mptcp_lib_check_mptcp() {
if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
- echo "SKIP: MPTCP support is not available"
+ mptcp_lib_pr_skip "MPTCP support is not available"
exit ${KSFT_SKIP}
fi
}
mptcp_lib_check_kallsyms() {
if ! mptcp_lib_has_file "/proc/kallsyms"; then
- echo "SKIP: CONFIG_KALLSYMS is missing"
+ mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing"
exit ${KSFT_SKIP}
fi
}
@@ -146,12 +177,26 @@ mptcp_lib_kversion_ge() {
mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
}
+__mptcp_lib_result_check_duplicated() {
+ local subtest
+
+ for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+ if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then
+ MPTCP_LIB_SUBTESTS_DUPLICATED=1
+ mptcp_lib_print_err "Duplicated entry: ${*}"
+ break
+ fi
+ done
+}
+
__mptcp_lib_result_add() {
local result="${1}"
shift
local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
+ __mptcp_lib_result_check_duplicated "${*}"
+
MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
}
@@ -206,6 +251,12 @@ mptcp_lib_result_print_all_tap() {
for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
printf "%s\n" "${subtest}"
done
+
+ if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] &&
+ mptcp_lib_expect_all_features; then
+ mptcp_lib_print_err "Duplicated test entries"
+ exit ${KSFT_FAIL}
+ fi
}
# get the value of keyword $1 in the line marked by keyword $2
@@ -271,7 +322,7 @@ mptcp_lib_check_transfer() {
local what="${3}"
if ! cmp "$in" "$out" > /dev/null 2>&1; then
- echo "[ FAIL ] $what does not match (in, out):"
+ mptcp_lib_pr_fail "$what does not match (in, out):"
mptcp_lib_print_file_err "$in"
mptcp_lib_print_file_err "$out"
@@ -298,3 +349,159 @@ mptcp_lib_wait_local_port_listen() {
sleep 0.1
done
}
+
+mptcp_lib_check_output() {
+ local err="${1}"
+ local cmd="${2}"
+ local expected="${3}"
+ local cmd_ret=0
+ local out
+
+ if ! out=$(${cmd} 2>"${err}"); then
+ cmd_ret=${?}
+ fi
+
+ if [ ${cmd_ret} -ne 0 ]; then
+ mptcp_lib_pr_fail "command execution '${cmd}' stderr"
+ cat "${err}"
+ return 2
+ elif [ "${out}" = "${expected}" ]; then
+ return 0
+ else
+ mptcp_lib_pr_fail "expected '${expected}' got '${out}'"
+ return 1
+ fi
+}
+
+mptcp_lib_check_tools() {
+ local tool
+
+ for tool in "${@}"; do
+ case "${tool}" in
+ "ip")
+ if ! ip -Version &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without ip tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "ss")
+ if ! ss -h | grep -q MPTCP; then
+ mptcp_lib_pr_skip "ss tool does not support MPTCP"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "iptables"* | "ip6tables"*)
+ if ! "${tool}" -V &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run all tests without ${tool}"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ *)
+ mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+ done
+}
+
+mptcp_lib_ns_init() {
+ local sec rndh
+
+ sec=$(date +%s)
+ rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX)
+
+ local netns
+ for netns in "${@}"; do
+ eval "${netns}=${netns}-${rndh}"
+
+ ip netns add "${!netns}" || exit ${KSFT_SKIP}
+ ip -net "${!netns}" link set lo up
+ ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
+ done
+}
+
+mptcp_lib_ns_exit() {
+ local netns
+ for netns in "${@}"; do
+ ip netns del "${netns}"
+ rm -f /tmp/"${netns}".{nstat,out}
+ done
+}
+
+mptcp_lib_events() {
+ local ns="${1}"
+ local evts="${2}"
+ declare -n pid="${3}"
+
+ :>"${evts}"
+
+ mptcp_lib_kill_wait "${pid:-0}"
+ ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 &
+ pid=$!
+}
+
+mptcp_lib_print_title() {
+ : "${MPTCP_LIB_TEST_COUNTER:?}"
+ : "${MPTCP_LIB_TEST_FORMAT:?}"
+
+ # shellcheck disable=SC2059 # the format is in a variable
+ printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}"
+}
+
+# $1: var name ; $2: prev ret
+mptcp_lib_check_expected_one() {
+ local var="${1}"
+ local exp="e_${var}"
+ local prev_ret="${2}"
+
+ if [ "${!var}" = "${!exp}" ]; then
+ return 0
+ fi
+
+ if [ "${prev_ret}" = "0" ]; then
+ mptcp_lib_pr_fail
+ fi
+
+ mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'."
+ return 1
+}
+
+# $@: all var names to check
+mptcp_lib_check_expected() {
+ local rc=0
+ local var
+
+ for var in "${@}"; do
+ mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1
+ done
+
+ return "${rc}"
+}
+
+# shellcheck disable=SC2034 # Some variables are used below but indirectly
+mptcp_lib_verify_listener_events() {
+ local evt=${1}
+ local e_type=${2}
+ local e_family=${3}
+ local e_saddr=${4}
+ local e_sport=${5}
+ local type
+ local family
+ local saddr
+ local sport
+ local rc=0
+
+ type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}")
+ family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}")
+ if [ "${family}" ] && [ "${family}" = "${AF_INET6}" ]; then
+ saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}")
+ else
+ saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}")
+ fi
+ sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}")
+
+ mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
+ return "${rc}"
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index c643872ddf47..e2d70c18786e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
ret=0
@@ -8,17 +13,14 @@ sin=""
sout=""
cin=""
cout=""
-ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
iptables="iptables"
ip6tables="ip6tables"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns_sbox="ns_sbox-$rndh"
+ns1=""
+ns2=""
+ns_sbox=""
add_mark_rules()
{
@@ -40,17 +42,10 @@ add_mark_rules()
init()
{
- local netns
- for netns in "$ns1" "$ns2" "$ns_sbox";do
- ip netns add $netns || exit $ksft_skip
- ip -net $netns link set lo up
- ip netns exec $netns sysctl -q net.mptcp.enabled=1
- ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
- done
+ mptcp_lib_ns_init ns1 ns2 ns_sbox
local i
- for i in `seq 1 4`; do
+ for i in $(seq 1 4); do
ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
@@ -77,36 +72,18 @@ init()
add_mark_rules $ns2 2
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
- local netns
- for netns in "$ns1" "$ns2" "$ns_sbox"; do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
}
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-# Use the legacy version if available to support old kernel versions
-if iptables-legacy -V &> /dev/null; then
- iptables="iptables-legacy"
- ip6tables="ip6tables-legacy"
-elif ! iptables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without iptables tool"
- exit $ksft_skip
-elif ! ip6tables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without ip6tables tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip "${iptables}" "${ip6tables}"
check_mark()
{
@@ -126,8 +103,9 @@ check_mark()
local v
for v in $values; do
if [ $v -ne 0 ]; then
- echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
- ret=1
+ mptcp_lib_pr_fail "got $tables $values in ns $ns," \
+ "not 0 - not all expected packets marked"
+ ret=${KSFT_FAIL}
return 1
fi
done
@@ -135,6 +113,11 @@ check_mark()
return 0
}
+print_title()
+{
+ mptcp_lib_print_title "${@}"
+}
+
do_transfer()
{
local listener_ns="$1"
@@ -184,8 +167,9 @@ do_transfer()
wait $spid
local rets=$?
+ print_title "Transfer ${ip:2}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo " client exit code $retc, server $rets" 1>&2
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
@@ -194,10 +178,17 @@ do_transfer()
mptcp_lib_result_fail "transfer ${ip}"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
+ if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then
+ rets=1
+ else
+ mptcp_lib_pr_ok
+ fi
+ mptcp_lib_result_code "${rets}" "transfer ${ip}"
+ print_title "Mark ${ip:2}"
if [ $local_addr = "::" ];then
check_mark $listener_ns 6 || retc=1
check_mark $connector_ns 6 || retc=1
@@ -206,15 +197,13 @@ do_transfer()
check_mark $connector_ns 4 || retc=1
fi
- mptcp_lib_check_transfer $cin $sout "file received by server"
- rets=$?
-
mptcp_lib_result_code "${retc}" "mark ${ip}"
- mptcp_lib_result_code "${rets}" "transfer ${ip}"
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ mptcp_lib_pr_ok
return 0
fi
+ mptcp_lib_pr_fail
return 1
}
@@ -235,7 +224,7 @@ do_mptcp_sockopt_tests()
local lret=0
if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then
- echo "INFO: MPTCP sockopt not supported: SKIP"
+ mptcp_lib_pr_skip "MPTCP sockopt not supported"
mptcp_lib_result_skip "sockopt"
return
fi
@@ -243,23 +232,27 @@ do_mptcp_sockopt_tests()
ip netns exec "$ns_sbox" ./mptcp_sockopt
lret=$?
+ print_title "SOL_MPTCP sockopt v4"
if [ $lret -ne 0 ]; then
- echo "FAIL: SOL_MPTCP getsockopt" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "sockopt v4"
ret=$lret
return
fi
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "sockopt v4"
ip netns exec "$ns_sbox" ./mptcp_sockopt -6
lret=$?
+ print_title "SOL_MPTCP sockopt v6"
if [ $lret -ne 0 ]; then
- echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "sockopt v6"
ret=$lret
return
fi
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "sockopt v6"
}
@@ -282,16 +275,17 @@ run_tests()
do_tcpinq_test()
{
+ print_title "TCP_INQ cmsg/ioctl $*"
ip netns exec "$ns_sbox" ./mptcp_inq "$@"
local lret=$?
if [ $lret -ne 0 ];then
ret=$lret
- echo "FAIL: mptcp_inq $@" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "TCP_INQ: $*"
return $lret
fi
- echo "PASS: TCP_INQ cmsg/ioctl $@"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "TCP_INQ: $*"
return $lret
}
@@ -301,7 +295,7 @@ do_tcpinq_tests()
local lret=0
if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
- echo "INFO: TCP_INQ not supported: SKIP"
+ mptcp_lib_pr_skip "TCP_INQ not supported"
mptcp_lib_result_skip "TCP_INQ"
return
fi
@@ -337,15 +331,7 @@ trap cleanup EXIT
run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1
-if [ $ret -eq 0 ];then
- echo "PASS: all packets had packet mark set"
-fi
-
do_mptcp_sockopt_tests
-if [ $ret -eq 0 ];then
- echo "PASS: SOL_MPTCP getsockopt has expected information"
-fi
-
do_tcpinq_tests
mptcp_lib_result_print_all_tap
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 71899a3ffa7a..6ab8c5d36340 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,77 +1,69 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-ksft_skip=4
ret=0
usage() {
echo "Usage: $0 [ -h ]"
}
-
+optstring=h
while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
+ns1=""
err=$(mktemp)
-ret=0
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f $err
- ip netns del $ns1
+ mptcp_lib_ns_exit "${ns1}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip
trap cleanup EXIT
-ip netns add $ns1 || exit $ksft_skip
-ip -net $ns1 link set lo up
-ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+mptcp_lib_ns_init ns1
check()
{
local cmd="$1"
local expected="$2"
local msg="$3"
- local out=`$cmd 2>$err`
- local cmd_ret=$?
-
- printf "%-50s" "$msg"
- if [ $cmd_ret -ne 0 ]; then
- echo "[FAIL] command execution '$cmd' stderr "
- cat $err
- mptcp_lib_result_fail "${msg} # error ${cmd_ret}"
- ret=1
- elif [ "$out" = "$expected" ]; then
- echo "[ OK ]"
+ local rc=0
+
+ mptcp_lib_print_title "$msg"
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ mptcp_lib_result_fail "${msg} # error ${rc}"
+ ret=${KSFT_FAIL}
+ elif [ ${rc} -eq 0 ]; then
+ mptcp_lib_print_ok "[ OK ]"
mptcp_lib_result_pass "${msg}"
- else
- echo -n "[FAIL] "
- echo "expected '$expected' got '$out'"
+ elif [ ${rc} -eq 1 ]; then
mptcp_lib_result_fail "${msg} # different output"
- ret=1
+ ret=${KSFT_FAIL}
fi
}
@@ -105,14 +97,14 @@ check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
-for i in `seq 5 9`; do
+for i in $(seq 5 9); do
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
done
check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
ip netns exec $ns1 ./pm_nl_ctl del 9
-for i in `seq 10 255`; do
+for i in $(seq 10 255); do
ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
ip netns exec $ns1 ./pm_nl_ctl del $i
done
@@ -197,7 +189,8 @@ subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
else
for st in fullmesh nofullmesh backup,fullmesh; do
st=" (${st})"
- printf "%-50s%s\n" "${st}" "[SKIP]"
+ mptcp_lib_print_title "${st}"
+ mptcp_lib_pr_skip
mptcp_lib_result_skip "${st}"
done
fi
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 49369c4a5f26..7426a2cbd4a0 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -453,6 +453,7 @@ int csf(int fd, int pm_family, int argc, char *argv[])
char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1024];
+ u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW;
const char *params[5];
struct nlmsghdr *nh;
struct rtattr *addr;
@@ -558,6 +559,13 @@ int csf(int fd, int pm_family, int argc, char *argv[])
off += NLMSG_ALIGN(rta->rta_len);
}
+ /* addr flags */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
addr->rta_len = off - addr_start;
}
@@ -1079,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int32_t token = 0;
int nest_start;
u_int8_t id;
int off = 0;
@@ -1089,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
MPTCP_PM_VER);
/* the only argument is the address id */
- if (argc != 3)
+ if (argc != 3 && argc != 5)
syntax(argv);
id = atoi(argv[2]);
+ if (argc == 5 && !strcmp(argv[3], "token"))
+ token = strtoul(argv[4], NULL, 10);
nest_start = off;
nest = (void *)(data + off);
@@ -1108,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
off += NLMSG_ALIGN(rta->rta_len);
nest->rta_len = off - nest_start;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
@@ -1119,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
1024];
pid_t pid = getpid();
struct nlmsghdr *nh;
+ u_int32_t token = 0;
+ struct rtattr *rta;
int off = 0;
+ if (argc != 2 && argc != 4)
+ syntax(argv);
+
+ if (argc == 4 && !strcmp(argv[2], "token"))
+ token = strtoul(argv[3], NULL, 10);
+
memset(data, 0, sizeof(data));
nh = (void *)data;
off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
@@ -1130,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
nh->nlmsg_pid = pid;
nh->nlmsg_len = off;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 8f9ddb3ad4fe..1b2366220388 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -1,21 +1,30 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
+ns1=""
+ns2=""
+ns3=""
capture=false
-ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
-test_cnt=1
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-60s"
ret=0
bail=0
slack=50
+large=""
+small=""
+sout=""
+cout=""
+capout=""
+size=0
usage() {
echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
@@ -24,25 +33,19 @@ usage() {
echo -e "\t-d: debug this script"
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f "$cout" "$sout"
rm -f "$large" "$small"
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2" "$ns3";do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip
# "$ns1" ns2 ns3
# ns1eth1 ns2eth1 ns2eth3 ns3eth1
@@ -64,12 +67,7 @@ setup()
trap cleanup EXIT
- for i in "$ns1" "$ns2" "$ns3";do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
- ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
- done
+ mptcp_lib_ns_init ns1 ns2 ns3
ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
@@ -129,8 +127,7 @@ do_transfer()
local sin=$2
local max_time=$3
local port
- port=$((10000+$test_cnt))
- test_cnt=$((test_cnt+1))
+ port=$((10000+MPTCP_LIB_TEST_COUNTER))
:> "$cout"
:> "$sout"
@@ -138,6 +135,7 @@ do_transfer()
if $capture; then
local capuser
+ local rndh="${ns1:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
@@ -189,12 +187,12 @@ do_transfer()
printf "%-16s" " max $max_time "
if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
[ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
- echo "[ OK ]"
+ mptcp_lib_pr_ok
cat "$capout"
return 0
fi
- echo " [ fail ]"
+ mptcp_lib_pr_fail
echo "client exit code $retc, server $rets" 1>&2
echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
@@ -241,7 +239,7 @@ run_test()
# completion (see mptcp_connect): 200ms on each side, add some slack
time=$((time + 400 + slack))
- printf "%-60s" "$msg"
+ mptcp_lib_print_title "$msg"
do_transfer $small $large $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
@@ -251,7 +249,7 @@ run_test()
fi
msg+=" - reverse direction"
- printf "%-60s" "${msg}"
+ mptcp_lib_print_title "${msg}"
do_transfer $large $small $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
@@ -265,7 +263,7 @@ while getopts "bcdh" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"b")
bail=1
@@ -278,7 +276,7 @@ while getopts "bcdh" option;do
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 1b94a75604fe..9e2981f2d7f5 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -5,7 +5,7 @@
# code but we accept it.
#shellcheck disable=SC2086
-# Some variables are used below but indirectly, see check_expected_one()
+# Some variables are used below but indirectly, see verify_*_event()
#shellcheck disable=SC2034
. "$(dirname "${0}")/mptcp_lib.sh"
@@ -17,21 +17,17 @@ if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
echo "userspace pm tests are not supported by the kernel: SKIP"
exit ${KSFT_SKIP}
fi
+mptcp_lib_check_tools ip
-if ! ip -Version &> /dev/null; then
- echo "SKIP: Cannot not run test without ip tool"
- exit ${KSFT_SKIP}
-fi
+ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED}
+REMOVED=${MPTCP_LIB_EVENT_REMOVED}
+SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED}
+SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED}
+LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED}
+LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED}
-ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
-REMOVED=7 # MPTCP_EVENT_REMOVED
-SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
-SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
-LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
-LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED
-
-AF_INET=2
-AF_INET6=10
+AF_INET=${MPTCP_LIB_AF_INET}
+AF_INET6=${MPTCP_LIB_AF_INET6}
file=""
server_evts=""
@@ -54,20 +50,16 @@ app6_port=50004
client_addr_id=${RANDOM:0:2}
server_addr_id=${RANDOM:0:2}
-sec=$(date +%s)
-rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
+ns1=""
+ns2=""
ret=0
test_name=""
-
-_printf() {
- stdbuf -o0 -e0 printf "${@}"
-}
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-68s"
print_title()
{
- _printf "INFO: %s\n" "${1}"
+ mptcp_lib_pr_info "${1}"
}
# $1: test name
@@ -75,36 +67,29 @@ print_test()
{
test_name="${1}"
- _printf "%-68s" "${test_name}"
-}
-
-print_results()
-{
- _printf "[%s]\n" "${1}"
+ mptcp_lib_print_title "${test_name}"
}
test_pass()
{
- print_results " OK "
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${test_name}"
}
test_skip()
{
- print_results "SKIP"
+ mptcp_lib_pr_skip
mptcp_lib_result_skip "${test_name}"
}
# $1: msg
test_fail()
{
- print_results "FAIL"
- ret=1
-
- if [ -n "${1}" ]; then
- _printf "\t%s\n" "${1}"
+ if [ ${#} -gt 0 ]
+ then
+ mptcp_lib_pr_fail "${@}"
fi
-
+ ret=${KSFT_FAIL}
mptcp_lib_result_fail "${test_name}"
}
@@ -122,23 +107,18 @@ cleanup()
mptcp_lib_kill_wait $pid
done
- local netns
- for netns in "$ns1" "$ns2" ;do
- ip netns del "$netns"
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
rm -rf $file $client_evts $server_evts
- _printf "Done\n"
+ mptcp_lib_pr_info "Done"
}
trap cleanup EXIT
# Create and configure network namespaces for testing
+mptcp_lib_ns_init ns1 ns2
for i in "$ns1" "$ns2" ;do
- ip netns add "$i" || exit 1
- ip -net "$i" link set lo up
- ip netns exec "$i" sysctl -q net.mptcp.enabled=1
ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
done
@@ -160,17 +140,23 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
ip -net "$ns2" link set ns2eth1 up
+file=$(mktemp)
+mptcp_lib_make_file "$file" 2 1
+
+# Capture netlink events over the two network namespaces running
+# the MPTCP client and server
+client_evts=$(mktemp)
+mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
+server_evts=$(mktemp)
+mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
+sleep 0.5
+
print_title "Init"
print_test "Created network namespaces ns1, ns2"
test_pass
make_connection()
{
- if [ -z "$file" ]; then
- file=$(mktemp)
- fi
- mptcp_lib_make_file "$file" 2 1
-
local is_v6=$1
local app_port=$app4_port
local connect_addr="10.0.1.1"
@@ -184,27 +170,8 @@ make_connection()
is_v6="v4"
fi
- # Capture netlink events over the two network namespaces running
- # the MPTCP client and server
- if [ -z "$client_evts" ]; then
- client_evts=$(mktemp)
- fi
:>"$client_evts"
- if [ $client_evts_pid -ne 0 ]; then
- mptcp_lib_kill_wait $client_evts_pid
- fi
- ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
- client_evts_pid=$!
- if [ -z "$server_evts" ]; then
- server_evts=$(mktemp)
- fi
:>"$server_evts"
- if [ $server_evts_pid -ne 0 ]; then
- mptcp_lib_kill_wait $server_evts_pid
- fi
- ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
- server_evts_pid=$!
- sleep 0.5
# Run the server
ip netns exec "$ns1" \
@@ -242,7 +209,7 @@ make_connection()
else
test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
mptcp_lib_result_print_all_tap
- exit 1
+ exit ${KSFT_FAIL}
fi
if [ "$is_v6" = "v6" ]
@@ -261,45 +228,16 @@ make_connection()
fi
}
-# $1: var name ; $2: prev ret
-check_expected_one()
-{
- local var="${1}"
- local exp="e_${var}"
- local prev_ret="${2}"
-
- if [ "${!var}" = "${!exp}" ]
- then
- return 0
- fi
-
- if [ "${prev_ret}" = "0" ]
- then
- test_fail
- fi
-
- _printf "\tExpected value for '%s': '%s', got '%s'.\n" \
- "${var}" "${!exp}" "${!var}"
- return 1
-}
-
# $@: all var names to check
check_expected()
{
- local rc=0
- local var
-
- for var in "${@}"
- do
- check_expected_one "${var}" "${rc}" || rc=1
- done
-
- if [ ${rc} -eq 0 ]
+ if mptcp_lib_check_expected "${@}"
then
test_pass
return 0
fi
+ test_fail
return 1
}
@@ -449,7 +387,7 @@ test_remove()
then
test_pass
else
- test_fail
+ test_fail "unexpected type: ${type}"
fi
# RM_ADDR using an invalid addr id should result in no action
@@ -462,7 +400,7 @@ test_remove()
then
test_pass
else
- test_fail
+ test_fail "unexpected type: ${type}"
fi
# RM_ADDR from the client to server machine
@@ -897,32 +835,11 @@ test_prio()
verify_listener_events()
{
- local evt=$1
- local e_type=$2
- local e_family=$3
- local e_saddr=$4
- local e_sport=$5
- local type
- local family
- local saddr
- local sport
-
- if [ $e_type = $LISTENER_CREATED ]; then
- print_test "CREATE_LISTENER $e_saddr:$e_sport"
- elif [ $e_type = $LISTENER_CLOSED ]; then
- print_test "CLOSE_LISTENER $e_saddr:$e_sport"
- fi
-
- type=$(mptcp_lib_evts_get_info type $evt $e_type)
- family=$(mptcp_lib_evts_get_info family $evt $e_type)
- sport=$(mptcp_lib_evts_get_info sport $evt $e_type)
- if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type)
+ if mptcp_lib_verify_listener_events "${@}"; then
+ test_pass
else
- saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type)
+ test_fail
fi
-
- check_expected "type" "family" "saddr" "sport"
}
test_listener()
@@ -944,6 +861,7 @@ test_listener()
local listener_pid=$!
sleep 0.5
+ print_test "CREATE_LISTENER 10.0.2.2:$client4_port"
verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
# ADD_ADDR from client to server machine reusing the subflow port
@@ -960,6 +878,7 @@ test_listener()
mptcp_lib_kill_wait $listener_pid
sleep 0.5
+ print_test "CLOSE_LISTENER 10.0.2.2:$client4_port"
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 36e40256ab92..5cae53543849 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -17,6 +17,7 @@ tests="
ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct
connect_v4 ip4-xon: Basic ipv4 ping between two NS
nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT
+ nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
drop_reason drop: test drop reasons are emitted"
@@ -473,6 +474,67 @@ test_nat_connect_v4 () {
return 0
}
+# nat_related_v4 test
+# - client->server ip packets go via SNAT
+# - client solicits ICMP destination unreachable packet from server
+# - undo NAT for ICMP reply and test dst ip has been updated
+test_nat_related_v4 () {
+ which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+ sbx_add "test_nat_related_v4" || return $?
+
+ ovs_add_dp "test_nat_related_v4" natrelated4 || return 1
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10
+
+ # Allow ARP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1
+
+ # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \
+ "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \
+ "2" || return 1
+
+ # Allow related ICMP responses back from server and undo NAT to restore original IP
+ # Drop any ICMP related packets where dst ip hasn't been restored back to original IP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "ct(commit,nat),recirc(0x2)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \
+ "1" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \
+ "drop" || return 1
+
+ # Solicit destination unreachable response from server
+ ovs_sbx "test_nat_related_v4" ip netns exec client \
+ bash -c "echo a | nc -u -w 1 172.31.110.20 10000"
+
+ # Check to make sure no packets matched the drop rule with incorrect dst ip
+ python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \
+ | grep "drop" | grep "packets:0" >/dev/null || return 1
+
+ info "done..."
+ return 0
+}
+
# netlink_validation
# - Create a dp
# - check no warning with "old version" simulation
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 874a2952aa8e..bdf6f10d0558 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -801,6 +801,8 @@ kci_test_ipsec_offload()
end_test "FAIL: ipsec_offload SA offload missing from list output"
fi
+ # we didn't create a peer, make sure we can Tx
+ ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55
# use ping to exercise the Tx path
ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 2672ac0b6d1f..8457b7ccbc09 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -134,8 +134,11 @@ static void do_recv_one(int fdr, struct timed_send *ts)
if (rbuf[0] != ts->data)
error(1, 0, "payload mismatch. expected %c", ts->data);
- if (llabs(tstop - texpect) > cfg_variance_us)
- error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+ if (llabs(tstop - texpect) > cfg_variance_us) {
+ fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us);
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ exit(1);
+ }
}
static void do_recv_verify_empty(int fdr)
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 84a05a9e46d8..74ff9fb2a6f0 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -1014,10 +1014,10 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
- log_test $? 254 "Flush by port"
+ log_test $? 254 "Flush by port - matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0"
- log_test $? 255 "Flush by wrong port"
+ log_test $? 255 "Flush by port - non-matching"
# Check that when flushing by source VNI only entries programmed with
# the specified source VNI are flushed and the rest are not.
@@ -1030,9 +1030,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
- log_test $? 254 "Flush by specified source VNI"
+ log_test $? 254 "Flush by source VNI - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011"
- log_test $? 0 "Flush by unspecified source VNI"
+ log_test $? 0 "Flush by source VNI - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1058,9 +1058,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\""
- log_test $? 1 "Flush by specified routing protocol"
+ log_test $? 1 "Flush by routing protocol - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\""
- log_test $? 0 "Flush by unspecified routing protocol"
+ log_test $? 0 "Flush by routing protocol - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1075,9 +1075,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
- log_test $? 1 "Flush by specified destination IP - IPv4"
+ log_test $? 1 "Flush by IPv4 destination IP - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
- log_test $? 0 "Flush by unspecified destination IP - IPv4"
+ log_test $? 0 "Flush by IPv4 destination IP - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1089,9 +1089,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2"
- log_test $? 1 "Flush by specified destination IP - IPv6"
+ log_test $? 1 "Flush by IPv6 destination IP - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1"
- log_test $? 0 "Flush by unspecified destination IP - IPv6"
+ log_test $? 0 "Flush by IPv6 destination IP - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1104,9 +1104,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\""
- log_test $? 1 "Flush by specified UDP destination port"
+ log_test $? 1 "Flush by UDP destination port - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\""
- log_test $? 0 "Flush by unspecified UDP destination port"
+ log_test $? 0 "Flush by UDP destination port - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1121,9 +1121,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
- log_test $? 1 "Flush by device's UDP destination port"
+ log_test $? 1 "Flush by device's UDP destination port - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
- log_test $? 0 "Flush by unspecified UDP destination port"
+ log_test $? 0 "Flush by device's UDP destination port - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1136,9 +1136,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\""
- log_test $? 1 "Flush by specified destination VNI"
+ log_test $? 1 "Flush by destination VNI - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\""
- log_test $? 0 "Flush by unspecified destination VNI"
+ log_test $? 0 "Flush by destination VNI - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
@@ -1153,9 +1153,9 @@ flush()
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
- log_test $? 1 "Flush by destination VNI equal to source VNI"
+ log_test $? 1 "Flush by destination VNI equal to source VNI - matching"
run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
- log_test $? 0 "Flush by unspecified destination VNI"
+ log_test $? 0 "Flush by destination VNI equal to source VNI - non-matching"
run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index b95c249f81c2..c6eda21cefb6 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1927,7 +1927,7 @@ TEST_F(tls_err, poll_partial_rec_async)
pfd.events = POLLIN;
EXPECT_EQ(poll(&pfd, 1, 20), 1);
- exit(!_metadata->passed);
+ exit(!__test_passed(_metadata));
}
}
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 10f2fde3686b..ec60a16c9307 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -163,7 +163,8 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n",
cur64 - start64, min_delay, max_delay);
- test_failed = true;
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ test_failed = true;
}
}
diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index 31637769f59f..25baca4b148e 100755
--- a/tools/testing/selftests/net/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -8,13 +8,13 @@ set -e
setup() {
# set 1ms delay on lo egress
- tc qdisc add dev lo root netem delay 1ms
+ tc qdisc add dev lo root netem delay 10ms
# set 2ms delay on ifb0 egress
modprobe ifb
ip link add ifb_netem0 type ifb
ip link set dev ifb_netem0 up
- tc qdisc add dev ifb_netem0 root netem delay 2ms
+ tc qdisc add dev ifb_netem0 root netem delay 20ms
# redirect lo ingress through ifb0 egress
tc qdisc add dev lo handle ffff: ingress
@@ -24,9 +24,11 @@ setup() {
}
run_test_v4v6() {
- # SND will be delayed 1000us
- # ACK will be delayed 6000us: 1 + 2 ms round-trip
- local -r args="$@ -v 1000 -V 6000"
+ # SND will be delayed 10ms
+ # ACK will be delayed 60ms: 10 + 20 ms round-trip
+ # allow +/- tolerance of 8ms
+ # wait for ACK to be queued
+ local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000"
./txtimestamp ${args} -4 -L 127.0.0.1
./txtimestamp ${args} -6 -L ::1
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 7badaf215de2..1d975bf52af3 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -56,7 +56,6 @@ static bool cfg_do_msgmore;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
-static const char cfg_ifname[] = "lo";
static unsigned short cfg_port = 9000;
static char buf[ETH_MAX_MTU];
@@ -69,8 +68,13 @@ struct testcase {
int r_len_last; /* recv(): size of last non-mss dgram, if any */
};
-const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
-const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = {
+ { { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */
+};
+
+const struct in_addr addr4 = {
+ __constant_htonl(0x0a000001), /* 10.0.0.1 */
+};
struct testcase testcases_v4[] = {
{
@@ -274,48 +278,6 @@ struct testcase testcases_v6[] = {
}
};
-static unsigned int get_device_mtu(int fd, const char *ifname)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFMTU, &ifr))
- error(1, errno, "ioctl get mtu");
-
- return ifr.ifr_mtu;
-}
-
-static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- ifr.ifr_mtu = mtu;
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCSIFMTU, &ifr))
- error(1, errno, "ioctl set mtu");
-}
-
-static void set_device_mtu(int fd, int mtu)
-{
- int val;
-
- val = get_device_mtu(fd, cfg_ifname);
- fprintf(stderr, "device mtu (orig): %u\n", val);
-
- __set_device_mtu(fd, cfg_ifname, mtu);
- val = get_device_mtu(fd, cfg_ifname);
- if (val != mtu)
- error(1, 0, "unable to set device mtu to %u\n", val);
-
- fprintf(stderr, "device mtu (test): %u\n", val);
-}
-
static void set_pmtu_discover(int fd, bool is_ipv4)
{
int level, name, val;
@@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4)
return mtu;
}
-/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
-static void set_route_mtu(int mtu, bool is_ipv4)
-{
- struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
- struct nlmsghdr *nh;
- struct rtattr *rta;
- struct rtmsg *rt;
- char data[NLMSG_ALIGN(sizeof(*nh)) +
- NLMSG_ALIGN(sizeof(*rt)) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
- NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
- int fd, ret, alen, off = 0;
-
- alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
-
- fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (fd == -1)
- error(1, errno, "socket netlink");
-
- memset(data, 0, sizeof(data));
-
- nh = (void *)data;
- nh->nlmsg_type = RTM_NEWROUTE;
- nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
- off += NLMSG_ALIGN(sizeof(*nh));
-
- rt = (void *)(data + off);
- rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
- rt->rtm_table = RT_TABLE_MAIN;
- rt->rtm_dst_len = alen << 3;
- rt->rtm_protocol = RTPROT_BOOT;
- rt->rtm_scope = RT_SCOPE_UNIVERSE;
- rt->rtm_type = RTN_UNICAST;
- off += NLMSG_ALIGN(sizeof(*rt));
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_DST;
- rta->rta_len = RTA_LENGTH(alen);
- if (is_ipv4)
- memcpy(RTA_DATA(rta), &addr4, alen);
- else
- memcpy(RTA_DATA(rta), &addr6, alen);
- off += NLMSG_ALIGN(rta->rta_len);
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_OIF;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* MTU is a subtype in a metrics type */
- rta = (void *)(data + off);
- rta->rta_type = RTA_METRICS;
- rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* now fill MTU subtype. Note that it fits within above rta_len */
- rta = (void *)(((char *) rta) + RTA_LENGTH(0));
- rta->rta_type = RTAX_MTU;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = mtu;
-
- nh->nlmsg_len = off;
-
- ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
- if (ret != off)
- error(1, errno, "send netlink: %uB != %uB\n", ret, off);
-
- if (close(fd))
- error(1, errno, "close netlink");
-
- fprintf(stderr, "route mtu (test): %u\n", mtu);
-}
-
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
int ret;
@@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
/* Do not fragment these datagrams: only succeed if GSO works */
set_pmtu_discover(fdt, addr->sa_family == AF_INET);
- if (cfg_do_connectionless) {
- set_device_mtu(fdt, CONST_MTU_TEST);
+ if (cfg_do_connectionless)
run_all(fdt, fdr, addr, alen);
- }
if (cfg_do_connected) {
- set_device_mtu(fdt, CONST_MTU_TEST + 100);
- set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
-
if (connect(fdt, addr, alen))
error(1, errno, "connect");
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index fec24f584fe9..6c63178086b0 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -3,27 +3,56 @@
#
# Run a series of udpgso regression tests
+set -o errexit
+set -o nounset
+
+setup_loopback() {
+ ip addr add dev lo 10.0.0.1/32
+ ip addr add dev lo fd00::1/128 nodad noprefixroute
+}
+
+test_dev_mtu() {
+ setup_loopback
+ # Reduce loopback MTU
+ ip link set dev lo mtu 1500
+}
+
+test_route_mtu() {
+ setup_loopback
+ # Remove default local routes
+ ip route del local 10.0.0.1/32 table local dev lo
+ ip route del local fd00::1/128 table local dev lo
+ # Install local routes with reduced MTU
+ ip route add local 10.0.0.1/32 table local dev lo mtu 1500
+ ip route add local fd00::1/128 table local dev lo mtu 1500
+}
+
+if [ "$#" -gt 0 ]; then
+ "$1"
+ shift 2 # pop "test_*" arg and "--" delimiter
+ exec "$@"
+fi
+
echo "ipv4 cmsg"
-./in_netns.sh ./udpgso -4 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C
echo "ipv4 setsockopt"
-./in_netns.sh ./udpgso -4 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s
echo "ipv6 cmsg"
-./in_netns.sh ./udpgso -6 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C
echo "ipv6 setsockopt"
-./in_netns.sh ./udpgso -6 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s
echo "ipv4 connected"
-./in_netns.sh ./udpgso -4 -c
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c
-# blocked on 2nd loopback address
-# echo "ipv6 connected"
-# ./in_netns.sh ./udpgso -6 -c
+echo "ipv6 connected"
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c
echo "ipv4 msg_more"
-./in_netns.sh ./udpgso -4 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m
echo "ipv6 msg_more"
-./in_netns.sh ./udpgso -6 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index cacf6507f690..783ebce8c4de 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1576,7 +1576,7 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
ASSERT_EQ(0, ret);
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
/* Common tracer setup/teardown functions. */
@@ -1623,7 +1623,7 @@ void teardown_trace_fixture(struct __test_metadata *_metadata,
ASSERT_EQ(0, kill(tracer, SIGUSR1));
ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
if (WEXITSTATUS(status))
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
}
@@ -3088,8 +3088,7 @@ TEST(syscall_restart)
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
- : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
EXPECT_EQ(0, close(pipefd[0]));
@@ -3174,7 +3173,7 @@ TEST(syscall_restart)
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
if (WIFSIGNALED(status) || WEXITSTATUS(status))
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
TEST_SIGNAL(filter_flag_log, SIGSYS)
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index c60acba951c2..db176fe7d0c3 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -8,6 +8,7 @@ CONFIG_VETH=y
#
# Core Netfilter Configuration
#
+CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_MARK=y
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index b53d12909962..b73bd255ea36 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -649,5 +649,408 @@
"teardown": [
"$TC actions flush action mirred"
]
+ },
+ {
+ "id": "456d",
+ "name": "Add mirred mirror to egress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 egress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 egress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2358",
+ "name": "Add mirred mirror to ingress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "ingress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "fdb1",
+ "name": "Add mirred redirect to egress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "20cc",
+ "name": "Add mirred redirect to ingress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "ingress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e739",
+ "name": "Try to add mirred action with both dev and block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21 dev $DEV1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC -j actions list action mirred",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2f47",
+ "name": "Try to add mirred action without specifying neither dev nor block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC -j actions list action mirred",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3188",
+ "name": "Replace mirred redirect to dev action with redirect to block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ],
+ [
+ "$TC actions add action mirred ingress redirect index 1 dev $DEV1",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions replace action mirred egress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "83cc",
+ "name": "Replace mirred redirect to block action with mirror to dev",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ],
+ [
+ "$TC actions add action mirred egress redirect index 1 blockid 21",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions replace action mirred ingress mirror index 1 dev lo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "ingress",
+ "to_dev": "lo",
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
index be293e7c6d18..3a537b2ec4c9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
@@ -77,7 +77,7 @@
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq quantum 9000",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
- "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*orphan_mask 1023 quantum 9000b",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*quantum 9000b",
"matchCount": "1",
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 2d603ef2e375..12da0a939e3e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -167,6 +167,7 @@
"plugins": {
"requires": "nsPlugin"
},
+ "dependsOn": "echo '' | jq",
"setup": [
"echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
"$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI",
@@ -192,6 +193,7 @@
"plugins": {
"requires": "nsPlugin"
},
+ "dependsOn": "echo '' | jq",
"setup": [
"echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
"$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2",
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index caeacc691587..ee349187636f 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -541,7 +541,7 @@ def test_runner(pm, args, filtered_tests):
message = pmtf.message
output = pmtf.output
res = TestResult(tidx['id'], tidx['name'])
- res.set_result(ResultState.skip)
+ res.set_result(ResultState.fail)
res.set_errormsg(pmtf.message)
res.set_failmsg(pmtf.output)
tsr.add_resultdata(res)
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index c53ede8b730d..cddff1772e10 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -63,5 +63,4 @@ try_modprobe sch_hfsc
try_modprobe sch_hhf
try_modprobe sch_htb
try_modprobe sch_teql
-./tdc.py -J`nproc` -c actions
-./tdc.py -J`nproc` -c qdisc
+./tdc.py -J`nproc`
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index ae2b33c21c45..554b290fefdc 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -33,8 +33,7 @@ void init_signals(void)
signal(SIGPIPE, SIG_IGN);
}
-/* Parse a CID in string representation */
-unsigned int parse_cid(const char *str)
+static unsigned int parse_uint(const char *str, const char *err_str)
{
char *endptr = NULL;
unsigned long n;
@@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str)
errno = 0;
n = strtoul(str, &endptr, 10);
if (errno || *endptr != '\0') {
- fprintf(stderr, "malformed CID \"%s\"\n", str);
+ fprintf(stderr, "malformed %s \"%s\"\n", err_str, str);
exit(EXIT_FAILURE);
}
return n;
}
+/* Parse a CID in string representation */
+unsigned int parse_cid(const char *str)
+{
+ return parse_uint(str, "CID");
+}
+
+/* Parse a port in string representation */
+unsigned int parse_port(const char *str)
+{
+ return parse_uint(str, "port");
+}
+
/* Wait for the remote to close the connection */
void vsock_wait_remote_close(int fd)
{
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index 03c88d0cb861..e95e62485959 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -12,10 +12,13 @@ enum test_mode {
TEST_MODE_SERVER
};
+#define DEFAULT_PEER_PORT 1234
+
/* Test runner options */
struct test_opts {
enum test_mode mode;
unsigned int peer_cid;
+ unsigned int peer_port;
};
/* A test case definition. Test functions must print failures to stderr and
@@ -35,6 +38,7 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
+unsigned int parse_port(const char *str);
int vsock_stream_connect(unsigned int cid, unsigned int port);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
index fa927ad16f8a..081e045f4696 100644
--- a/tools/testing/vsock/vsock_diag_test.c
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -39,6 +39,8 @@ static const char *sock_type_str(int type)
return "DGRAM";
case SOCK_STREAM:
return "STREAM";
+ case SOCK_SEQPACKET:
+ return "SEQPACKET";
default:
return "INVALID TYPE";
}
@@ -342,7 +344,7 @@ static void test_listen_socket_server(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = VMADDR_CID_ANY,
},
};
@@ -378,7 +380,7 @@ static void test_connect_client(const struct test_opts *opts)
LIST_HEAD(sockets);
struct vsock_stat *st;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -403,7 +405,7 @@ static void test_connect_server(const struct test_opts *opts)
LIST_HEAD(sockets);
int client_fd;
- client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (client_fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -462,6 +464,11 @@ static const struct option longopts[] = {
.val = 'p',
},
{
+ .name = "peer-port",
+ .has_arg = required_argument,
+ .val = 'q',
+ },
+ {
.name = "list",
.has_arg = no_argument,
.val = 'l',
@@ -481,7 +488,7 @@ static const struct option longopts[] = {
static void usage(void)
{
- fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n"
+ fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n"
"\n"
" Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n"
" Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
@@ -503,9 +510,11 @@ static void usage(void)
" --control-port <port> Server port to listen on/connect to\n"
" --mode client|server Server or client mode\n"
" --peer-cid <cid> CID of the other side\n"
+ " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n"
" --list List of tests that will be executed\n"
" --skip <test_id> Test ID to skip;\n"
- " use multiple --skip options to skip more tests\n"
+ " use multiple --skip options to skip more tests\n",
+ DEFAULT_PEER_PORT
);
exit(EXIT_FAILURE);
}
@@ -517,6 +526,7 @@ int main(int argc, char **argv)
struct test_opts opts = {
.mode = TEST_MODE_UNSET,
.peer_cid = VMADDR_CID_ANY,
+ .peer_port = DEFAULT_PEER_PORT,
};
init_signals();
@@ -544,6 +554,9 @@ int main(int argc, char **argv)
case 'p':
opts.peer_cid = parse_cid(optarg);
break;
+ case 'q':
+ opts.peer_port = parse_port(optarg);
+ break;
case 'P':
control_port = optarg;
break;
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 66246d81d654..f851f8961247 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -34,7 +34,7 @@ static void test_stream_connection_reset(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = opts->peer_cid,
},
};
@@ -70,7 +70,7 @@ static void test_stream_bind_only_client(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = opts->peer_cid,
},
};
@@ -112,7 +112,7 @@ static void test_stream_bind_only_server(const struct test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
- .svm_port = 1234,
+ .svm_port = opts->peer_port,
.svm_cid = VMADDR_CID_ANY,
},
};
@@ -138,7 +138,7 @@ static void test_stream_client_close_client(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -152,7 +152,7 @@ static void test_stream_client_close_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -173,7 +173,7 @@ static void test_stream_server_close_client(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -194,7 +194,7 @@ static void test_stream_server_close_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -215,7 +215,7 @@ static void test_stream_multiconn_client(const struct test_opts *opts)
int i;
for (i = 0; i < MULTICONN_NFDS; i++) {
- fds[i] = vsock_stream_connect(opts->peer_cid, 1234);
+ fds[i] = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fds[i] < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -239,7 +239,7 @@ static void test_stream_multiconn_server(const struct test_opts *opts)
int i;
for (i = 0; i < MULTICONN_NFDS; i++) {
- fds[i] = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fds[i] = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fds[i] < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -267,9 +267,9 @@ static void test_msg_peek_client(const struct test_opts *opts,
int i;
if (seqpacket)
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
else
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
@@ -295,9 +295,9 @@ static void test_msg_peek_server(const struct test_opts *opts,
int fd;
if (seqpacket)
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
else
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
@@ -363,7 +363,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
int msg_count;
int fd;
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -434,7 +434,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
struct msghdr msg = {0};
struct iovec iov = {0};
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -505,7 +505,7 @@ static void test_seqpacket_msg_trunc_client(const struct test_opts *opts)
int fd;
char buf[MESSAGE_TRUNC_SZ];
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -524,7 +524,7 @@ static void test_seqpacket_msg_trunc_server(const struct test_opts *opts)
struct msghdr msg = {0};
struct iovec iov = {0};
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -575,7 +575,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts)
time_t read_enter_ns;
time_t read_overhead_ns;
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -620,7 +620,7 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -639,7 +639,7 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts)
len = sizeof(sock_buf_size);
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -671,7 +671,7 @@ static void test_seqpacket_bigmsg_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -692,7 +692,7 @@ static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opt
unsigned char *buf2;
int buf_size = getpagesize() * 3;
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -732,7 +732,7 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
int i;
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -808,7 +808,7 @@ static void test_stream_poll_rcvlowat_server(const struct test_opts *opts)
int fd;
int i;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -839,7 +839,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
short poll_flags;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -906,9 +906,9 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream)
int fd;
if (stream)
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
else
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
@@ -941,9 +941,9 @@ static void test_inv_buf_server(const struct test_opts *opts, bool stream)
int fd;
if (stream)
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
else
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
@@ -986,7 +986,7 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1015,7 +1015,7 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
unsigned char buf[64];
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1108,7 +1108,7 @@ static void test_stream_shutwr_client(const struct test_opts *opts)
sigaction(SIGPIPE, &act, NULL);
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1130,7 +1130,7 @@ static void test_stream_shutwr_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1151,7 +1151,7 @@ static void test_stream_shutrd_client(const struct test_opts *opts)
sigaction(SIGPIPE, &act, NULL);
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1170,7 +1170,7 @@ static void test_stream_shutrd_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1193,7 +1193,7 @@ static void test_double_bind_connect_server(const struct test_opts *opts)
struct sockaddr_vm sa_client;
socklen_t socklen_client = sizeof(sa_client);
- listen_fd = vsock_stream_listen(VMADDR_CID_ANY, 1234);
+ listen_fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
for (i = 0; i < 2; i++) {
control_writeln("LISTENING");
@@ -1226,7 +1226,13 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
/* Wait until server is ready to accept a new connection */
control_expectln("LISTENING");
- client_fd = vsock_bind_connect(opts->peer_cid, 1234, 4321, SOCK_STREAM);
+ /* We use 'peer_port + 1' as "some" port for the 'bind()'
+ * call. It is safe for overflow, but must be considered,
+ * when running multiple test applications simultaneously
+ * where 'peer-port' argument differs by 1.
+ */
+ client_fd = vsock_bind_connect(opts->peer_cid, opts->peer_port,
+ opts->peer_port + 1, SOCK_STREAM);
close(client_fd);
}
@@ -1246,7 +1252,7 @@ static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opt
void *buf;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -1282,7 +1288,7 @@ static void test_stream_credit_update_test(const struct test_opts *opts,
void *buf;
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -1543,6 +1549,11 @@ static const struct option longopts[] = {
.val = 'p',
},
{
+ .name = "peer-port",
+ .has_arg = required_argument,
+ .val = 'q',
+ },
+ {
.name = "list",
.has_arg = no_argument,
.val = 'l',
@@ -1562,7 +1573,7 @@ static const struct option longopts[] = {
static void usage(void)
{
- fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n"
+ fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n"
"\n"
" Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n"
" Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
@@ -1577,6 +1588,9 @@ static void usage(void)
"connect to.\n"
"\n"
"The CID of the other side must be given with --peer-cid=<cid>.\n"
+ "During the test, two AF_VSOCK ports will be used: the port\n"
+ "specified with --peer-port=<port> (or the default port)\n"
+ "and the next one.\n"
"\n"
"Options:\n"
" --help This help message\n"
@@ -1584,9 +1598,11 @@ static void usage(void)
" --control-port <port> Server port to listen on/connect to\n"
" --mode client|server Server or client mode\n"
" --peer-cid <cid> CID of the other side\n"
+ " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n"
" --list List of tests that will be executed\n"
" --skip <test_id> Test ID to skip;\n"
- " use multiple --skip options to skip more tests\n"
+ " use multiple --skip options to skip more tests\n",
+ DEFAULT_PEER_PORT
);
exit(EXIT_FAILURE);
}
@@ -1598,6 +1614,7 @@ int main(int argc, char **argv)
struct test_opts opts = {
.mode = TEST_MODE_UNSET,
.peer_cid = VMADDR_CID_ANY,
+ .peer_port = DEFAULT_PEER_PORT,
};
srand(time(NULL));
@@ -1626,6 +1643,9 @@ int main(int argc, char **argv)
case 'p':
opts.peer_cid = parse_cid(optarg);
break;
+ case 'q':
+ opts.peer_port = parse_port(optarg);
+ break;
case 'P':
control_port = optarg;
break;
diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c
index a16ff76484e6..04c376b6937f 100644
--- a/tools/testing/vsock/vsock_test_zerocopy.c
+++ b/tools/testing/vsock/vsock_test_zerocopy.c
@@ -152,9 +152,9 @@ static void test_client(const struct test_opts *opts,
int fd;
if (sock_seqpacket)
- fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+ fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port);
else
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
@@ -248,9 +248,9 @@ static void test_server(const struct test_opts *opts,
int fd;
if (sock_seqpacket)
- fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
else
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
@@ -323,7 +323,7 @@ void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts)
ssize_t res;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -347,7 +347,7 @@ void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts)
{
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c
index d976d35f0ba9..6c3e6f70c457 100644
--- a/tools/testing/vsock/vsock_uring_test.c
+++ b/tools/testing/vsock/vsock_uring_test.c
@@ -66,7 +66,7 @@ static void vsock_io_uring_client(const struct test_opts *opts,
struct msghdr msg;
int fd;
- fd = vsock_stream_connect(opts->peer_cid, 1234);
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -120,7 +120,7 @@ static void vsock_io_uring_server(const struct test_opts *opts,
void *data;
int fd;
- fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -248,6 +248,11 @@ static const struct option longopts[] = {
.val = 'p',
},
{
+ .name = "peer-port",
+ .has_arg = required_argument,
+ .val = 'q',
+ },
+ {
.name = "help",
.has_arg = no_argument,
.val = '?',
@@ -257,7 +262,7 @@ static const struct option longopts[] = {
static void usage(void)
{
- fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n"
+ fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>]\n"
"\n"
" Server: vsock_uring_test --control-port=1234 --mode=server --peer-cid=3\n"
" Client: vsock_uring_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
@@ -271,6 +276,8 @@ static void usage(void)
" --control-port <port> Server port to listen on/connect to\n"
" --mode client|server Server or client mode\n"
" --peer-cid <cid> CID of the other side\n"
+ " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n",
+ DEFAULT_PEER_PORT
);
exit(EXIT_FAILURE);
}
@@ -282,6 +289,7 @@ int main(int argc, char **argv)
struct test_opts opts = {
.mode = TEST_MODE_UNSET,
.peer_cid = VMADDR_CID_ANY,
+ .peer_port = DEFAULT_PEER_PORT,
};
init_signals();
@@ -309,6 +317,9 @@ int main(int argc, char **argv)
case 'p':
opts.peer_cid = parse_cid(optarg);
break;
+ case 'q':
+ opts.peer_port = parse_port(optarg);
+ break;
case 'P':
control_port = optarg;
break;
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
index 9934d48d9a55..7e47b281c442 100644
--- a/tools/virtio/.gitignore
+++ b/tools/virtio/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
*.d
virtio_test
+vhost_net_test
vringh_test
virtio-trace/trace-agent
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index d128925980e0..e25e99c1c3b7 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
all: test mod
-test: virtio_test vringh_test
+test: virtio_test vringh_test vhost_net_test
virtio_test: virtio_ring.o virtio_test.o
vringh_test: vringh_test.o vringh.o virtio_ring.o
+vhost_net_test: virtio_ring.o vhost_net_test.o
try-run = $(shell set -e; \
if ($(1)) >/dev/null 2>&1; \
@@ -49,6 +50,7 @@ oot-clean: OOT_BUILD+=clean
.PHONY: all test mod clean vhost oot oot-clean oot-build
clean:
- ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
- vhost_test/Module.symvers vhost_test/modules.order *.d
+ ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \
+ vhost_test/.*.cmd vhost_test/Module.symvers \
+ vhost_test/modules.order *.d
-include *.d
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 2a8a70e2a950..42a564f22f2d 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VIRTIO_CONFIG_H
+#define LINUX_VIRTIO_CONFIG_H
#include <linux/virtio_byteorder.h>
#include <linux/virtio.h>
#include <uapi/linux/virtio_config.h>
@@ -95,3 +97,5 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
{
return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
}
+
+#endif
diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c
new file mode 100644
index 000000000000..389d99a6d7c7
--- /dev/null
+++ b/tools/virtio/vhost_net_test.c
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <limits.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <linux/vhost.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/in.h>
+#include <linux/if_packet.h>
+#include <linux/virtio_net.h>
+#include <netinet/ether.h>
+
+#define HDR_LEN sizeof(struct virtio_net_hdr_mrg_rxbuf)
+#define TEST_BUF_LEN 256
+#define TEST_PTYPE ETH_P_LOOPBACK
+#define DESC_NUM 256
+
+/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+struct vq_info {
+ int kick;
+ int call;
+ int idx;
+ long started;
+ long completed;
+ struct pollfd fds;
+ void *ring;
+ /* copy used for control */
+ struct vring vring;
+ struct virtqueue *vq;
+};
+
+struct vdev_info {
+ struct virtio_device vdev;
+ int control;
+ struct vq_info vqs[2];
+ int nvqs;
+ void *buf;
+ size_t buf_size;
+ char *test_buf;
+ char *res_buf;
+ struct vhost_memory *mem;
+ int sock;
+ int ifindex;
+ unsigned char mac[ETHER_ADDR_LEN];
+};
+
+static int tun_alloc(struct vdev_info *dev, char *tun_name)
+{
+ struct ifreq ifr;
+ int len = HDR_LEN;
+ int fd, e;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ perror("Cannot open /dev/net/tun");
+ return fd;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+ strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);
+
+ e = ioctl(fd, TUNSETIFF, &ifr);
+ if (e < 0) {
+ perror("ioctl[TUNSETIFF]");
+ close(fd);
+ return e;
+ }
+
+ e = ioctl(fd, TUNSETVNETHDRSZ, &len);
+ if (e < 0) {
+ perror("ioctl[TUNSETVNETHDRSZ]");
+ close(fd);
+ return e;
+ }
+
+ e = ioctl(fd, SIOCGIFHWADDR, &ifr);
+ if (e < 0) {
+ perror("ioctl[SIOCGIFHWADDR]");
+ close(fd);
+ return e;
+ }
+
+ memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+ return fd;
+}
+
+static void vdev_create_socket(struct vdev_info *dev, char *tun_name)
+{
+ struct ifreq ifr;
+
+ dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE));
+ assert(dev->sock != -1);
+
+ strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);
+ assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0);
+
+ dev->ifindex = ifr.ifr_ifindex;
+
+ /* Set the flags that bring the device up */
+ assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0);
+ ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+ assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0);
+}
+
+static void vdev_send_packet(struct vdev_info *dev)
+{
+ char *sendbuf = dev->test_buf + HDR_LEN;
+ struct sockaddr_ll saddrll = {0};
+ int sockfd = dev->sock;
+ int ret;
+
+ saddrll.sll_family = PF_PACKET;
+ saddrll.sll_ifindex = dev->ifindex;
+ saddrll.sll_halen = ETH_ALEN;
+ saddrll.sll_protocol = htons(TEST_PTYPE);
+
+ ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0,
+ (struct sockaddr *)&saddrll,
+ sizeof(struct sockaddr_ll));
+ assert(ret >= 0);
+}
+
+static bool vq_notify(struct virtqueue *vq)
+{
+ struct vq_info *info = vq->priv;
+ unsigned long long v = 1;
+ int r;
+
+ r = write(info->kick, &v, sizeof(v));
+ assert(r == sizeof(v));
+
+ return true;
+}
+
+static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+ struct vhost_vring_addr addr = {
+ .index = info->idx,
+ .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+ .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+ .used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+ };
+ struct vhost_vring_state state = { .index = info->idx };
+ struct vhost_vring_file file = { .index = info->idx };
+ int r;
+
+ state.num = info->vring.num;
+ r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ assert(r >= 0);
+
+ state.num = 0;
+ r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ assert(r >= 0);
+
+ r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ assert(r >= 0);
+
+ file.fd = info->kick;
+ r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ assert(r >= 0);
+}
+
+static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
+{
+ if (info->vq)
+ vring_del_virtqueue(info->vq);
+
+ memset(info->ring, 0, vring_size(num, 4096));
+ vring_init(&info->vring, num, info->ring, 4096);
+ info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
+ info->ring, vq_notify, NULL, "test");
+ assert(info->vq);
+ info->vq->priv = info;
+}
+
+static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd)
+{
+ struct vhost_vring_file backend = { .index = idx, .fd = fd };
+ struct vq_info *info = &dev->vqs[idx];
+ int r;
+
+ info->idx = idx;
+ info->kick = eventfd(0, EFD_NONBLOCK);
+ r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+ assert(r >= 0);
+ vq_reset(info, num, &dev->vdev);
+ vhost_vq_setup(dev, info);
+
+ r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend);
+ assert(!r);
+}
+
+static void vdev_info_init(struct vdev_info *dev, unsigned long long features)
+{
+ struct ether_header *eh;
+ int i, r;
+
+ dev->vdev.features = features;
+ INIT_LIST_HEAD(&dev->vdev.vqs);
+ spin_lock_init(&dev->vdev.vqs_list_lock);
+
+ dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2;
+ dev->buf = malloc(dev->buf_size);
+ assert(dev->buf);
+ dev->test_buf = dev->buf;
+ dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN;
+
+ memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN);
+ eh = (struct ether_header *)(dev->test_buf + HDR_LEN);
+ eh->ether_type = htons(TEST_PTYPE);
+ memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN);
+ memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN);
+
+ for (i = sizeof(*eh); i < TEST_BUF_LEN; i++)
+ dev->test_buf[i + HDR_LEN] = (char)i;
+
+ dev->control = open("/dev/vhost-net", O_RDWR);
+ assert(dev->control >= 0);
+
+ r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+ assert(r >= 0);
+
+ dev->mem = malloc(offsetof(struct vhost_memory, regions) +
+ sizeof(dev->mem->regions[0]));
+ assert(dev->mem);
+ memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
+ sizeof(dev->mem->regions[0]));
+ dev->mem->nregions = 1;
+ dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
+ dev->mem->regions[0].userspace_addr = (long)dev->buf;
+ dev->mem->regions[0].memory_size = dev->buf_size;
+
+ r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ assert(r >= 0);
+
+ r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ assert(r >= 0);
+
+ dev->nvqs = 2;
+}
+
+static void wait_for_interrupt(struct vq_info *vq)
+{
+ unsigned long long val;
+
+ poll(&vq->fds, 1, 100);
+
+ if (vq->fds.revents & POLLIN)
+ read(vq->fds.fd, &val, sizeof(val));
+}
+
+static void verify_res_buf(char *res_buf)
+{
+ int i;
+
+ for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++)
+ assert(res_buf[i] == (char)i);
+}
+
+static void run_tx_test(struct vdev_info *dev, struct vq_info *vq,
+ bool delayed, int bufs)
+{
+ long long spurious = 0;
+ struct scatterlist sl;
+ unsigned int len;
+ int r;
+
+ for (;;) {
+ long started_before = vq->started;
+ long completed_before = vq->completed;
+
+ virtqueue_disable_cb(vq->vq);
+ do {
+ while (vq->started < bufs &&
+ (vq->started - vq->completed) < 1) {
+ sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN);
+ r = virtqueue_add_outbuf(vq->vq, &sl, 1,
+ dev->test_buf + vq->started,
+ GFP_ATOMIC);
+ if (unlikely(r != 0))
+ break;
+
+ ++vq->started;
+
+ if (unlikely(!virtqueue_kick(vq->vq))) {
+ r = -1;
+ break;
+ }
+ }
+
+ if (vq->started >= bufs)
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ while (virtqueue_get_buf(vq->vq, &len)) {
+ int n;
+
+ n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL);
+ assert(n == TEST_BUF_LEN);
+ verify_res_buf(dev->res_buf);
+
+ ++vq->completed;
+ r = 0;
+ }
+ } while (r == 0);
+
+ if (vq->completed == completed_before && vq->started == started_before)
+ ++spurious;
+
+ assert(vq->completed <= bufs);
+ assert(vq->started <= bufs);
+ if (vq->completed == bufs)
+ break;
+
+ if (delayed) {
+ if (virtqueue_enable_cb_delayed(vq->vq))
+ wait_for_interrupt(vq);
+ } else {
+ if (virtqueue_enable_cb(vq->vq))
+ wait_for_interrupt(vq);
+ }
+ }
+ printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
+ spurious, vq->started, vq->completed);
+}
+
+static void run_rx_test(struct vdev_info *dev, struct vq_info *vq,
+ bool delayed, int bufs)
+{
+ long long spurious = 0;
+ struct scatterlist sl;
+ unsigned int len;
+ int r;
+
+ for (;;) {
+ long started_before = vq->started;
+ long completed_before = vq->completed;
+
+ do {
+ while (vq->started < bufs &&
+ (vq->started - vq->completed) < 1) {
+ sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN);
+
+ r = virtqueue_add_inbuf(vq->vq, &sl, 1,
+ dev->res_buf + vq->started,
+ GFP_ATOMIC);
+ if (unlikely(r != 0))
+ break;
+
+ ++vq->started;
+
+ vdev_send_packet(dev);
+
+ if (unlikely(!virtqueue_kick(vq->vq))) {
+ r = -1;
+ break;
+ }
+ }
+
+ if (vq->started >= bufs)
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ while (virtqueue_get_buf(vq->vq, &len)) {
+ struct ether_header *eh;
+
+ eh = (struct ether_header *)(dev->res_buf + HDR_LEN);
+
+ /* tun netdev is up and running, only handle the
+ * TEST_PTYPE packet.
+ */
+ if (eh->ether_type == htons(TEST_PTYPE)) {
+ assert(len == TEST_BUF_LEN + HDR_LEN);
+ verify_res_buf(dev->res_buf + HDR_LEN);
+ }
+
+ ++vq->completed;
+ r = 0;
+ }
+ } while (r == 0);
+
+ if (vq->completed == completed_before && vq->started == started_before)
+ ++spurious;
+
+ assert(vq->completed <= bufs);
+ assert(vq->started <= bufs);
+ if (vq->completed == bufs)
+ break;
+ }
+
+ printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
+ spurious, vq->started, vq->completed);
+}
+
+static const char optstring[] = "h";
+static const struct option longopts[] = {
+ {
+ .name = "help",
+ .val = 'h',
+ },
+ {
+ .name = "event-idx",
+ .val = 'E',
+ },
+ {
+ .name = "no-event-idx",
+ .val = 'e',
+ },
+ {
+ .name = "indirect",
+ .val = 'I',
+ },
+ {
+ .name = "no-indirect",
+ .val = 'i',
+ },
+ {
+ .name = "virtio-1",
+ .val = '1',
+ },
+ {
+ .name = "no-virtio-1",
+ .val = '0',
+ },
+ {
+ .name = "delayed-interrupt",
+ .val = 'D',
+ },
+ {
+ .name = "no-delayed-interrupt",
+ .val = 'd',
+ },
+ {
+ .name = "buf-num",
+ .val = 'n',
+ .has_arg = required_argument,
+ },
+ {
+ .name = "batch",
+ .val = 'b',
+ .has_arg = required_argument,
+ },
+ {
+ }
+};
+
+static void help(int status)
+{
+ fprintf(stderr, "Usage: vhost_net_test [--help]"
+ " [--no-indirect]"
+ " [--no-event-idx]"
+ " [--no-virtio-1]"
+ " [--delayed-interrupt]"
+ " [--buf-num]"
+ "\n");
+
+ exit(status);
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+ char tun_name[IFNAMSIZ];
+ long nbufs = 0x100000;
+ struct vdev_info dev;
+ bool delayed = false;
+ int o, fd;
+
+ for (;;) {
+ o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help(2);
+ case 'e':
+ features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+ break;
+ case 'h':
+ help(0);
+ case 'i':
+ features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+ break;
+ case '0':
+ features &= ~(1ULL << VIRTIO_F_VERSION_1);
+ break;
+ case 'D':
+ delayed = true;
+ break;
+ case 'n':
+ nbufs = strtol(optarg, NULL, 10);
+ assert(nbufs > 0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+done:
+ memset(&dev, 0, sizeof(dev));
+ snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid());
+
+ fd = tun_alloc(&dev, tun_name);
+ assert(fd >= 0);
+
+ vdev_info_init(&dev, features);
+ vq_info_add(&dev, 0, DESC_NUM, fd);
+ vq_info_add(&dev, 1, DESC_NUM, fd);
+ vdev_create_socket(&dev, tun_name);
+
+ run_rx_test(&dev, &dev.vqs[0], delayed, nbufs);
+ run_tx_test(&dev, &dev.vqs[1], delayed, nbufs);
+
+ return 0;
+}