summaryrefslogtreecommitdiff
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/Makefile9
-rw-r--r--tools/testing/selftests/arm64/abi/Makefile2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c16
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c140
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c77
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c12
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c63
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c12
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c8
-rw-r--r--tools/testing/selftests/arm64/mte/check_hugetlb_options.c10
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c6
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c896
-rw-r--r--tools/testing/selftests/arm64/mte/check_prctl.c29
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c10
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c4
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c84
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h9
-rw-r--r--tools/testing/selftests/arm64/mte/mte_def.h8
-rw-r--r--tools/testing/selftests/bpf/DENYLIST1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch641
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_atomic.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h2
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c21
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/config4
-rw-r--r--tools/testing/selftests/bpf/config.ppc64el93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c118
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c617
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_buf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c458
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stream.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_helpers.h28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c127
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c13
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h22
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_mprog.c30
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_read_xattr.c158
-rw-r--r--tools/testing/selftests/bpf/progs/compute_live_registers.c16
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c174
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_noreturns.c15
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c277
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c229
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c14
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c5
-rw-r--r--tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/security_bpf_map.c69
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c56
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c36
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c79
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c33
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c87
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c23
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c37
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_key.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sig_in_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_change_tail.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c8
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_failure.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_and.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c106
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c98
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c360
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c11
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c25
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div_overflow.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c128
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_in_map.c118
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c70
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_private_stack.c89
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ref_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_tailcall.c31
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c233
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c38
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_map.sh398
-rw-r--r--tools/testing/selftests/bpf/test_loader.c30
-rw-r--r--tools/testing/selftests/bpf/test_maps.c4
-rw-r--r--tools/testing/selftests/bpf/test_progs.h28
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.c94
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c24
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c3
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c33
-rw-r--r--tools/testing/selftests/bpf/verifier/jset.c10
-rw-r--r--tools/testing/selftests/bpf/veristat.c610
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh9
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c56
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h1
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c41
-rw-r--r--tools/testing/selftests/coredump/Makefile2
-rw-r--r--tools/testing/selftests/coredump/config3
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c1697
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh4
-rw-r--r--tools/testing/selftests/drivers/net/Makefile3
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py465
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py98
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py17
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c9
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_api.py476
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py8
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py101
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py14
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py25
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh165
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py4
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c35
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh57
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh52
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh30
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh23
-rwxr-xr-xtools/testing/selftests/drivers/net/netpoll_basic.py396
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py2
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py45
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py658
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/kernfs_test.c38
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc30
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc28
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc2
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c113
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h8
-rw-r--r--tools/testing/selftests/futex/include/futextest.h11
-rw-r--r--tools/testing/selftests/hid/config.common1
-rw-r--r--tools/testing/selftests/iommu/iommufd.c541
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c15
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h89
-rw-r--r--tools/testing/selftests/ipc/msgque.c47
-rw-r--r--tools/testing/selftests/kexec/Makefile2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm4
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c7
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c7
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c23
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c4
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c330
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c203
-rw-r--r--tools/testing/selftests/kvm/arm64/mmio_abort.c159
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c14
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c259
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c12
-rw-r--r--tools/testing/selftests/kvm/config1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h10
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h72
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h6
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c135
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c51
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c2
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c4
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c10
-rw-r--r--tools/testing/selftests/kvm/x86/aperfmperf_test.c213
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c2
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c21
-rw-r--r--tools/testing/selftests/landlock/audit.h7
-rw-r--r--tools/testing/selftests/landlock/audit_test.c1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c40
-rw-r--r--tools/testing/selftests/lkdtm/config2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c3
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c125
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c217
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rw-r--r--tools/testing/selftests/net/config11
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh69
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh771
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/lib.sh35
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py2
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py7
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py39
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py5
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c621
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile3
-rw-r--r--tools/testing/selftests/net/mptcp/config2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh5
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c24
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh84
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rw-r--r--tools/testing/selftests/net/netfilter/config7
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh45
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh5
-rw-r--r--tools/testing/selftests/net/nettest.c12
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py127
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh92
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh50
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh2
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c2
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh9
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh4
-rw-r--r--tools/testing/selftests/nolibc/Makefile343
-rw-r--r--tools/testing/selftests/nolibc/Makefile.include10
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc383
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c53
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh14
-rw-r--r--tools/testing/selftests/pidfd/.gitignore2
-rw-r--r--tools/testing/selftests/pidfd/Makefile5
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h15
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c60
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setattr_test.c69
-rw-r--r--tools/testing/selftests/pidfd/pidfd_xattr_test.c132
-rw-r--r--tools/testing/selftests/ptp/testptp.c11
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh15
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mktestid.sh29
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh78
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-L10
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot3
-rw-r--r--tools/testing/selftests/sched_ext/exit.c8
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c140
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh2
-rw-r--r--tools/testing/selftests/tc-testing/config2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json5
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json254
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json81
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json36
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh6
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c15
-rw-r--r--tools/testing/selftests/ublk/file_backed.c32
-rw-r--r--tools/testing/selftests/ublk/kublk.c140
-rw-r--r--tools/testing/selftests/ublk/kublk.h135
-rw-r--r--tools/testing/selftests/ublk/null.c32
-rw-r--r--tools/testing/selftests/ublk/stripe.c33
-rw-r--r--tools/testing/selftests/ublk/utils.h70
-rw-r--r--tools/testing/selftests/vDSO/Makefile2
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h2
l---------[-rw-r--r--]tools/testing/selftests/vDSO/vdso_standalone_test_x86.c59
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c3
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c1
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c10
-rw-r--r--tools/testing/selftests/vsock/.gitignore2
-rw-r--r--tools/testing/selftests/vsock/Makefile17
-rw-r--r--tools/testing/selftests/vsock/config111
-rw-r--r--tools/testing/selftests/vsock/settings1
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh487
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config4
306 files changed, 19595 insertions, 2555 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 339b31e6a6b5..030da61dbff3 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -77,6 +77,7 @@ TARGETS += net/ovpn
TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
+TARGETS += nolibc
TARGETS += nsfs
TARGETS += pci_endpoint
TARGETS += pcie_bwctrl
@@ -293,6 +294,14 @@ ifdef INSTALL_PATH
$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
-C $$TARGET emit_tests >> $(TEST_LIST); \
done;
+ @VERSION=$$(git describe HEAD 2>/dev/null); \
+ if [ -n "$$VERSION" ]; then \
+ echo "$$VERSION" > $(INSTALL_PATH)/VERSION; \
+ printf "Version saved to $(INSTALL_PATH)/VERSION\n"; \
+ else \
+ printf "Unable to get version from git describe\n"; \
+ fi
+ @echo "**Kselftest Installation is complete: $(INSTALL_PATH)**"
else
$(error Error: set INSTALL_PATH to use install)
endif
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
index a6d30c620908..483488f8c2ad 100644
--- a/tools/testing/selftests/arm64/abi/Makefile
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
$(OUTPUT)/tpidr2: tpidr2.c
$(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
-static -include ../../../../include/nolibc/nolibc.h \
- -ffreestanding -Wall $^ -o $@ -lgcc
+ -I../.. -ffreestanding -Wall $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 35f521e5f41c..002ec38a8bbb 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -21,6 +21,10 @@
#define TESTS_PER_HWCAP 3
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
/*
* Function expected to generate exception when the feature is not
* supported and return when it is supported. If the specific exception
@@ -1098,6 +1102,18 @@ static const struct hwcap_data {
.sigill_fn = hbc_sigill,
.sigill_reliable = true,
},
+ {
+ .name = "MTE_FAR",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_FAR,
+ .cpuinfo = "mtefar",
+ },
+ {
+ .name = "MTE_STOREONLY",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_STORE_ONLY,
+ .cpuinfo = "mtestoreonly",
+ },
};
typedef void (*sighandler_fn)(int, siginfo_t *, void *);
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index eb19dcc37a75..f58a9f89b952 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -3,31 +3,12 @@
#include <linux/sched.h>
#include <linux/wait.h>
+#include "kselftest.h"
+
#define SYS_TPIDR2 "S3_3_C13_C0_5"
#define EXPECTED_TESTS 5
-static void putstr(const char *str)
-{
- write(1, str, strlen(str));
-}
-
-static void putnum(unsigned int num)
-{
- char c;
-
- if (num / 10)
- putnum(num / 10);
-
- c = '0' + (num % 10);
- write(1, &c, 1);
-}
-
-static int tests_run;
-static int tests_passed;
-static int tests_failed;
-static int tests_skipped;
-
static void set_tpidr2(uint64_t val)
{
asm volatile (
@@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void)
return val;
}
-static void print_summary(void)
-{
- if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
- putstr("# UNEXPECTED TEST COUNT: ");
-
- putstr("# Totals: pass:");
- putnum(tests_passed);
- putstr(" fail:");
- putnum(tests_failed);
- putstr(" xfail:0 xpass:0 skip:");
- putnum(tests_skipped);
- putstr(" error:0\n");
-}
-
/* Processes should start with TPIDR2 == 0 */
static int default_value(void)
{
@@ -105,9 +72,8 @@ static int write_fork_read(void)
if (newpid == 0) {
/* In child */
if (get_tpidr2() != oldpid) {
- putstr("# TPIDR2 changed in child: ");
- putnum(get_tpidr2());
- putstr("\n");
+ ksft_print_msg("TPIDR2 changed in child: %llx\n",
+ get_tpidr2());
exit(0);
}
@@ -115,14 +81,12 @@ static int write_fork_read(void)
if (get_tpidr2() == getpid()) {
exit(1);
} else {
- putstr("# Failed to set TPIDR2 in child\n");
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
exit(0);
}
}
if (newpid < 0) {
- putstr("# fork() failed: -");
- putnum(-newpid);
- putstr("\n");
+ ksft_print_msg("fork() failed: %d\n", newpid);
return 0;
}
@@ -132,23 +96,22 @@ static int write_fork_read(void)
if (waiting < 0) {
if (errno == EINTR)
continue;
- putstr("# waitpid() failed: ");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("waitpid() failed: %d\n", errno);
return 0;
}
if (waiting != newpid) {
- putstr("# waitpid() returned wrong PID\n");
+ ksft_print_msg("waitpid() returned wrong PID: %d != %d\n",
+ waiting, newpid);
return 0;
}
if (!WIFEXITED(status)) {
- putstr("# child did not exit\n");
+ ksft_print_msg("child did not exit\n");
return 0;
}
if (getpid() != get_tpidr2()) {
- putstr("# TPIDR2 corrupted in parent\n");
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
return 0;
}
@@ -188,35 +151,32 @@ static int write_clone_read(void)
stack = malloc(__STACK_SIZE);
if (!stack) {
- putstr("# malloc() failed\n");
+ ksft_print_msg("malloc() failed\n");
return 0;
}
ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE,
&parent_tid, 0, &child_tid);
if (ret == -1) {
- putstr("# clone() failed\n");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("clone() failed: %d\n", errno);
return 0;
}
if (ret == 0) {
/* In child */
if (get_tpidr2() != 0) {
- putstr("# TPIDR2 non-zero in child: ");
- putnum(get_tpidr2());
- putstr("\n");
+ ksft_print_msg("TPIDR2 non-zero in child: %llx\n",
+ get_tpidr2());
exit(0);
}
if (gettid() == 0)
- putstr("# Child TID==0\n");
+ ksft_print_msg("Child TID==0\n");
set_tpidr2(gettid());
if (get_tpidr2() == gettid()) {
exit(1);
} else {
- putstr("# Failed to set TPIDR2 in child\n");
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
exit(0);
}
}
@@ -227,25 +187,22 @@ static int write_clone_read(void)
if (waiting < 0) {
if (errno == EINTR)
continue;
- putstr("# wait4() failed: ");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("wait4() failed: %d\n", errno);
return 0;
}
if (waiting != ret) {
- putstr("# wait4() returned wrong PID ");
- putnum(waiting);
- putstr("\n");
+ ksft_print_msg("wait4() returned wrong PID %d\n",
+ waiting);
return 0;
}
if (!WIFEXITED(status)) {
- putstr("# child did not exit\n");
+ ksft_print_msg("child did not exit\n");
return 0;
}
if (parent != get_tpidr2()) {
- putstr("# TPIDR2 corrupted in parent\n");
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
return 0;
}
@@ -253,35 +210,14 @@ static int write_clone_read(void)
}
}
-#define run_test(name) \
- if (name()) { \
- tests_passed++; \
- } else { \
- tests_failed++; \
- putstr("not "); \
- } \
- putstr("ok "); \
- putnum(++tests_run); \
- putstr(" " #name "\n");
-
-#define skip_test(name) \
- tests_skipped++; \
- putstr("ok "); \
- putnum(++tests_run); \
- putstr(" # SKIP " #name "\n");
-
int main(int argc, char **argv)
{
int ret;
- putstr("TAP version 13\n");
- putstr("1..");
- putnum(EXPECTED_TESTS);
- putstr("\n");
+ ksft_print_header();
+ ksft_set_plan(5);
- putstr("# PID: ");
- putnum(getpid());
- putstr("\n");
+ ksft_print_msg("PID: %d\n", getpid());
/*
* This test is run with nolibc which doesn't support hwcap and
@@ -290,23 +226,21 @@ int main(int argc, char **argv)
*/
ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
if (ret >= 0) {
- run_test(default_value);
- run_test(write_read);
- run_test(write_sleep_read);
- run_test(write_fork_read);
- run_test(write_clone_read);
+ ksft_test_result(default_value(), "default_value\n");
+ ksft_test_result(write_read, "write_read\n");
+ ksft_test_result(write_sleep_read, "write_sleep_read\n");
+ ksft_test_result(write_fork_read, "write_fork_read\n");
+ ksft_test_result(write_clone_read, "write_clone_read\n");
} else {
- putstr("# SME support not present\n");
+ ksft_print_msg("SME support not present\n");
- skip_test(default_value);
- skip_test(write_read);
- skip_test(write_sleep_read);
- skip_test(write_fork_read);
- skip_test(write_clone_read);
+ ksft_test_result_skip("default_value\n");
+ ksft_test_result_skip("write_read\n");
+ ksft_test_result_skip("write_sleep_read\n");
+ ksft_test_result_skip("write_fork_read\n");
+ ksft_test_result_skip("write_clone_read\n");
}
- print_summary();
-
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 191c47ca0ed8..124bc883365e 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1061,11 +1061,31 @@ static bool sve_write_supported(struct test_config *config)
if (config->sme_vl_in != config->sme_vl_expected) {
return false;
}
+
+ if (!sve_supported())
+ return false;
}
return true;
}
+static bool sve_write_fpsimd_supported(struct test_config *config)
+{
+ if (!sve_supported())
+ return false;
+
+ if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
+ return false;
+
+ if (config->svcr_expected & SVCR_SM)
+ return false;
+
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+
+ return true;
+}
+
static void fpsimd_write_expected(struct test_config *config)
{
int vl;
@@ -1134,6 +1154,9 @@ static void sve_write_expected(struct test_config *config)
int vl = vl_expected(config);
int sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+ if (!vl)
+ return;
+
fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
@@ -1152,7 +1175,7 @@ static void sve_write_expected(struct test_config *config)
}
}
-static void sve_write(pid_t child, struct test_config *config)
+static void sve_write_sve(pid_t child, struct test_config *config)
{
struct user_sve_header *sve;
struct iovec iov;
@@ -1161,6 +1184,9 @@ static void sve_write(pid_t child, struct test_config *config)
vl = vl_expected(config);
vq = __sve_vq_from_vl(vl);
+ if (!vl)
+ return;
+
iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
@@ -1195,6 +1221,45 @@ static void sve_write(pid_t child, struct test_config *config)
free(iov.iov_base);
}
+static void sve_write_fpsimd(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vl, vq;
+
+ vl = vl_expected(config);
+ vq = __sve_vq_from_vl(vl);
+
+ if (!vl)
+ return;
+
+ iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq,
+ SVE_PT_REGS_FPSIMD);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
+ iov.iov_len);
+ return;
+ }
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ sve = iov.iov_base;
+ sve->size = iov.iov_len;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->vl = vl;
+
+ fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET;
+ memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected));
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write SVE: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
static bool za_write_supported(struct test_config *config)
{
if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
@@ -1386,7 +1451,13 @@ static struct test_definition sve_test_defs[] = {
.name = "SVE write",
.supported = sve_write_supported,
.set_expected_values = sve_write_expected,
- .modify_values = sve_write,
+ .modify_values = sve_write_sve,
+ },
+ {
+ .name = "SVE write FPSIMD format",
+ .supported = sve_write_fpsimd_supported,
+ .set_expected_values = fpsimd_write_expected,
+ .modify_values = sve_write_fpsimd,
},
};
@@ -1607,7 +1678,7 @@ int main(void)
* Run the test set if there is no SVE or SME, with those we
* have to pick a VL for each run.
*/
- if (!sve_supported()) {
+ if (!sve_supported() && !sme_supported()) {
test_config.sve_vl_in = 0;
test_config.sve_vl_expected = 0;
test_config.sme_vl_in = 0;
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 577b6e05e860..b22303778fb0 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -170,7 +170,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type)
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
- sve.flags = SVE_PT_VL_INHERIT;
+ sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE;
ret = set_sve(child, type, &sve);
if (ret != 0) {
ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n",
@@ -235,6 +235,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
/* Set the VL by doing a set with no register payload */
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
sve.vl = vl;
ret = set_sve(child, type, &sve);
if (ret != 0) {
@@ -253,7 +254,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
return;
}
- ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n",
+ ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n",
type->name, vl);
free(new_sve);
@@ -301,8 +302,10 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
p[j] = j;
}
+ /* This should only succeed for SVE */
ret = set_sve(child, type, sve);
- ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n",
+ ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0),
+ "%s FPSIMD set via SVE: %d\n",
type->name, ret);
if (ret)
goto out;
@@ -750,9 +753,6 @@ int main(void)
ksft_print_header();
ksft_set_plan(EXPECTED_TESTS);
- if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available\n");
-
child = fork();
if (!child)
return do_child();
diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
index 3fb9742342a3..54f9c888249d 100644
--- a/tools/testing/selftests/arm64/gcs/basic-gcs.c
+++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
@@ -298,6 +298,68 @@ out:
return pass;
}
+/* A vfork()ed process can run and exit */
+static bool test_vfork(void)
+{
+ unsigned long child_mode;
+ int ret, status;
+ pid_t pid;
+ bool pass = true;
+
+ pid = vfork();
+ if (pid == -1) {
+ ksft_print_msg("vfork() failed: %d\n", errno);
+ pass = false;
+ goto out;
+ }
+ if (pid == 0) {
+ /*
+ * In child, make sure we can call a function, read
+ * the GCS pointer and status and then exit.
+ */
+ valid_gcs_function();
+ get_gcspr();
+
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &child_mode, 0, 0, 0);
+ if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
+ ksft_print_msg("GCS not enabled in child\n");
+ ret = EXIT_FAILURE;
+ }
+
+ _exit(ret);
+ }
+
+ /*
+ * In parent, check we can still do function calls then check
+ * on the child.
+ */
+ valid_gcs_function();
+
+ ksft_print_msg("Waiting for child %d\n", pid);
+
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ ksft_print_msg("Failed to wait for child: %d\n",
+ errno);
+ return false;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child exited due to signal %d\n",
+ WTERMSIG(status));
+ pass = false;
+ } else if (WEXITSTATUS(status)) {
+ ksft_print_msg("Child exited with status %d\n",
+ WEXITSTATUS(status));
+ pass = false;
+ }
+
+out:
+
+ return pass;
+}
+
typedef bool (*gcs_test)(void);
static struct {
@@ -314,6 +376,7 @@ static struct {
{ "enable_invalid", enable_invalid, true },
{ "map_guarded_stack", map_guarded_stack },
{ "fork", test_fork },
+ { "vfork", test_vfork },
};
int main(void)
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
index 2ee7f114d7fa..ff4e07503349 100644
--- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -31,7 +31,7 @@ static int check_buffer_by_byte(int mem_type, int mode)
int i, j, item;
bool err;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
@@ -68,7 +68,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode,
bool err;
char *und_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
@@ -164,7 +164,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode,
size_t tagged_size, overflow_size;
char *over_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
@@ -337,7 +337,7 @@ static int check_buffer_by_block(int mem_type, int mode)
{
int i, item, result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
cur_mte_cxt.fault_valid = false;
for (i = 0; i < item; i++) {
@@ -368,7 +368,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping)
int run, fd;
int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
/* check initial tags for anonymous mmap */
ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
@@ -415,7 +415,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(20);
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
index 7597fc632cad..5e97ee792e4d 100644
--- a/tools/testing/selftests/arm64/mte/check_child_memory.c
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -88,7 +88,7 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping)
int item = ARRAY_SIZE(sizes);
item = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < item; run++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
UNDERFLOW, OVERFLOW);
@@ -109,7 +109,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
int run, fd, map_size, result = KSFT_PASS;
int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -160,8 +160,8 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
- mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(12);
diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
index 3bfcd3848432..aad1234c7e0f 100644
--- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
+++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
@@ -151,7 +151,7 @@ static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int
map_size = default_huge_page_size();
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -180,7 +180,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
unsigned long map_size;
prot_flag = PROT_READ | PROT_WRITE;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
map_size = default_huge_page_size();
map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
0, 0);
@@ -210,7 +210,7 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin
map_size = default_huge_page_size();
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
0, 0);
if (check_allocated_memory_range(ptr, map_size, mem_type,
@@ -235,8 +235,8 @@ int main(int argc, char *argv[])
return err;
/* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
allocate_hugetlb();
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index 88c74bc46d4f..0cf5faef1724 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -106,7 +106,7 @@ static int check_madvise_options(int mem_type, int mode, int mapping)
return err;
}
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -141,8 +141,8 @@ int main(int argc, char *argv[])
return KSFT_FAIL;
}
/* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
index 17694caaff53..c100af3012cb 100644
--- a/tools/testing/selftests/arm64/mte/check_mmap_options.c
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -23,6 +24,35 @@
#define OVERFLOW MT_GRANULE_SIZE
#define TAG_CHECK_ON 0
#define TAG_CHECK_OFF 1
+#define ATAG_CHECK_ON 1
+#define ATAG_CHECK_OFF 0
+
+#define TEST_NAME_MAX 256
+
+enum mte_mem_check_type {
+ CHECK_ANON_MEM = 0,
+ CHECK_FILE_MEM = 1,
+ CHECK_CLEAR_PROT_MTE = 2,
+};
+
+enum mte_tag_op_type {
+ TAG_OP_ALL = 0,
+ TAG_OP_STONLY = 1,
+};
+
+struct check_mmap_testcase {
+ int check_type;
+ int mem_type;
+ int mte_sync;
+ int mapping;
+ int tag_check;
+ int atag_check;
+ int tag_op;
+ bool enable_tco;
+};
+
+#define TAG_OP_ALL 0
+#define TAG_OP_STONLY 1
static size_t page_size;
static int sizes[] = {
@@ -30,8 +60,17 @@ static int sizes[] = {
/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
};
-static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+static int check_mte_memory(char *ptr, int size, int mode,
+ int tag_check,int atag_check, int tag_op)
{
+ char buf[MT_GRANULE_SIZE];
+
+ if (!mtefar_support && atag_check == ATAG_CHECK_ON)
+ return KSFT_SKIP;
+
+ if (atag_check == ATAG_CHECK_ON)
+ ptr = mte_insert_atag(ptr);
+
mte_initialize_current_context(mode, (uintptr_t)ptr, size);
memset(ptr, '1', size);
mte_wait_after_trig();
@@ -54,16 +93,34 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
return KSFT_FAIL;
+ if (tag_op == TAG_OP_STONLY) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memcpy(buf, ptr - UNDERFLOW, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memcpy(buf, ptr + size, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+ }
+
return KSFT_PASS;
}
-static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, result, map_size;
int item = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < item; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
@@ -79,23 +136,27 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i
munmap((void *)map_ptr, map_size);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
mte_free_memory((void *)map_ptr, map_size, mem_type, false);
- if (result == KSFT_FAIL)
- return KSFT_FAIL;
+ if (result != KSFT_PASS)
+ return result;
}
return KSFT_PASS;
}
-static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_file_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, fd, map_size;
int total = ARRAY_SIZE(sizes);
int result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -117,24 +178,24 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
munmap((void *)map_ptr, map_size);
close(fd);
- if (result == KSFT_FAIL)
- break;
+ if (result != KSFT_PASS)
+ return result;
}
- return result;
+ return KSFT_PASS;
}
-static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check)
{
char *ptr, *map_ptr;
int run, prot_flag, result, fd, map_size;
int total = ARRAY_SIZE(sizes);
prot_flag = PROT_READ | PROT_WRITE;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
@@ -150,10 +211,10 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
fd = create_temp_file();
if (fd == -1)
@@ -174,19 +235,715 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
close(fd);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
}
return KSFT_PASS;
}
+const char *format_test_name(struct check_mmap_testcase *tc)
+{
+ static char test_name[TEST_NAME_MAX];
+ const char *check_type_str;
+ const char *mem_type_str;
+ const char *sync_str;
+ const char *mapping_str;
+ const char *tag_check_str;
+ const char *atag_check_str;
+ const char *tag_op_str;
+
+ switch (tc->check_type) {
+ case CHECK_ANON_MEM:
+ check_type_str = "anonymous memory";
+ break;
+ case CHECK_FILE_MEM:
+ check_type_str = "file memory";
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ check_type_str = "clear PROT_MTE flags";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mem_type) {
+ case USE_MMAP:
+ mem_type_str = "mmap";
+ break;
+ case USE_MPROTECT:
+ mem_type_str = "mmap/mprotect";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mte_sync) {
+ case MTE_NONE_ERR:
+ sync_str = "no error";
+ break;
+ case MTE_SYNC_ERR:
+ sync_str = "sync error";
+ break;
+ case MTE_ASYNC_ERR:
+ sync_str = "async error";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mapping) {
+ case MAP_SHARED:
+ mapping_str = "shared";
+ break;
+ case MAP_PRIVATE:
+ mapping_str = "private";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->tag_check) {
+ case TAG_CHECK_ON:
+ tag_check_str = "tag check on";
+ break;
+ case TAG_CHECK_OFF:
+ tag_check_str = "tag check off";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->atag_check) {
+ case ATAG_CHECK_ON:
+ atag_check_str = "with address tag [63:60]";
+ break;
+ case ATAG_CHECK_OFF:
+ atag_check_str = "without address tag [63:60]";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, sizeof(test_name),
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str);
+
+ switch (tc->tag_op) {
+ case TAG_OP_ALL:
+ tag_op_str = "";
+ break;
+ case TAG_OP_STONLY:
+ tag_op_str = " / store-only";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, TEST_NAME_MAX,
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str, tag_op_str);
+
+ return test_name;
+}
+
int main(int argc, char *argv[])
{
- int err;
+ int err, i;
int item = ARRAY_SIZE(sizes);
+ struct check_mmap_testcase test_cases[]= {
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ };
err = mte_default_setup();
if (err)
@@ -200,64 +957,51 @@ int main(int argc, char *argv[])
sizes[item - 2] = page_size;
sizes[item - 1] = page_size + 1;
- /* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
-
/* Set test plan */
- ksft_set_plan(22);
-
- mte_enable_pstate_tco();
-
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
-
- mte_disable_pstate_tco();
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
-
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
-
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
-
- evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
- evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+ ksft_set_plan(ARRAY_SIZE(test_cases));
+
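+	/*
+	 * For each table-driven test case: register the signal handlers
+	 * (exposing siginfo tag bits when the case checks address tags),
+	 * set PSTATE.TCO as requested, then dispatch on the check type.
+	 */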
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
+ mte_register_signal(SIGSEGV, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
+
+ if (test_cases[i].enable_tco)
+ mte_enable_pstate_tco();
+ else
+ mte_disable_pstate_tco();
+
+ switch (test_cases[i].check_type) {
+ case CHECK_ANON_MEM:
+ evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_FILE_MEM:
+ evaluate_test(check_file_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].atag_check),
+ format_test_name(&test_cases[i]));
+ break;
+ default:
+ exit(KSFT_FAIL);
+ }
+ }
mte_restore_setup();
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
index 4c89e9538ca0..f7f320defa7b 100644
--- a/tools/testing/selftests/arm64/mte/check_prctl.c
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -12,6 +12,10 @@
#include "kselftest.h"
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
static int set_tagged_addr_ctrl(int val)
{
int ret;
@@ -60,7 +64,7 @@ void check_basic_read(void)
/*
* Attempt to set a specified combination of modes.
*/
-void set_mode_test(const char *name, int hwcap2, int mask)
+void set_mode_test(const char *name, int hwcap2, int hwcap3, int mask)
{
int ret;
@@ -69,6 +73,11 @@ void set_mode_test(const char *name, int hwcap2, int mask)
return;
}
+ if ((getauxval(AT_HWCAP3) & hwcap3) != hwcap3) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
ret = set_tagged_addr_ctrl(mask);
if (ret < 0) {
ksft_test_result_fail("%s\n", name);
@@ -81,7 +90,7 @@ void set_mode_test(const char *name, int hwcap2, int mask)
return;
}
- if ((ret & PR_MTE_TCF_MASK) == mask) {
+ if ((ret & (PR_MTE_TCF_MASK | PR_MTE_STORE_ONLY)) == mask) {
ksft_test_result_pass("%s\n", name);
} else {
ksft_print_msg("Got %x, expected %x\n",
@@ -93,12 +102,16 @@ void set_mode_test(const char *name, int hwcap2, int mask)
struct mte_mode {
int mask;
int hwcap2;
+ int hwcap3;
const char *name;
} mte_modes[] = {
- { PR_MTE_TCF_NONE, 0, "NONE" },
- { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" },
- { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" },
- { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" },
+ { PR_MTE_TCF_NONE, 0, 0, "NONE" },
+ { PR_MTE_TCF_SYNC, HWCAP2_MTE, 0, "SYNC" },
+ { PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "SYNC+ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+STONLY" },
+ { PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "ASYNC+STONLY" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+ASYNC+STONLY" },
};
int main(void)
@@ -106,11 +119,11 @@ int main(void)
int i;
ksft_print_header();
- ksft_set_plan(5);
+ ksft_set_plan(ARRAY_SIZE(mte_modes));
check_basic_read();
for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
- set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2,
+ set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, mte_modes[i].hwcap3,
mte_modes[i].mask);
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index a3d1e23fe02a..4b764f2a8185 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -57,7 +57,7 @@ static int check_single_included_tags(int mem_type, int mode)
return KSFT_FAIL;
for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
- ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag), false);
if (ret != 0)
result = KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -91,7 +91,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
excl_mask |= 1 << tag;
- mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask), false);
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
ptr = mte_insert_tags(ptr, BUFFER_SIZE);
@@ -120,7 +120,7 @@ static int check_all_included_tags(int mem_type, int mode)
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK, false);
if (ret != 0)
return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -145,7 +145,7 @@ static int check_none_included_tags(int mem_type, int mode)
if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK, false);
if (ret != 0)
return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index f4ae5f87a3b7..fb7936c4e097 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -44,7 +44,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
err = KSFT_PASS;
len = 2 * page_sz;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
fd = create_temp_file();
if (fd == -1)
return KSFT_FAIL;
@@ -211,7 +211,7 @@ int main(int argc, char *argv[])
return err;
/* Register signal handlers */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(64);
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index a1dc2fe5285b..397e57dd946a 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -6,6 +6,7 @@
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
+#include <time.h>
#include <unistd.h>
#include <linux/auxvec.h>
@@ -19,20 +20,40 @@
#include "mte_common_util.h"
#include "mte_def.h"
+#ifndef SA_EXPOSE_TAGBITS
+#define SA_EXPOSE_TAGBITS 0x00000800
+#endif
+
#define INIT_BUFFER_SIZE 256
struct mte_fault_cxt cur_mte_cxt;
+bool mtefar_support;
+bool mtestonly_support;
static unsigned int mte_cur_mode;
static unsigned int mte_cur_pstate_tco;
+static bool mte_cur_stonly;
void mte_default_handler(int signum, siginfo_t *si, void *uc)
{
+ struct sigaction sa;
unsigned long addr = (unsigned long)si->si_addr;
+ unsigned char si_tag, si_atag;
+
+ sigaction(signum, NULL, &sa);
+
+ if (sa.sa_flags & SA_EXPOSE_TAGBITS) {
+ si_tag = MT_FETCH_TAG(addr);
+ si_atag = MT_FETCH_ATAG(addr);
+ addr = MT_CLEAR_TAGS(addr);
+ } else {
+ si_tag = 0;
+ si_atag = 0;
+ }
if (signum == SIGSEGV) {
#ifdef DEBUG
- ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
- ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag);
#endif
if (si->si_code == SEGV_MTEAERR) {
if (cur_mte_cxt.trig_si_code == si->si_code)
@@ -45,13 +66,18 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
}
/* Compare the context for precise error */
else if (si->si_code == SEGV_MTESERR) {
+ if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) {
+			ksft_print_msg("Invalid MTE synchronous exception caught for address tag! si_tag=%x, si_atag=%x\n", si_tag, si_atag);
+ exit(KSFT_FAIL);
+ }
+
if (cur_mte_cxt.trig_si_code == si->si_code &&
((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -67,11 +93,11 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n",
((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
if ((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -79,12 +105,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
}
}
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags)
{
struct sigaction sa;
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO;
+
+ if (export_tags && signal == SIGSEGV)
+ sa.sa_flags |= SA_EXPOSE_TAGBITS;
+
sigemptyset(&sa.sa_mask);
sigaction(signal, &sa, NULL);
}
@@ -120,6 +151,19 @@ void mte_clear_tags(void *ptr, size_t size)
mte_clear_tag_address_range(ptr, size);
}
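+/*
+ * Insert a random non-zero address tag (bits [63:60]) into the pointer when
+ * HWCAP3_MTE_FAR is available; without it the address tag is left as zero.
+ */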
+void *mte_insert_atag(void *ptr)
+{
+ unsigned char atag;
+
+ atag = mtefar_support ? (random() % MT_ATAG_MASK) + 1 : 0;
+ return (void *)MT_SET_ATAG((unsigned long)ptr, atag);
+}
+
+void *mte_clear_atag(void *ptr)
+{
+ return (void *)MT_CLEAR_ATAG((unsigned long)ptr);
+}
+
static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
size_t range_before, size_t range_after,
bool tags, int fd)
@@ -272,7 +316,7 @@ void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
cur_mte_cxt.trig_si_code = 0;
}
-int mte_switch_mode(int mte_option, unsigned long incl_mask)
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly)
{
unsigned long en = 0;
@@ -304,6 +348,9 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
break;
}
+ if (mtestonly_support && stonly)
+ en |= PR_MTE_STORE_ONLY;
+
en |= (incl_mask << PR_MTE_TAG_SHIFT);
/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
@@ -316,12 +363,21 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
int mte_default_setup(void)
{
unsigned long hwcaps2 = getauxval(AT_HWCAP2);
+ unsigned long hwcaps3 = getauxval(AT_HWCAP3);
unsigned long en = 0;
int ret;
+	/* Seed the PRNG used to generate random address tags */
+ srandom(time(NULL));
+
if (!(hwcaps2 & HWCAP2_MTE))
ksft_exit_skip("MTE features unavailable\n");
+ mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR);
+
+ if (hwcaps3 & HWCAP3_MTE_STORE_ONLY)
+ mtestonly_support = true;
+
/* Get current mte mode */
ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
if (ret < 0) {
@@ -335,6 +391,8 @@ int mte_default_setup(void)
else if (ret & PR_MTE_TCF_NONE)
mte_cur_mode = MTE_NONE_ERR;
+ mte_cur_stonly = (ret & PR_MTE_STORE_ONLY) ? true : false;
+
mte_cur_pstate_tco = mte_get_pstate_tco();
/* Disable PSTATE.TCO */
mte_disable_pstate_tco();
@@ -343,7 +401,7 @@ int mte_default_setup(void)
void mte_restore_setup(void)
{
- mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG, mte_cur_stonly);
if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
mte_enable_pstate_tco();
else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
index a0017a303beb..250d671329a5 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.h
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -37,10 +37,13 @@ struct mte_fault_cxt {
};
extern struct mte_fault_cxt cur_mte_cxt;
+extern bool mtefar_support;
+extern bool mtestonly_support;
/* MTE utility functions */
void mte_default_handler(int signum, siginfo_t *si, void *uc);
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags);
void mte_wait_after_trig(void);
void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
@@ -54,9 +57,11 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
size_t range_before, size_t range_after);
void *mte_insert_tags(void *ptr, size_t size);
void mte_clear_tags(void *ptr, size_t size);
+void *mte_insert_atag(void *ptr);
+void *mte_clear_atag(void *ptr);
int mte_default_setup(void);
void mte_restore_setup(void);
-int mte_switch_mode(int mte_option, unsigned long incl_mask);
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly);
void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
/* Common utility functions */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
index 9b188254b61a..6ad22f07c9b8 100644
--- a/tools/testing/selftests/arm64/mte/mte_def.h
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -42,6 +42,8 @@
#define MT_TAG_COUNT 16
#define MT_INCLUDE_TAG_MASK 0xFFFF
#define MT_EXCLUDE_TAG_MASK 0x0
+#define MT_ATAG_SHIFT 60
+#define MT_ATAG_MASK 0xFUL
#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
@@ -49,6 +51,12 @@
#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
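+/* Helpers for the address tag held in bits [63:60] of a pointer. */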
+#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT))
+#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT))
+#define MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK))
+
+#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x)))
+
#define MT_PSTATE_TCO_SHIFT 25
#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
#define MT_PSTATE_TCO_EN 1
diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST
index 1789a61d0a9b..f748f2c33b22 100644
--- a/tools/testing/selftests/bpf/DENYLIST
+++ b/tools/testing/selftests/bpf/DENYLIST
@@ -1,6 +1,5 @@
# TEMPORARY
# Alphabetical order
-dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next
get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
stacktrace_build_id
stacktrace_build_id_nmi
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
deleted file mode 100644
index 12e99c0277a8..000000000000
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ /dev/null
@@ -1 +0,0 @@
-tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 910d8d6402ef..4863106034df 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -109,6 +109,7 @@ TEST_PROGS := test_kmod.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
+ test_bpftool_map.sh \
test_bpftool_metadata.sh \
test_doc_build.sh \
test_xsk.sh \
@@ -840,6 +841,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+# This works around a GCC warning about snprintf truncating strings like:
+#
+#   char a[PATH_MAX], b[PATH_MAX];
+#   snprintf(a, sizeof(a), "%s/foo", b); // triggers -Wformat-truncation
+$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation
$(OUTPUT)/veristat.o: $(BPFOBJ)
$(OUTPUT)/veristat: $(OUTPUT)/veristat.o
$(call msg,BINARY,,$@)
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index 68a51dcc0669..16f8ce832004 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -46,8 +46,11 @@
void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
int node_id, __u64 flags) __ksym __weak;
+int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)
+
#else /* when compiled as user space code */
#define __arena
diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h
index a9674e544322..c550e5711967 100644
--- a/tools/testing/selftests/bpf/bpf_atomic.h
+++ b/tools/testing/selftests/bpf/bpf_atomic.h
@@ -61,7 +61,7 @@ extern bool CONFIG_X86_64 __kconfig __weak;
#define smp_mb() \
({ \
- unsigned long __val; \
+ volatile unsigned long __val; \
__sync_fetch_and_add(&__val, 0); \
})
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 5e512a1d09d1..da7e230f2781 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -596,4 +596,7 @@ extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
+ struct bpf_dynptr *value_p) __weak __ksym;
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 8215c9b3115e..9386dfe8b884 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -69,7 +69,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name,
struct bpf_dynptr *value_ptr) __ksym;
extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index e4535451322e..15f626014872 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -4,6 +4,7 @@
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <sys/xattr.h>
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
@@ -319,6 +320,26 @@ int join_parent_cgroup(const char *relative_path)
}
/**
+ * set_cgroup_xattr() - Set xattr on a cgroup dir
+ * @relative_path: The cgroup path, relative to the workdir, on which to set the xattr
+ * @name: xattr name
+ * @value: xattr value
+ *
+ * This function sets an xattr on a cgroup directory.
+ *
+ * On success, it returns 0; on failure, it returns -1.
+ */
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return setxattr(cgroup_path, name, value, strlen(value) + 1, 0);
+}
+
+/**
* __cleanup_cgroup_environment() - Delete temporary cgroups
*
* This is a helper for cleanup_cgroup_environment() that is responsible for
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 502845160d88..182e1ac36c95 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -26,6 +26,10 @@ int join_cgroup(const char *relative_path);
int join_root_cgroup(void);
int join_parent_cgroup(const char *relative_path);
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value);
+
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index f74e1ea0ad3b..8916ab814a3e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -97,6 +97,9 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NETFILTER_INGRESS=y
+CONFIG_IP_NF_IPTABLES_LEGACY=y
+CONFIG_IP6_NF_IPTABLES_LEGACY=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_FLOW_TABLE=y
CONFIG_NF_FLOW_TABLE_INET=y
CONFIG_NETFILTER_NETLINK=y
@@ -105,6 +108,7 @@ CONFIG_IP_NF_IPTABLES=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
+CONFIG_PACKET=y
CONFIG_RC_CORE=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
new file mode 100644
index 000000000000..9acf389dc4ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -0,0 +1,93 @@
+CONFIG_ALTIVEC=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CPUSETS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_FS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FRAME_POINTER=y
+CONFIG_FRAME_WARN=1280
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_MEMCG=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NONPORTABLE=y
+CONFIG_NR_CPUS=256
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PPC64=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_PSERIES=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SMP=y
+CONFIG_SOC_VIRT=y
+CONFIG_SYSVIPC=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_THREAD_SHIFT=14
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VSX=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index 67557cda2208..42b49870e520 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -13,7 +13,7 @@
static void test_fail_cases(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
- __u32 value;
+ __u32 value = 0;
int fd, err;
/* Invalid key size */
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 6befa870434b..4a0670c056ba 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -489,10 +489,28 @@ cleanup:
bpf_link__destroy(link);
}
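+/* Check that the tracing link's bpf_link_info reports the expected type and cookie. */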
+static int verify_tracing_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie");
+
+ return 0;
+}
+
static void tracing_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd;
+ int prog_fd, err;
int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
LIBBPF_OPTS(bpf_test_run_opts, opts);
LIBBPF_OPTS(bpf_link_create_opts, link_opts);
@@ -507,6 +525,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel)
if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
goto cleanup;
+ err = verify_tracing_link_info(fentry_fd, cookie);
+ if (!ASSERT_OK(err, "verify_tracing_link_info"))
+ goto cleanup;
+
cookie = 0x20000000000000L;
prog_fd = bpf_program__fd(skel->progs.fexit_test1);
link_opts.tracing.cookie = cookie;
@@ -635,10 +657,29 @@ cleanup:
bpf_link__destroy(link);
}
+static int verify_raw_tp_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie");
+
+ return 0;
+}
+
static void raw_tp_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd, link_fd = -1;
+ int err, prog_fd, link_fd = -1;
struct bpf_link *link = NULL;
LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts);
@@ -656,6 +697,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel)
goto cleanup;
usleep(1); /* trigger */
+
+ err = verify_raw_tp_link_info(link_fd, cookie);
+ if (!ASSERT_OK(err, "verify_raw_tp_link_info"))
+ goto cleanup;
+
close(link_fd); /* detach */
link_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index add4a18c33bd..5225d69bf79b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -323,7 +323,7 @@ static void test_task_pidfd(void)
static void test_task_sleepable(void)
{
struct bpf_iter_tasks *skel;
- int pid, status, err, data_pipe[2], finish_pipe[2], c;
+ int pid, status, err, data_pipe[2], finish_pipe[2], c = 0;
char *test_data = NULL;
char *test_data_long = NULL;
char *data[2];
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index fe2c502e5089..ecc3d47919ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr)
}
uffd_register.range.start = (unsigned long)fault_addr;
- uffd_register.range.len = 4096;
+ uffd_register.range.len = getpagesize();
uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
close(uffd);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index c0a776feec23..82903585c870 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -879,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
"static int bpf_cgrp_storage_busy = (int)2", 2);
}
+struct btf_dump_string_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *str;
+ struct btf_dump_type_data_opts *opts;
+ int array_id;
+};
+
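+/* Dump @ptr with the char-array type recorded in @ctx and compare against @expected_val. */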
+static int btf_dump_one_string(struct btf_dump_string_ctx *ctx,
+ char *ptr, size_t ptr_sz,
+ const char *expected_val)
+{
+ size_t type_sz;
+ int ret;
+
+ ctx->str[0] = '\0';
+ type_sz = btf__resolve_size(ctx->btf, ctx->array_id);
+ ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+static void btf_dump_strings(struct btf_dump_string_ctx *ctx)
+{
+ struct btf_dump_type_data_opts *opts = ctx->opts;
+
+ opts->emit_strings = true;
+
+ opts->compact = true;
+ opts->emit_zeroes = false;
+
+ opts->skip_names = false;
+ btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\"");
+
+ opts->skip_names = true;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->emit_zeroes = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->compact = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* Non-printable characters come out as hex. */
+ btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\"");
+ btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\"");
+
+ /*
+ * Strings that are too long for the specified type ("char[4]")
+ * should fall back to the current behavior.
+ */
+ opts->compact = true;
+ btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]");
+
+ /*
+ * Strings that are too short for the specified type ("char[4]")
+ * should work normally.
+ */
+ btf_dump_one_string(ctx, "ab", 3, "\"ab\"");
+
+ /* Non-NUL-terminated arrays don't get printed as strings. */
+ char food[4] = { 'f', 'o', 'o', 'd' };
+ char bye[3] = { 'b', 'y', 'e' };
+
+ btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]");
+ btf_dump_one_string(ctx, bye, 3, "['b','y','e',]");
+
+ /* The embedded NUL should terminate the string. */
+ char embed[4] = { 'f', 'o', '\0', 'd' };
+
+ btf_dump_one_string(ctx, embed, 4, "\"fo\"");
+}
+
+static void test_btf_dump_string_data(void)
+{
+ struct test_ctx t = {};
+ char str[STRSIZE];
+ struct btf_dump *d;
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ struct btf_dump_string_ctx ctx;
+ int char_id, int_id, array_id;
+
+ if (test_ctx__init(&t))
+ return;
+
+ d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Generate BTF for a four-element char array. */
+ char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR);
+ ASSERT_EQ(char_id, 1, "char_id");
+ int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(int_id, 2, "int_id");
+ array_id = btf__add_array(t.btf, int_id, char_id, 4);
+ ASSERT_EQ(array_id, 3, "array_id");
+
+ ctx.btf = t.btf;
+ ctx.d = d;
+ ctx.str = str;
+ ctx.opts = &opts;
+ ctx.array_id = array_id;
+
+ btf_dump_strings(&ctx);
+
+ btf_dump__free(d);
+ test_ctx__free(&t);
+}
+
static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
const char *name, const char *expected_val,
void *data, size_t data_sz)
@@ -970,6 +1086,8 @@ void test_btf_dump() {
test_btf_dump_struct_data(btf, d, str);
if (test__start_subtest("btf_dump: var_data"))
test_btf_dump_var_data(btf, d, str);
+ if (test__start_subtest("btf_dump: string_data"))
+ test_btf_dump_string_data();
btf_dump__free(d);
btf__free(btf);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
new file mode 100644
index 000000000000..bb60704a3ef9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_mprog.skel.h"
+
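+/* Query the attach count for @atype on cgroup fd @cg and check it matches @expected. */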
+static void assert_mprog_count(int cg, int atype, int expected)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(cg, atype, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static void test_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+	/* optq.prog_cnt will be used in the query below */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ optd.expected_revision = 5;
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ LIBBPF_OPTS_RESET(optd);
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ LIBBPF_OPTS(bpf_cgroup_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = id_from_link_fd(bpf_link__fd(link1)),
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "link_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+	/* optq.prog_cnt will be used in the query below */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, fd3, fd4;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/preorder_prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, -EINVAL, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ struct cgroup_mprog *skel;
+ __u32 fd2;
+ int cg;
+
+ cg = test__join_cgroup("/preorder_link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_ERR_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_invalid_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ __u32 fd1, fd2, id2;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/invalid_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ id2 = id_from_prog_fd(fd2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+void test_cgroup_mprog_opts(void)
+{
+ if (test__start_subtest("prog_attach_detach"))
+ test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("link_attach_detach"))
+ test_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_prog_attach_detach"))
+ test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_link_attach_detach"))
+ test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("invalid_attach_detach"))
+ test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT);
+}
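
The attach sequences above exercise libbpf's extended cgroup attach options, where BPF_F_BEFORE/BPF_F_AFTER position a program relative to an already attached one (named by fd or id) and expected_revision rejects the attach if the attachment list changed in the meantime. A minimal sketch of the plain prog-fd flavour, assuming a libbpf new enough to expose the relative_fd field of struct bpf_prog_attach_opts; the helper name and descriptors are illustrative, not part of the patch:

#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Attach prog_b_fd so that it runs before the already attached prog_a_fd
 * on cg_fd; returns 0 on success or a negative errno (for example -ENOENT
 * if prog_a_fd is not attached to this cgroup).
 */
static int attach_getsockopt_before(int cg_fd, int prog_a_fd, int prog_b_fd)
{
	LIBBPF_OPTS(bpf_prog_attach_opts, opts,
		.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
		.relative_fd = prog_a_fd,
	);

	return bpf_prog_attach_opts(prog_b_fd, cg_fd, BPF_CGROUP_GETSOCKOPT, &opts);
}

The link-based variants in the test follow the same pattern through bpf_program__attach_cgroup_opts() with struct bpf_cgroup_opts.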
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
new file mode 100644
index 000000000000..a36d2e968bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_p_atype, prog_p2_atype;
+ int prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen = 1;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ LIBBPF_OPTS_RESET(opts);
+ opts.flags = BPF_F_ALLOW_MULTI;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE;
+ if (has_relative_fd)
+ opts.relative_fd = prog_p_fd;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ result = skel->bss->result;
+ ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_mprog_ordering(void)
+{
+ int cg_parent = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
new file mode 100644
index 000000000000..e0dd966e4a3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#include "read_cgroupfs_xattr.skel.h"
+#include "cgroup_read_xattr.skel.h"
+
+#define CGROUP_FS_PARENT "foo/"
+#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/"
+#define TMP_FILE "/tmp/selftests_cgroup_xattr"
+
+static const char xattr_value_a[] = "bpf_selftest_value_a";
+static const char xattr_value_b[] = "bpf_selftest_value_b";
+static const char xattr_name[] = "user.bpf_test";
+
+static void test_read_cgroup_xattr(void)
+{
+ int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1;
+ struct read_cgroupfs_xattr *skel = NULL;
+
+ parent_cgroup_fd = test__join_cgroup(CGROUP_FS_PARENT);
+ if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup"))
+ return;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a),
+ "set parent xattr"))
+ goto out;
+
+ child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD);
+ if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup"))
+ goto out;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b),
+ "set child xattr"))
+ goto out;
+
+ skel = read_cgroupfs_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load"))
+ goto out;
+
+ skel->bss->target_pid = gettid();
+
+ if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach"))
+ goto out;
+
+ tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT, 0644);
+ ASSERT_OK_FD(tmp_fd, "open tmp file");
+ close(tmp_fd);
+
+ ASSERT_TRUE(skel->bss->found_value_a, "found_value_a");
+ ASSERT_TRUE(skel->bss->found_value_b, "found_value_b");
+
+out:
+ close(child_cgroup_fd);
+ close(parent_cgroup_fd);
+ read_cgroupfs_xattr__destroy(skel);
+ unlink(TMP_FILE);
+}
+
+void test_cgroup_xattr(void)
+{
+ RUN_TESTS(cgroup_read_xattr);
+
+ if (test__start_subtest("read_cgroupfs_xattr"))
+ test_read_cgroup_xattr();
+}
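
The test drives a BPF-side read of cgroupfs extended attributes; the set_cgroup_xattr() helper is assumed to boil down to a plain setxattr(2) on the cgroup directory. A minimal user-space sketch under that assumption (the path argument and value are examples, not taken from the patch):

#include <string.h>
#include <sys/xattr.h>

/* Tag a cgroup directory with a user.* xattr that a BPF program can later
 * look up; returns 0 on success, -1 with errno set on failure.
 */
static int tag_cgroup(const char *cgrp_path, const char *value)
{
	return setxattr(cgrp_path, "user.bpf_test", value, strlen(value), 0);
}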
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 62e7ec775f24..9b2d9ceda210 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -21,6 +21,14 @@ static struct {
{"test_dynptr_data", SETUP_SYSCALL_SLEEP},
{"test_dynptr_copy", SETUP_SYSCALL_SLEEP},
{"test_dynptr_copy_xdp", SETUP_XDP_PROG},
+ {"test_dynptr_memset_zero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG},
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
@@ -43,6 +51,8 @@ static struct {
{"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
};
+#define PAGE_SIZE_64K 65536
+
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
char user_data[384] = {[0 ... 382] = 'a', '\0'};
@@ -138,14 +148,18 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
}
case SETUP_XDP_PROG:
{
- char data[5000];
+ char data[90000];
int err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = sizeof(data),
.repeat = 1,
);
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = sizeof(data);
+ else
+ opts.data_size_in = 5000;
+
prog_fd = bpf_program__fd(prog);
err = bpf_prog_test_run_opts(prog_fd, &opts);
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index 9add890c2d37..241b2c8c6e0f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void)
/* btf should still exist when original file descriptor is closed */
err = get_btf_id_by_fd(extra_fds[0], &btf_id);
- if (!ASSERT_GE(err, 0, "get_btf_id_by_fd"))
+ if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd"))
goto cleanup;
Close(extra_fds[0]);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c
deleted file mode 100644
index 568d3aa48a78..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <test_progs.h>
-#include "fexit_noreturns.skel.h"
-
-void test_fexit_noreturns(void)
-{
- RUN_TESTS(fexit_noreturns);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 5266c7022863..14c5a7ef0e87 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -72,7 +72,7 @@ static struct {
{ "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
{ "obj_new_acq", "Unreleased reference id=" },
{ "use_after_drop", "invalid mem access 'scalar'" },
- { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" },
+ { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" },
{ "direct_read_lock", "direct access to bpf_spin_lock is disallowed" },
{ "direct_write_lock", "direct access to bpf_spin_lock is disallowed" },
{ "direct_read_head", "direct access to bpf_list_head is disallowed" },
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index 169ce689b97c..d6f14a232002 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -7,6 +7,10 @@
#include "test_log_buf.skel.h"
#include "bpf_util.h"
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
static size_t libbpf_log_pos;
static char libbpf_log_buf[1024 * 1024];
static bool libbpf_log_error;
diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..40d4f687bd9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "mem_rdonly_untrusted.skel.h"
+
+void test_mem_rdonly_untrusted(void)
+{
+ RUN_TESTS(mem_rdonly_untrusted);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index 39d42271cc46..e261b0e872db 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -465,6 +465,20 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t,
return range_improve(x_t, x, x_swap);
}
+ if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) {
+ if (x_t == S64 && x.a > x.b) {
+ if (x.b < y.a && x.a <= y.b)
+ return range(x_t, x.a, y.b);
+ if (x.a > y.b && x.b >= y.a)
+ return range(x_t, y.a, x.b);
+ } else if (x_t == U64 && y.a > y.b) {
+ if (y.b < x.a && y.a <= x.b)
+ return range(x_t, y.a, x.b);
+ if (y.a > x.b && y.b >= x.a)
+ return range(x_t, x.a, y.b);
+ }
+ }
+
/* otherwise, plain range cast and intersection works */
return range_improve(x_t, x, y_cast);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index da430df45aa4..d1e4cb28a72c 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -97,7 +97,7 @@ static void ringbuf_write_subtest(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- skel->maps.ringbuf.max_entries = 0x4000;
+ skel->maps.ringbuf.max_entries = 0x40000;
err = test_ringbuf_write_lskel__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -108,7 +108,7 @@ static void ringbuf_write_subtest(void)
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
goto cleanup;
- *mmap_ptr = 0x3000;
+ *mmap_ptr = 0x30000;
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
skel->bss->pid = getpid();
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index a4517bee34d5..27781df8f2fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2024 Meta
+#include <poll.h>
#include <test_progs.h>
#include "network_helpers.h"
#include "sock_iter_batch.skel.h"
#define TEST_NS "sock_iter_batch_netns"
+#define TEST_CHILD_NS "sock_iter_batch_child_netns"
static const int init_batch_size = 16;
static const int nr_soreuse = 4;
@@ -118,6 +120,45 @@ done:
return nth_sock_idx;
}
+static void destroy(int fd)
+{
+ struct sock_iter_batch *skel = NULL;
+ __u64 cookie = socket_cookie(fd);
+ struct bpf_link *link = NULL;
+ int iter_fd = -1;
+ int nread;
+ __u64 out;
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ skel->rodata->destroy_cookie = cookie;
+
+ if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ /* Delete matching socket. */
+ nread = read(iter_fd, &out, sizeof(out));
+ ASSERT_GE(nread, 0, "nread");
+ if (nread > 0)
+ ASSERT_EQ(out, cookie, "cookie matches");
+done:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+ close(fd);
+}
+
static int get_seen_count(int fd, struct sock_count counts[], int n)
{
__u64 cookie = socket_cookie(fd);
@@ -152,8 +193,71 @@ static void check_n_were_seen_once(int *fds, int fds_len, int n,
ASSERT_EQ(seen_once, n, "seen_once");
}
+static int accept_from_one(struct pollfd *server_poll_fds,
+ int server_poll_fds_len)
+{
+ static const int poll_timeout_ms = 5000; /* 5s */
+ int ret;
+ int i;
+
+ ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
+ if (!ASSERT_EQ(ret, 1, "poll"))
+ return -1;
+
+ for (i = 0; i < server_poll_fds_len; i++)
+ if (server_poll_fds[i].revents & POLLIN)
+ return accept(server_poll_fds[i].fd, NULL, NULL);
+
+ return -1;
+}
+
+static int *connect_to_server(int family, int sock_type, const char *addr,
+ __u16 port, int nr_connects, int *server_fds,
+ int server_fds_len)
+{
+ struct pollfd *server_poll_fds = NULL;
+ int *established_socks = NULL;
+ int i;
+
+ server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds));
+ if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds"))
+ return NULL;
+
+ for (i = 0; i < server_fds_len; i++) {
+ server_poll_fds[i].fd = server_fds[i];
+ server_poll_fds[i].events = POLLIN;
+ }
+
+ i = 0;
+
+ established_socks = malloc(sizeof(*established_socks) * nr_connects * 2);
+ if (!ASSERT_OK_PTR(established_socks, "established_socks"))
+ goto error;
+
+ while (nr_connects--) {
+ established_socks[i] = connect_to_addr_str(family, sock_type,
+ addr, port, NULL);
+ if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str"))
+ goto error;
+ i++;
+ established_socks[i] = accept_from_one(server_poll_fds,
+ server_fds_len);
+ if (!ASSERT_OK_FD(established_socks[i], "accept_from_one"))
+ goto error;
+ i++;
+ }
+
+ free(server_poll_fds);
+ return established_socks;
+error:
+ free_fds(established_socks, i);
+ free(server_poll_fds);
+ return NULL;
+}
+
static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd)
{
int close_idx;
@@ -182,8 +286,46 @@ static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
counts_len);
}
+static void remove_seen_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Leave one established socket. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
static void remove_unseen(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -214,8 +356,54 @@ static void remove_unseen(int family, int sock_type, const char *addr,
counts_len);
}
+static void remove_unseen_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socket. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
static void remove_all(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -242,8 +430,57 @@ static void remove_all(int family, int sock_type, const char *addr,
ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
}
+static void remove_all_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *close_idx = NULL;
+ int i;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socket. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ close_idx = malloc(sizeof(int) * (established_socks_len - 1));
+ if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
+ return;
+ for (i = 0; i < established_socks_len - 1; i++) {
+ close_idx[i] = get_nth_socket(established_socks,
+ established_socks_len, link,
+ listen_socks_len + i);
+ if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
+ return;
+ }
+
+ for (i = 0; i < established_socks_len - 1; i++) {
+ destroy(established_socks[close_idx[i]]);
+ established_socks[close_idx[i]] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+ free(close_idx);
+}
+
static void add_some(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd)
{
int *new_socks = NULL;
@@ -271,8 +508,52 @@ done:
free_fds(new_socks, socks_len);
}
+static void add_some_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts,
+ int counts_len, struct bpf_link *link,
+ int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established_socks_len - 1 sockets. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw established_socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+
+ /* Double the number of established sockets in the bucket. */
+ new_socks = connect_to_server(family, sock_type, addr, port,
+ established_socks_len / 2, listen_socks,
+ listen_socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "connect_to_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+done:
+ free_fds(new_socks, established_socks_len);
+}
+
static void force_realloc(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -299,11 +580,32 @@ done:
free_fds(new_socks, socks_len);
}
+static void force_realloc_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ /* Iterate through all sockets to trigger a realloc. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+}
+
struct test_case {
void (*test)(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd);
const char *description;
+ int ehash_buckets;
+ int connections;
int init_socks;
int max_socks;
int sock_type;
@@ -358,18 +660,140 @@ static struct test_case resume_tests[] = {
.family = AF_INET6,
.test = force_realloc,
},
+ {
+ .description = "tcp: resume after removing a seen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "tcp: force a realloc to occur (listening)",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen_established,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen_established,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all_established,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = add_some_established,
+ },
+ {
+ .description = "tcp: force a realloc to occur (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ /* Bucket size will need to double when going from listening to
+ * established sockets.
+ */
+ .connections = init_batch_size,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse + (init_batch_size * 2),
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = force_realloc_established,
+ },
};
static void do_resume_test(struct test_case *tc)
{
struct sock_iter_batch *skel = NULL;
+ struct sock_count *counts = NULL;
static const __u16 port = 10001;
+ struct nstoken *nstoken = NULL;
struct bpf_link *link = NULL;
- struct sock_count *counts;
+ int *established_fds = NULL;
int err, iter_fd = -1;
const char *addr;
int *fds = NULL;
- int local_port;
+
+ if (tc->ehash_buckets) {
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
+ tc->ehash_buckets);
+ SYS(done, "ip netns add %s", TEST_CHILD_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
+ nstoken = open_netns(TEST_CHILD_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
+ goto done;
+ }
counts = calloc(tc->max_socks, sizeof(*counts));
if (!ASSERT_OK_PTR(counts, "counts"))
@@ -384,11 +808,18 @@ static void do_resume_test(struct test_case *tc)
tc->init_socks);
if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
goto done;
- local_port = get_socket_local_port(*fds);
- if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
- goto done;
- skel->rodata->ports[0] = ntohs(local_port);
+ if (tc->connections) {
+ established_fds = connect_to_server(tc->family, tc->sock_type,
+ addr, port,
+ tc->connections, fds,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
+ goto done;
+ }
+ skel->rodata->ports[0] = 0;
+ skel->rodata->ports[1] = 0;
skel->rodata->sf = tc->family;
+ skel->rodata->ss = 0;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -406,10 +837,15 @@ static void do_resume_test(struct test_case *tc)
goto done;
tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
- counts, tc->max_socks, link, iter_fd);
+ established_fds, tc->connections * 2, counts, tc->max_socks,
+ link, iter_fd);
done:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
free(counts);
free_fds(fds, tc->init_socks);
+ free_fds(established_fds, tc->connections * 2);
if (iter_fd >= 0)
close(iter_fd);
bpf_link__destroy(link);
@@ -454,6 +890,8 @@ static void do_test(int sock_type, bool onebyone)
skel->rodata->ports[i] = ntohs(local_port);
}
skel->rodata->sf = AF_INET6;
+ if (sock_type == SOCK_STREAM)
+ skel->rodata->ss = TCP_LISTEN;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
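
The resume tests read the socket iterator in controlled chunks via read_n(); for reference, a minimal sketch of draining such an iterator in one go, assuming the iterator program emits one __u64 socket cookie per entry as the selftest's program does (the helper name is illustrative):

#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Create an iterator instance from an attached iter link and count how
 * many cookies it returns before hitting EOF.
 */
static int count_iter_entries(struct bpf_link *link)
{
	__u64 cookie;
	int iter_fd, n = 0;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0)
		return iter_fd;

	while (read(iter_fd, &cookie, sizeof(cookie)) == sizeof(cookie))
		n++;

	close(iter_fd);
	return n;
}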
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index b6c471da5c28..b87e7f39e15a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -314,6 +314,95 @@ out:
test_sockmap_ktls__destroy(skel);
}
+static void test_sockmap_ktls_tx_pop(int family, int sotype)
+{
+ char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0";
+ int c = 0, p = 0, one = 1, sent, recvd;
+ struct test_sockmap_ktls *skel;
+ int prog_fd, map_fd;
+ char rcv[50] = {0};
+ int err;
+ int i, m, r;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ struct {
+ int pop_start;
+ int pop_len;
+ } pop_policy[] = {
+ /* trim the start */
+ {0, 2},
+ {0, 10},
+ {1, 2},
+ {1, 10},
+ /* trim the end */
+ {35, 2},
+ /* New entries should be added before this line */
+ {-1, -1},
+ };
+
+ i = 0;
+ while (pop_policy[i].pop_start >= 0) {
+ skel->bss->pop_start = pop_policy[i].pop_start;
+ skel->bss->pop_end = pop_policy[i].pop_len;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch"))
+ goto out;
+
+ /* verify the data
+ * msg: 0123456789a bcdefghij klmnopqrstuvwxyz
+ * | |
+ * popped data
+ */
+ for (m = 0, r = 0; m < sizeof(msg);) {
+ /* skip checking the data that has been popped */
+ if (m >= pop_policy[i].pop_start &&
+ m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) {
+ m++;
+ continue;
+ }
+
+ if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch"))
+ goto out;
+ m++;
+ r++;
+ }
+ i++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -338,6 +427,8 @@ static void run_ktls_test(int family, int sotype)
test_sockmap_ktls_tx_cork(family, sotype, true);
if (test__start_subtest("tls tx egress with no buf"))
test_sockmap_ktls_tx_no_buf(family, sotype, true);
+ if (test__start_subtest("tls tx with pop"))
+ test_sockmap_ktls_tx_pop(family, sotype);
}
void test_sockmap_ktls(void)
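
test_sockmap_ktls_tx_pop() drives pop_start/pop_end from user space and checks that exactly that byte range is missing on the receiver. On the BPF side this is assumed to map onto the bpf_msg_pop_data() helper; a hedged sketch of such an sk_msg program follows (names are illustrative, not the skeleton's actual source):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

int pop_start;	/* byte offset to start popping at, set from user space */
int pop_end;	/* number of bytes to pop, set from user space */

SEC("sk_msg")
int sk_msg_pop_sketch(struct sk_msg_md *msg)
{
	/* Remove [pop_start, pop_start + pop_end) from the outgoing message. */
	bpf_msg_pop_data(msg, pop_start, pop_end, 0);
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";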
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 1d98eee7a2c3..f1bdccc7e4e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map)
goto close;
n = xsend(c1, buf, sizeof(buf), 0);
+ if (n == -1)
+ goto close;
if (n < sizeof(buf))
FAIL("incomplete write");
diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c
new file mode 100644
index 000000000000..d9f0185dca61
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stream.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <regex.h>
+
+#include "stream.skel.h"
+#include "stream_fail.skel.h"
+
+void test_stream_failure(void)
+{
+ RUN_TESTS(stream_fail);
+}
+
+void test_stream_success(void)
+{
+ RUN_TESTS(stream);
+ return;
+}
+
+struct {
+ int prog_off;
+ const char *errstr;
+} stream_error_arr[] = {
+ {
+ offsetof(struct stream, progs.stream_cond_break),
+ "ERROR: Timeout detected for may_goto instruction\n"
+ "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
+ "Call trace:\n"
+ "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+ "|[ \t]+[^\n]+\n)*",
+ },
+ {
+ offsetof(struct stream, progs.stream_deadlock),
+ "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n"
+ "Attempted lock = (0x[0-9a-fA-F]+)\n"
+ "Total held locks = 1\n"
+ "Held lock\\[ 0\\] = \\1\n" // Lock address must match
+ "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
+ "Call trace:\n"
+ "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+ "|[ \t]+[^\n]+\n)*",
+ },
+};
+
+static int match_regex(const char *pattern, const char *string)
+{
+ int err, rc;
+ regex_t re;
+
+ err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE);
+ if (err)
+ return -1;
+ rc = regexec(&re, string, 0, NULL, 0);
+ regfree(&re);
+ return rc == 0 ? 1 : 0;
+}
+
+void test_stream_errors(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ struct stream *skel;
+ int ret, prog_fd;
+ char buf[1024];
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) {
+ struct bpf_program **prog;
+
+ prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off);
+ prog_fd = bpf_program__fd(*prog);
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+#if !defined(__x86_64__)
+ ASSERT_TRUE(1, "Timed may_goto unsupported, skip.");
+ if (i == 0) {
+ ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts);
+ ASSERT_EQ(ret, 0, "stream read");
+ continue;
+ }
+#endif
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ ASSERT_LE(ret, 1023, "len for buf");
+ buf[ret] = '\0';
+
+ ret = match_regex(stream_error_arr[i].errstr, buf);
+ if (!ASSERT_TRUE(ret == 1, "regex match"))
+ fprintf(stderr, "Output from stream:\n%s\n", buf);
+ }
+
+ stream__destroy(skel);
+}
+
+void test_stream_syscall(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ struct stream *skel;
+ int ret, prog_fd;
+ char buf[64];
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.stream_syscall);
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EINVAL, "bad prog_fd");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -ENOENT, "bad stream id");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EFAULT, "bad stream buf");
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 2, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 1, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stdout");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stderr");
+
+ stream__destroy(skel);
+}
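
bpf_prog_stream_read() returns the number of bytes copied out and 0 once the stream is drained, which the syscall subtest relies on. A small sketch that forwards a program's stderr stream to the test's own stderr, using the same API as above (the helper is illustrative):

#include <stdio.h>
#include <bpf/bpf.h>

/* Read the program's BPF_STREAM_STDERR output until it is exhausted. */
static void dump_prog_stderr(int prog_fd)
{
	char buf[256];
	int n;

	while ((n = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf,
					 sizeof(buf), NULL)) > 0)
		fwrite(buf, 1, n, stderr);
}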
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
new file mode 100644
index 000000000000..35af8044d059
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include <test_progs.h>
+#include "string_kfuncs_success.skel.h"
+#include "string_kfuncs_failure1.skel.h"
+#include "string_kfuncs_failure2.skel.h"
+#include <sys/mman.h>
+
+static const char * const test_cases[] = {
+ "strcmp",
+ "strchr",
+ "strchrnul",
+ "strnchr",
+ "strrchr",
+ "strlen",
+ "strnlen",
+ "strspn_str",
+ "strspn_accept",
+ "strcspn_str",
+ "strcspn_reject",
+ "strstr",
+ "strnstr",
+};
+
+void run_too_long_tests(void)
+{
+ struct string_kfuncs_failure2 *skel;
+ struct bpf_program *prog;
+ char test_name[256];
+ int err, i;
+
+ skel = string_kfuncs_failure2__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load"))
+ return;
+
+ memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str));
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ sprintf(test_name, "test_%s_too_long", test_cases[i]);
+ if (!test__start_subtest(test_name))
+ continue;
+
+ prog = bpf_object__find_program_by_name(skel->obj, test_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+
+ ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG");
+ }
+
+cleanup:
+ string_kfuncs_failure2__destroy(skel);
+}
+
+void test_string_kfuncs(void)
+{
+ RUN_TESTS(string_kfuncs_success);
+ RUN_TESTS(string_kfuncs_failure1);
+
+ run_too_long_tests();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 66a900327f91..0ab36503c3b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry,
bool test_fexit,
bool test_fentry_entry)
{
- int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val;
+ int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val;
struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
struct bpf_program *prog, *fentry_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
index 924d0e25320c..d52a62af77bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -8,34 +8,6 @@
# define loopback 1
#endif
-static inline __u32 id_from_prog_fd(int fd)
-{
- struct bpf_prog_info prog_info = {};
- __u32 prog_info_len = sizeof(prog_info);
- int err;
-
- err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
- if (!ASSERT_OK(err, "id_from_prog_fd"))
- return 0;
-
- ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
- return prog_info.id;
-}
-
-static inline __u32 id_from_link_fd(int fd)
-{
- struct bpf_link_info link_info = {};
- __u32 link_info_len = sizeof(link_info);
- int err;
-
- err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
- if (!ASSERT_OK(err, "id_from_link_fd"))
- return 0;
-
- ASSERT_NEQ(link_info.id, 0, "link_info.id");
- return link_info.id;
-}
-
static inline __u32 ifindex_from_link_fd(int fd)
{
struct bpf_link_info link_info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
index 47b56c258f3f..367f47e4a936 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -60,13 +60,19 @@ static void test_set_global_vars_succeeds(void)
" -G \"var_s8 = -128\" "\
" -G \"var_u8 = 255\" "\
" -G \"var_ea = EA2\" "\
- " -G \"var_eb = EB2\" "\
- " -G \"var_ec = EC2\" "\
+ " -G \"var_eb = EB2\" "\
+ " -G \"var_ec=EC2\" "\
" -G \"var_b = 1\" "\
- " -G \"struct1.struct2.u.var_u8 = 170\" "\
+ " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\
" -G \"union1.struct3.var_u8_l = 0xaa\" "\
" -G \"union1.struct3.var_u8_h = 0xaa\" "\
- "-vl2 > %s", fix->veristat, fix->tmpfile);
+ " -G \"arr[3]= 171\" " \
+ " -G \"arr[EA2] =172\" " \
+ " -G \"enum_arr[EC2]=EA3\" " \
+ " -G \"three_d[31][7][EA2]=173\"" \
+ " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \
+ " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \
+ " -vl2 > %s", fix->veristat, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
__CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
@@ -81,8 +87,14 @@ static void test_set_global_vars_succeeds(void)
__CHECK_STR("_w=12 ", "var_eb = EB2");
__CHECK_STR("_w=13 ", "var_ec = EC2");
__CHECK_STR("_w=1 ", "var_b = 1");
- __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170");
+ __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
__CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa");
+ __CHECK_STR("_w=171 ", "arr[3]= 171");
+ __CHECK_STR("_w=172 ", "arr[EA2] =172");
+ __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3");
+ __CHECK_STR("_w=173 ", "matrix[31][7][11]=173");
+ __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
+ __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
out:
teardown_fixture(fix);
@@ -129,6 +141,95 @@ out:
teardown_fixture(fix);
}
+static void test_unsupported_ptr_array_type(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_out_of_bounds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Array index 99 is out of bounds", "arr[99]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_not_found(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_for_non_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_no_array_index_for_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1");
+
+out:
+ teardown_fixture(fix);
+}
+
void test_veristat(void)
{
if (test__start_subtest("set_global_vars_succeeds"))
@@ -139,6 +240,22 @@ void test_veristat(void)
if (test__start_subtest("set_global_vars_from_file_succeeds"))
test_set_global_vars_from_file_succeeds();
+
+ if (test__start_subtest("test_unsupported_ptr_array_type"))
+ test_unsupported_ptr_array_type();
+
+ if (test__start_subtest("test_array_out_of_bounds"))
+ test_array_out_of_bounds();
+
+ if (test__start_subtest("test_array_index_not_found"))
+ test_array_index_not_found();
+
+ if (test__start_subtest("test_array_index_for_non_array"))
+ test_array_index_for_non_array();
+
+ if (test__start_subtest("test_no_array_index_for_array"))
+ test_no_array_index_for_array();
+
}
#undef __CHECK_STR
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
index f9392df23f8a..b81dde283052 100644
--- a/tools/testing/selftests/bpf/prog_tests/token.c
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -115,7 +115,7 @@ static int create_bpffs_fd(void)
static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
{
- int mnt_fd, err;
+ int err;
/* set up token delegation mount options */
err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
@@ -136,12 +136,7 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
if (err < 0)
return -errno;
- /* create O_PATH fd for detached mount */
- mnt_fd = sys_fsmount(fs_fd, 0, 0);
- if (err < 0)
- return -errno;
-
- return mnt_fd;
+ return 0;
}
/* send FD over Unix domain (AF_UNIX) socket */
@@ -287,6 +282,7 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba
{
int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
struct token_lsm *lsm_skel = NULL;
+ char one;
/* load and attach LSM "policy" before we go into unpriv userns */
lsm_skel = token_lsm__open_and_load();
@@ -333,13 +329,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba
err = sendfd(sock_fd, fs_fd);
if (!ASSERT_OK(err, "send_fs_fd"))
goto cleanup;
- zclose(fs_fd);
+
+ /* wait until the parent has read the fd, done the fsconfig() calls
+ * and sent us a signal that it is done
+ */
+ err = read(sock_fd, &one, sizeof(one));
+ if (!ASSERT_GE(err, 0, "read_one"))
+ goto cleanup;
/* avoid mucking around with mount namespaces and mounting at
- * well-known path, just get detach-mounted BPF FS fd back from parent
+ * well-known path, just create O_PATH fd for detached mount
*/
- err = recvfd(sock_fd, &mnt_fd);
- if (!ASSERT_OK(err, "recv_mnt_fd"))
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (!ASSERT_OK_FD(mnt_fd, "mnt_fd"))
goto cleanup;
/* try to fspick() BPF FS and try to add some delegation options */
@@ -429,24 +431,24 @@ again:
static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
{
- int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;
+ int fs_fd = -1, token_fd = -1, err;
+ char one = 1;
err = recvfd(sock_fd, &fs_fd);
if (!ASSERT_OK(err, "recv_bpffs_fd"))
goto cleanup;
- mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
- if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
+ err = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(err, 0, "materialize_bpffs_fd")) {
err = -EINVAL;
goto cleanup;
}
- zclose(fs_fd);
- /* pass BPF FS context object to parent */
- err = sendfd(sock_fd, mnt_fd);
- if (!ASSERT_OK(err, "send_mnt_fd"))
+ /* notify the child that we did the fsconfig() calls and it can proceed. */
+ err = write(sock_fd, &one, sizeof(one));
+ if (!ASSERT_EQ(err, sizeof(one), "send_one"))
goto cleanup;
- zclose(mnt_fd);
+ zclose(fs_fd);
/* receive BPF token FD back from child for some extra tests */
err = recvfd(sock_fd, &token_fd);
@@ -459,7 +461,6 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
cleanup:
zclose(sock_fd);
zclose(fs_fd);
- zclose(mnt_fd);
zclose(token_fd);
if (child_pid > 0)
@@ -1046,6 +1047,41 @@ err_out:
#define bit(n) (1ULL << (n))
+static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ int err, token_fd = -1;
+ struct bpf_token_info info;
+ u32 len = sizeof(struct bpf_token_info);
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ memset(&info, 0, len);
+ err = bpf_obj_get_info_by_fd(token_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_obj_get_token_info"))
+ goto cleanup;
+ if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* The BPF_PROG_TYPE_EXT bit must not be set in the token */
+ if (!ASSERT_EQ(info.allowed_progs & bit(BPF_PROG_TYPE_EXT), 0, "token_info_progs_ext"))
+ err = -EINVAL;
+
+cleanup:
+ zclose(token_fd);
+ return err;
+}
+
void test_token(void)
{
if (test__start_subtest("map_token")) {
@@ -1149,4 +1185,13 @@ void test_token(void)
subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
}
+ if (test__start_subtest("bpf_token_info")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_bpf_token_info);
+ }
}
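
userns_bpf_token_info() checks the new bpf_token_info fields; here is a compact sketch of the same query outside the test harness, assuming a libbpf and UAPI that provide bpf_token_create() and struct bpf_token_info as used above (the printing is illustrative):

#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>

/* Create a token from a BPF FS mount fd and print which commands and
 * program types it delegates.
 */
static int print_token_caps(int bpffs_mnt_fd)
{
	struct bpf_token_info info = {};
	__u32 len = sizeof(info);
	int token_fd, err;

	token_fd = bpf_token_create(bpffs_mnt_fd, NULL);
	if (token_fd < 0)
		return token_fd;

	err = bpf_obj_get_info_by_fd(token_fd, &info, &len);
	if (!err)
		printf("allowed_cmds 0x%llx allowed_progs 0x%llx\n",
		       (unsigned long long)info.allowed_cmds,
		       (unsigned long long)info.allowed_progs);

	close(token_fd);
	return err;
}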
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
index a222df765bc3..10e231965589 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -28,10 +28,62 @@ out:
tracing_failure__destroy(skel);
}
+static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg)
+{
+ struct tracing_failure *skel;
+ struct bpf_program *prog;
+ char log_buf[256];
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+
+ bpf_program__set_autoload(prog, true);
+ bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf));
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_ERR(err, "tracing_failure__load"))
+ goto out;
+
+ ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf");
+out:
+ tracing_failure__destroy(skel);
+}
+
+static void test_tracing_deny(void)
+{
+ int btf_id;
+
+ /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */
+ btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY);
+ if (btf_id <= 0) {
+ test__skip();
+ return;
+ }
+
+ test_tracing_fail_prog("tracing_deny",
+ "Attaching tracing programs to function '__rcu_read_lock' is rejected.");
+}
+
+static void test_fexit_noreturns(void)
+{
+ test_tracing_fail_prog("fexit_noreturns",
+ "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+}
+
void test_tracing_failure(void)
{
if (test__start_subtest("bpf_spin_lock"))
test_bpf_spin_lock(true);
if (test__start_subtest("bpf_spin_unlock"))
test_bpf_spin_lock(false);
+ if (test__start_subtest("tracing_deny"))
+ test_tracing_deny();
+ if (test__start_subtest("fexit_noreturns"))
+ test_fexit_noreturns();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index c397336fe1ed..b17dc39a23db 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -251,7 +251,7 @@ static void test_uretprobe_syscall_call(void)
.retprobe = true,
);
struct uprobe_syscall_executed *skel;
- int pid, status, err, go[2], c;
+ int pid, status, err, go[2], c = 0;
if (!ASSERT_OK(pipe(go), "pipe"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 495d66414b57..9057e983cc54 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -270,8 +270,16 @@ static void subtest_multispec_usdt(void)
*/
trigger_300_usdts();
- /* we'll reuse usdt_100 BPF program for usdt_300 test */
bpf_link__destroy(skel->links.usdt_100);
+
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+ /* If built with arm64/clang, there will be far fewer specs
+ * for usdt_300 call sites.
+ */
+#if !defined(__aarch64__) || !defined(__clang__)
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
"test", "usdt_300", NULL);
err = -errno;
@@ -282,13 +290,11 @@ static void subtest_multispec_usdt(void)
/* let's check that there are no "dangling" BPF programs attached due
* to partial success of the above test:usdt_300 attachment
*/
- bss->usdt_100_called = 0;
- bss->usdt_100_sum = 0;
-
f300(777); /* this is 301st instance of usdt_300 */
ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+#endif
/* This time we have USDT with 400 inlined invocations, but arg specs
* should be the same across all sites, so libbpf will only need to
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index d424e7ecbd12..9fd3ae987321 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -21,8 +21,7 @@
#include "../progs/test_user_ringbuf.h"
static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
-static const long c_ringbuf_size = 1 << 12; /* 1 small page */
-static const long c_max_entries = c_ringbuf_size / c_sample_size;
+static long c_ringbuf_size, c_max_entries;
static void drain_current_samples(void)
{
@@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void)
uint32_t remaining_samples = total_samples;
int err;
- BUILD_BUG_ON(total_samples <= c_max_entries);
+ if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries"))
+ return;
+
err = load_skel_create_user_ringbuf(&skel, &ringbuf);
if (err)
return;
@@ -686,6 +687,9 @@ void test_user_ringbuf(void)
{
int i;
+ c_ringbuf_size = getpagesize(); /* 1 page */
+ c_max_entries = c_ringbuf_size / c_sample_size;
+
for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
if (!test__start_subtest(success_tests[i].test_name))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index c9da06741104..77ec95d4ffaa 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -85,6 +85,7 @@
#include "verifier_store_release.skel.h"
#include "verifier_subprog_precision.skel.h"
#include "verifier_subreg.skel.h"
+#include "verifier_tailcall.skel.h"
#include "verifier_tailcall_jit.skel.h"
#include "verifier_typedef.skel.h"
#include "verifier_uninit.skel.h"
@@ -219,6 +220,7 @@ void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
void test_verifier_store_release(void) { RUN(verifier_store_release); }
void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
+void test_verifier_tailcall(void) { RUN(verifier_tailcall); }
void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
void test_verifier_typedef(void) { RUN(verifier_typedef); }
void test_verifier_uninit(void) { RUN(verifier_uninit); }
diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
index ab0f02faa80c..4d69d9d55e17 100644
--- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -268,7 +268,7 @@ static void test_verify_pkcs7_sig_from_map(void)
char *tmp_dir;
struct test_verify_pkcs7_sig *skel = NULL;
struct bpf_map *map;
- struct data data;
+ struct data data = {};
int ret, zero = 0;
/* Trigger creation of session keyring. */
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index b2b2d85dbb1b..43264347e7d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void)
bpf_object__close(obj);
}
-static void test_xdp_adjust_tail_grow(void)
+static void test_xdp_adjust_tail_grow(bool is_64k_pagesize)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
struct bpf_object *obj;
- char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ char buf[8192]; /* avoid segfault: large buf to hold grow results */
__u32 expect_sz;
int err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 1,
);
+	/* Use topts.data_size_in as a special signal to the BPF program */
+ if (is_64k_pagesize)
+ topts.data_size_in = sizeof(pkt_v4) - 1;
+ else
+ topts.data_size_in = sizeof(pkt_v4);
+
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
@@ -208,7 +213,7 @@ out:
bpf_object__close(obj);
}
-static void test_xdp_adjust_frags_tail_grow(void)
+static void test_xdp_adjust_frags_tail_grow_4k(void)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
__u32 exp_size;
@@ -246,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void)
ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
- for (i = 0; i < 9000; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ for (i = 0; i < 9000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ }
- for (i = 9000; i < 9010; i++)
- ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ for (i = 9000; i < 9010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ }
- for (i = 9010; i < 16384; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ for (i = 9010; i < 16384; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ }
/* Test a too large grow */
memset(buf, 1, 16384);
@@ -273,16 +284,93 @@ out:
bpf_object__close(obj);
}
+static void test_xdp_adjust_frags_tail_grow_64k(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(262144);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 262144);
+ exp_size = 90000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 90000;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ for (i = 0; i < 90000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-old");
+ }
+
+ for (i = 90000; i < 90010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "90Kb+10b-new");
+ }
+
+ for (i = 90010; i < 262144; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched");
+ }
+
+ /* Test a too large grow */
+ memset(buf, 1, 262144);
+ exp_size = 90001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 90001;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
void test_xdp_adjust_tail(void)
{
+ int page_size = getpagesize();
+
if (test__start_subtest("xdp_adjust_tail_shrink"))
test_xdp_adjust_tail_shrink();
if (test__start_subtest("xdp_adjust_tail_grow"))
- test_xdp_adjust_tail_grow();
+ test_xdp_adjust_tail_grow(page_size == 65536);
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
test_xdp_adjust_frags_tail_shrink();
- if (test__start_subtest("xdp_adjust_frags_tail_grow"))
- test_xdp_adjust_frags_tail_grow();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow")) {
+ if (page_size == 65536)
+ test_xdp_adjust_frags_tail_grow_64k();
+ else
+ test_xdp_adjust_frags_tail_grow_4k();
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 7dac044664ac..dd34b0cc4b4e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
#else
#define MAX_PKT_SIZE 3408
#endif
+
+#define PAGE_SIZE_4K 4096
+#define PAGE_SIZE_64K 65536
+
static void test_max_pkt_size(int fd)
{
- char data[MAX_PKT_SIZE + 1] = {};
+ char data[PAGE_SIZE_64K + 1] = {};
int err;
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = MAX_PKT_SIZE,
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
+
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K;
+ else
+ opts.data_size_in = MAX_PKT_SIZE;
+
err = bpf_prog_test_run_opts(fd, &opts);
ASSERT_OK(err, "prog_run_max_size");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
new file mode 100644
index 000000000000..2f20485e0de3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 value = 0;
+
+ if (ctx->value == (void *)0)
+ return 0;
+
+ bpf_probe_read_kernel(&value, sizeof(value), ctx->value);
+ value_sum += value;
+ return 0;
+}
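A map-element iterator like dump_bpf_map_values() is normally driven from userspace by attaching it to a specific map and reading the resulting iterator fd. A minimal harness sketch, assuming the conventional generated skeleton (bpf_iter_map_elem.skel.h) and an already-created ARRAY map fd; all names outside this file are assumptions:

/* Hypothetical harness sketch -- skeleton and fd setup are assumptions. */
#include <unistd.h>
#include <bpf/libbpf.h>
#include "bpf_iter_map_elem.skel.h"

static int sum_map_values(int map_fd)
{
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo = { .map = { .map_fd = map_fd } };
	struct bpf_iter_map_elem *skel;
	struct bpf_link *link;
	int iter_fd, sum = -1;
	char buf[16];

	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	skel = bpf_iter_map_elem__open_and_load();
	if (!skel)
		return -1;

	link = bpf_program__attach_iter(skel->progs.dump_bpf_map_values, &opts);
	if (!link)
		goto out;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0)
		goto out_link;

	/* Reading the iter fd runs the program once per map element. */
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;

	sum = skel->bss->value_sum;
	close(iter_fd);
out_link:
	bpf_link__destroy(link);
out:
	bpf_iter_map_elem__destroy(skel);
	return sum;
}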
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 6e208e24ba3b..530752ddde8e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -83,9 +83,11 @@
* expect return value to match passed parameter:
* - a decimal number
* - a hexadecimal number, when starts from 0x
- * - literal INT_MIN
- * - literal POINTER_VALUE (see definition below)
- * - literal TEST_DATA_LEN (see definition below)
+ * - a macro which expands to one of the above
+ * - literal _INT_MIN (expands to INT_MIN)
+ * In addition, two special macros are defined below:
+ * - POINTER_VALUE
+ * - TEST_DATA_LEN
* __retval_unpriv Same, but load program in unprivileged mode.
*
* __description Text to be used instead of a program name for display
@@ -125,8 +127,8 @@
#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
-#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val)))
-#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val))))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val))))
#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
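The move from #val to XSTR(val) relies on the standard two-level stringification idiom, so that arguments such as TEST_DATA_LEN are macro-expanded before being embedded in the decl tag. A minimal sketch of the idiom (the actual XSTR definition in bpf_misc.h may differ in name or placement):

/* Two-level stringification sketch (assumed to match the XSTR used above). */
#define STR(x)  #x
#define XSTR(x) STR(x)

#define TEST_DATA_LEN 64
/* #x applied directly:  "TEST_DATA_LEN"  (no macro expansion)         */
/* XSTR(TEST_DATA_LEN):  "64"             (expanded, then stringified) */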
@@ -155,7 +157,7 @@
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
/* Magic constants used with __retval() */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifndef __used
@@ -231,4 +233,12 @@
#define CAN_USE_LOAD_ACQ_STORE_REL
#endif
+#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)
+#define SPEC_V1
+#endif
+
+#if defined(__TARGET_ARCH_x86)
+#define SPEC_V4
+#endif
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
new file mode 100644
index 000000000000..6a0ea02c4de2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/getsockopt")
+int getsockopt_1(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_2(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_3(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_4(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
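These four identical programs exist only so the cgroup_mprog tests can exercise attach ordering. A rough sketch of ordered attachment from userspace, assuming a cgroup fd plus the mprog-aware bpf_prog_attach_opts fields (flags, relative_fd) available in recent libbpf:

/* Sketch only: cg_fd, fd1 and fd2 are assumed to be valid fds for the
 * cgroup and for two of the getsockopt programs above.
 */
LIBBPF_OPTS(bpf_prog_attach_opts, opts, .flags = BPF_F_ALLOW_MULTI);
int err;

/* attach getsockopt_1 first ... */
err = bpf_prog_attach_opts(fd1, cg_fd, BPF_CGROUP_GETSOCKOPT, &opts);

/* ... then insert getsockopt_2 in front of it */
opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE;
opts.relative_fd = fd1;
err = err ?: bpf_prog_attach_opts(fd2, cg_fd, BPF_CGROUP_GETSOCKOPT, &opts);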
diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
new file mode 100644
index 000000000000..092db1d0435e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+char value[16];
+
+static __always_inline void read_xattr(struct cgroup *cgroup)
+{
+ struct bpf_dynptr value_ptr;
+
+ bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
+ bpf_cgroup_read_xattr(cgroup, "user.bpf_test",
+ &value_ptr);
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_css_iter_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("expected an RCU CS")
+int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(use_css_iter_sleepable_with_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ goto out;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_bpf_cgroup_ancestor)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int BPF_PROG(cgroup_skb)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
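The user.bpf_test xattr these programs read has to be placed on a cgroup directory ahead of time. A minimal setup sketch using setxattr(2); the cgroup path and value are assumptions of the test harness, not anything defined in this file:

#include <sys/xattr.h>

/* Hypothetical setup helper. */
static int tag_cgroup(const char *cgrp_path)
{
	const char value[] = "bpf_test_value";

	return setxattr(cgrp_path, "user.bpf_test", value, sizeof(value), 0);
}

/* e.g. tag_cgroup("/sys/fs/cgroup/selftest"); */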
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
index f3d79aecbf93..6884ab99a421 100644
--- a/tools/testing/selftests/bpf/progs/compute_live_registers.c
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -240,6 +240,22 @@ __naked void if2(void)
::: __clobber_all);
}
+/* Verifier misses that r2 is alive if jset is not handled properly */
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (45) if r1 & 0x7 goto pc+1")
+__naked void if3_jset_bug(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 & 0x7 goto +1;"
+ "exit;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
SEC("socket")
__log_level(2)
__msg("0: .......... (b7) r1 = 0")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index a0391f9da2d4..8315273cb900 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -9,6 +9,8 @@
#include "bpf_misc.h"
#include "errno.h"
+#define PAGE_SIZE_64K 65536
+
char _license[] SEC("license") = "GPL";
int pid, err, val;
@@ -611,11 +613,12 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp)
struct bpf_dynptr ptr_buf, ptr_xdp;
char data[] = "qwertyuiopasdfghjkl";
char buf[32] = {'\0'};
- __u32 len = sizeof(data);
+ __u32 len = sizeof(data), xdp_data_size;
int i, chunks = 200;
/* ptr_xdp is backed by non-contiguous memory */
bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ xdp_data_size = bpf_dynptr_size(&ptr_xdp);
bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf);
/* Destination dynptr is backed by non-contiguous memory */
@@ -673,7 +676,7 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp)
goto out;
}
- if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG)
+ if (bpf_dynptr_copy(&ptr_xdp, xdp_data_size - 3000, &ptr_xdp, 0, len * chunks) != -E2BIG)
err = 1;
out:
@@ -681,6 +684,173 @@ out:
return XDP_DROP;
}
+char memset_zero_data[] = "data to be zeroed";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_zero_data);
+ char zeroes[32] = {'\0'};
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, 0);
+ err = err ?: bpf_memcmp(zeroes, memset_zero_data, data_sz);
+
+ return 0;
+}
+
+#define DYNPTR_MEMSET_VAL 42
+
+char memset_notzero_data[] = "data to be overwritten";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_notzero(void *ctx)
+{
+ u32 data_sz = sizeof(memset_notzero_data);
+ struct bpf_dynptr ptr;
+ char expected[32];
+
+ __builtin_memset(expected, DYNPTR_MEMSET_VAL, data_sz);
+
+ err = bpf_dynptr_from_mem(memset_notzero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, DYNPTR_MEMSET_VAL);
+ err = err ?: bpf_memcmp(expected, memset_notzero_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_offset_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_offset(void *ctx)
+{
+ char expected[] = "data to \0\0\0\0eroed partially";
+ __u32 data_sz = sizeof(memset_zero_offset_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_offset_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 8, 4, 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_offset_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_adjusted_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_adjusted(void *ctx)
+{
+ char expected[] = "data\0\0\0\0be zeroed partially";
+ __u32 data_sz = sizeof(memset_zero_adjusted_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_adjusted_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_adjust(&ptr, 4, 8);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_adjusted_data, data_sz);
+
+ return 0;
+}
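The expected[] string follows directly from the adjusted window: bpf_dynptr_adjust(&ptr, start, end) narrows the dynptr to bytes [start, end) of the original buffer, and bpf_dynptr_size() is then 4, so the memset only touches that slice:

/* "data to be zeroed partially"
 *  bytes 4..7 are " to "
 *
 * bpf_dynptr_adjust(&ptr, 4, 8)    -> dynptr window = bytes 4..7 (size 4)
 * bpf_dynptr_memset(&ptr, 0, 4, 0) -> zeroes exactly those bytes
 * result: "data\0\0\0\0be zeroed partially"
 */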
+
+char memset_overflow_data[] = "memset overflow data";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 0, data_sz + 1, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow_offset(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 1, data_sz, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_memset_readonly(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* cgroup skbs are read only, memset should fail */
+ ret = bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ if (ret != -EINVAL)
+ err = 1;
+
+ return 0;
+}
+
+#define min_t(type, x, y) ({ \
+ type __x = (x); \
+ type __y = (y); \
+ __x < __y ? __x : __y; })
+
+SEC("xdp")
+int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp)
+{
+ u32 data_sz, chunk_sz, offset = 0;
+ const int max_chunks = 200;
+ struct bpf_dynptr ptr_xdp;
+ char expected_buf[32];
+ char buf[32];
+ int i;
+
+ __builtin_memset(expected_buf, DYNPTR_MEMSET_VAL, sizeof(expected_buf));
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ data_sz = bpf_dynptr_size(&ptr_xdp);
+
+ err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL);
+ if (err) {
+		/* bpf_dynptr_memset() eventually calls bpf_xdp_pointer(),
+		 * which returns -EFAULT when data_sz is greater than 0xffff.
+		 * With a 64K page size, data_sz exceeds 64K, so the error is
+		 * expected: clear it and return success.
+		 */
+ if (data_sz >= PAGE_SIZE_64K)
+ err = 0;
+ goto out;
+ }
+
+ bpf_for(i, 0, max_chunks) {
+ offset = i * sizeof(buf);
+ if (offset >= data_sz)
+ goto out;
+ chunk_sz = min_t(u32, sizeof(buf), data_sz - offset);
+ err = bpf_dynptr_read(&buf, chunk_sz, &ptr_xdp, offset, 0);
+ if (err)
+ goto out;
+ err = bpf_memcmp(buf, expected_buf, sizeof(buf));
+ if (err)
+ goto out;
+ }
+out:
+ return XDP_DROP;
+}
+
void *user_ptr;
/* Contains the copy of the data pointed by user_ptr.
* Size 384 to make it not fit into a single kernel chunk when copying
diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c
deleted file mode 100644
index 54654539f550..000000000000
--- a/tools/testing/selftests/bpf/progs/fexit_noreturns.c
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-#include "bpf_misc.h"
-
-char _license[] SEC("license") = "GPL";
-
-SEC("fexit/do_exit")
-__failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.")
-int BPF_PROG(noreturns)
-{
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 76adf4a8f2da..7dd92a303bf6 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1649,4 +1649,281 @@ int clean_live_states(const void *ctx)
return 0;
}
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "call noop;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int noop(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32())) {
+ * r6 = -31;
+ * jump_into_loop:
+ * goto +0;
+ * goto loop;
+ * }
+ * if (unlikely(bpf_get_prandom_u32()))
+ * goto jump_into_loop;
+ * loop:
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto jump_into_loop_%=;"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "jump_into_loop_%=: "
+ "goto +0;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state3(void)
+{
+ /*
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * loop1(-32, &fp[-8])
+ * loop1_wrapper(&fp[-8])
+ * bpf_iter_num_destroy(&fp[-8])
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ /* call #1 */
+ "r1 = -32;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call loop1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ /* call #2 */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call loop1_wrapper;"
+ /* return */
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1(void)
+{
+ /*
+ * int loop1(num, iter) {
+ * r6 = num;
+ * r7 = iter;
+ * while (bpf_iter_num_next(r7)) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * return 0
+ * }
+ */
+ asm volatile (
+ "r6 = r1;"
+ "r7 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "loop_%=:"
+ "r1 = r7;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1_wrapper(void)
+{
+ /*
+ * int loop1_wrapper(iter) {
+ * r6 = -32;
+ * r7 = iter;
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * loop1(r6, r7);
+ * return 0;
+ * }
+ */
+ asm volatile (
+ "r6 = -32;"
+ "r7 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "r1 = r6;"
+ "r2 = r7;"
+ "call loop1;"
+ "r0 = 0;"
+ "exit;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..4f94c971ae86
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)")
+__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar")
+int btf_id_to_ptr_mem(void *ctx)
+{
+ struct task_struct *task;
+ struct nameidata *idata;
+ u64 ret, off;
+
+ task = bpf_get_current_task_btf();
+ idata = task->nameidata;
+ off = bpf_core_field_offset(struct nameidata, pathname);
+ /*
+ * asm block to have reliable match target for __msg, equivalent of:
+ * ret = task->nameidata->pathname[0];
+ */
+ asm volatile (
+ "r7 = %[idata];"
+ "r7 += %[off];"
+ "r8 = *(u64 *)(r7 + 0);"
+ "r9 = *(u8 *)(r8 + 0);"
+ "%[ret] = r9;"
+ : [ret]"=r"(ret)
+ : [idata]"r"(idata),
+ [off]"r"(off)
+ : "r7", "r8", "r9");
+ return ret;
+}
+
+SEC("socket")
+__success
+__retval(0)
+int ldx_is_ok_bad_addr(void *ctx)
+{
+ char *p;
+
+ if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID))
+ return 42;
+
+ p = bpf_rdonly_cast(0, 0);
+ return p[0x7fff];
+}
+
+SEC("socket")
+__success
+__retval(1)
+int ldx_is_ok_good_addr(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return *p;
+}
+
+SEC("socket")
+__success
+int offset_not_tracked(void *ctx)
+{
+ int *p, i, s;
+
+ p = bpf_rdonly_cast(0, 0);
+ s = 0;
+ bpf_for(i, 0, 1000 * 1000 * 1000) {
+ p++;
+ s += *p;
+ }
+ return s;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int stx_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ *p = 1;
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ __sync_fetch_and_add(p, 1);
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_rmw_not_ok(void *ctx)
+{
+ long v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return __sync_val_compare_and_swap(p, 0, 42);
+}
+
+SEC("socket")
+__failure
+__msg("invalid access to memory, mem_size=0 off=0 size=4")
+__msg("R1 min value is outside of the allowed memory range")
+int kfunc_param_not_ok(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ bpf_kfunc_trusted_num_test(p);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure
+__msg("R1 type=rdonly_untrusted_mem expected=")
+int helper_param_not_ok(void *ctx)
+{
+ char *p;
+
+ p = bpf_rdonly_cast(0, 0);
+	/*
+	 * Any helper with the ARG_CONST_SIZE_OR_ZERO constraint
+	 * (the most permissive one) will do.
+	 */
+ bpf_copy_from_user(p, 0, (void *)42);
+ return 0;
+}
+
+static __noinline u64 *get_some_addr(void)
+{
+ if (bpf_get_prandom_u32())
+ return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock));
+ else
+ return bpf_rdonly_cast(0, 0);
+}
+
+SEC("socket")
+__success
+__retval(0)
+int mixed_mem_type(void *ctx)
+{
+ u64 *p;
+
+	/* Use a __noinline helper so the compiler doesn't hoist the load into the if branches. */
+ p = get_some_addr();
+ return *p;
+}
+
+__attribute__((__aligned__(8)))
+u8 global[] = {
+ 0x11, 0x22, 0x33, 0x44,
+ 0x55, 0x66, 0x77, 0x88,
+ 0x99
+};
+
+__always_inline
+static u64 combine(void *p)
+{
+ u64 acc;
+
+ acc = 0;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ acc |= (*(u64 *)p >> 56) << 24;
+ acc |= (*(u32 *)p >> 24) << 16;
+ acc |= (*(u16 *)p >> 8) << 8;
+ acc |= *(u8 *)p;
+#else
+ acc |= (*(u64 *)p & 0xff) << 24;
+ acc |= (*(u32 *)p & 0xff) << 16;
+ acc |= (*(u16 *)p & 0xff) << 8;
+ acc |= *(u8 *)p;
+#endif
+ return acc;
+}
+
+SEC("socket")
+__retval(0x88442211)
+int diff_size_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0));
+}
+
+SEC("socket")
+__retval(0x99553322)
+int misaligned_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0) + 1);
+}
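The two __retval values can be read straight off combine(): each differently sized load contributes one byte of the global array. On a little-endian host (the big-endian branch is written to pick the same bytes):

/* global = 11 22 33 44 55 66 77 88 99
 *
 * diff_size_access, p = &global:
 *   (*(u64 *)p >> 56) << 24  ->  0x88000000   (highest byte of the u64)
 *   (*(u32 *)p >> 24) << 16  ->  0x00440000
 *   (*(u16 *)p >>  8) <<  8  ->  0x00002200
 *    *(u8  *)p               ->  0x00000011
 *   acc                      =   0x88442211
 *
 * misaligned_access, p = &global + 1:
 *   same selection over bytes 22..99  ->  0x99553322
 */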
+
+__weak int return_one(void)
+{
+ return 1;
+}
+
+SEC("socket")
+__success
+__retval(1)
+int null_check(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ if (p == 0)
+ /* make this a function call to avoid compiler
+ * moving r0 assignment before check.
+ */
+ return return_one();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
index a3620c15c136..49fe93d7e059 100644
--- a/tools/testing/selftests/bpf/progs/rbtree.c
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
}
m->key = 1;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_rbtree_add(&groot, &m->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_rbtree_add(root, &m->node, less);
+ bpf_spin_unlock(lock);
n = bpf_obj_new(typeof(*n));
if (!n)
return 3;
n->key = 3;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_spin_unlock(lock);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 43637ee2cdcd..3a868a199349 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -16,10 +16,11 @@ struct {
__type(value, long);
} map_a SEC(".maps");
-__u32 user_data, key_serial, target_pid;
+__u32 user_data, target_pid;
+__s32 key_serial;
__u64 flags, task_storage_val, cgroup_id;
-struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
new file mode 100644
index 000000000000..405adbe5e8b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid = 0;
+
+char xattr_value[64];
+static const char expected_value_a[] = "bpf_selftest_value_a";
+static const char expected_value_b[] = "bpf_selftest_value_b";
+bool found_value_a;
+bool found_value_b;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css, *tmp;
+ struct bpf_dynptr value_ptr;
+ struct cgroup *cgrp;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ css = &cgrp->self;
+ bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr);
+ bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) {
+ int ret;
+
+ ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test",
+ &value_ptr);
+ if (ret < 0)
+ continue;
+
+ if (ret == sizeof(expected_value_a) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a))
+ found_value_a = true;
+ if (ret == sizeof(expected_value_b) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b))
+ found_value_b = true;
+ }
+
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(cgrp);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c
new file mode 100644
index 000000000000..7216b3450e96
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define EPERM 1 /* Operation not permitted */
+
+/* From include/linux/fs.h. */
+#define FMODE_WRITE 0x2
+
+struct map;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} prot_status_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} prot_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} not_prot_map SEC(".maps");
+
+SEC("fmod_ret/security_bpf_map")
+int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode)
+{
+ __u32 key = 0;
+ __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key);
+
+ if (!status_ptr || !*status_ptr)
+ return 0;
+
+ if (map == &prot_map) {
+ /* Allow read-only access */
+ if (fmode & FMODE_WRITE)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/*
+ * This program keeps references to the maps so that they are not
+ * optimized out.
+ */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_dummy1, int a)
+{
+ __u32 key = 0;
+ __u32 val1 = a;
+ __u32 val2 = a + 1;
+
+ bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY);
+ bpf_map_update_elem(&not_prot_map, &key, &val2, BPF_ANY);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
index 90f5656c3991..ebaef28b2cb3 100644
--- a/tools/testing/selftests/bpf/progs/set_global_vars.c
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -7,22 +7,30 @@
char _license[] SEC("license") = "GPL";
-enum Enum { EA1 = 0, EA2 = 11 };
+typedef __s32 s32;
+typedef s32 i32;
+typedef __u8 u8;
+
+enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 };
enum Enumu64 {EB1 = 0llu, EB2 = 12llu };
enum Enums64 { EC1 = 0ll, EC2 = 13ll };
const volatile __s64 var_s64 = -1;
const volatile __u64 var_u64 = 0;
-const volatile __s32 var_s32 = -1;
+const volatile i32 var_s32 = -1;
const volatile __u32 var_u32 = 0;
const volatile __s16 var_s16 = -1;
const volatile __u16 var_u16 = 0;
const volatile __s8 var_s8 = -1;
-const volatile __u8 var_u8 = 0;
+const volatile u8 var_u8 = 0;
const volatile enum Enum var_ea = EA1;
const volatile enum Enumu64 var_eb = EB1;
const volatile enum Enums64 var_ec = EC1;
const volatile bool var_b = false;
+const volatile i32 arr[32];
+const volatile enum Enum enum_arr[32];
+const volatile i32 three_d[47][19][17];
+const volatile i32 *ptr_arr[32];
struct Struct {
int:16;
@@ -35,34 +43,38 @@ struct Struct {
volatile struct {
const int:1;
union {
- const volatile __u8 var_u8;
+ const volatile u8 var_u8[3];
const volatile __s16 filler3;
const int:1;
+ s32 mat[7][5];
} u;
};
- } struct2;
+ } struct2[2][4];
};
const volatile __u32 stru = 0; /* same prefix as below */
-const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}};
+const volatile struct Struct struct1[3];
+const volatile struct Struct struct11[11][7];
-union Union {
- __u16 var_u16;
- struct Struct3 {
- struct {
- __u8 var_u8_l;
- };
+struct Struct3 {
+ struct {
+ u8 var_u8_l;
+ };
+ struct {
struct {
- struct {
- __u8 var_u8_h;
- };
+ u8 var_u8_h;
};
- } struct3;
+ };
};
-const volatile union Union union1 = {.var_u16 = -1};
+typedef struct Struct3 Struct3_t;
-char arr[4] = {0};
+union Union {
+ __u16 var_u16;
+ Struct3_t struct3;
+};
+
+const volatile union Union union1 = {.var_u16 = -1};
SEC("socket")
int test_set_globals(void *ctx)
@@ -81,8 +93,14 @@ int test_set_globals(void *ctx)
a = var_eb;
a = var_ec;
a = var_b;
- a = struct1.struct2.u.var_u8;
+ a = struct1[2].struct2[1][2].u.var_u8[2];
a = union1.var_u16;
+ a = arr[3];
+ a = arr[EA2];
+ a = enum_arr[EC2];
+ a = three_d[31][7][EA2];
+ a = struct1[2].struct2[1][2].u.mat[5][3];
+ a = struct11[7][5].struct2[0][1].u.mat[3][0];
return a;
}
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 8f483337e103..77966ded5467 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -23,6 +23,7 @@ static bool ipv4_addr_loopback(__be32 a)
}
volatile const unsigned int sf;
+volatile const unsigned int ss;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -42,16 +43,18 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != sf ||
- sk->sk_state != TCP_LISTEN ||
- sk->sk_family == AF_INET6 ?
+ (ss && sk->sk_state != ss) ||
+ (sk->sk_family == AF_INET6 ?
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
- !ipv4_addr_loopback(sk->sk_rcv_saddr))
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
@@ -67,6 +70,27 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
return 0;
}
+volatile const __u64 destroy_cookie;
+
+SEC("iter/tcp")
+int iter_tcp_destroy(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = (struct sock_common *)ctx->sk_common;
+ __u64 sock_cookie;
+
+ if (!sk_common)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ if (sock_cookie != destroy_cookie)
+ return 0;
+
+ bpf_sock_destroy(sk_common);
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk)
SEC("iter/udp")
@@ -83,15 +107,17 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != sf ||
- sk->sk_family == AF_INET6 ?
+ (sk->sk_family == AF_INET6 ?
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
- !ipv4_addr_loopback(sk->sk_rcv_saddr))
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
new file mode 100644
index 000000000000..35790897dc87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+#define ENOSPC 28
+#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+int size;
+
+SEC("syscall")
+__success __retval(0)
+int stream_exhaust(void *ctx)
+{
+ /* Use global variable for loop convergence. */
+ size = 0;
+ bpf_repeat(BPF_MAX_LOOPS) {
+ if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954)
+ return 0;
+ size += sizeof(_STR) - 1;
+ }
+ return 1;
+}
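The magic 99954 encodes the stream capacity. Assuming the _STR literal is 54 characters long (the only length consistent with the check):

/* Each successful bpf_stream_printk(BPF_STDOUT, _STR) queues
 * sizeof(_STR) - 1 = 54 bytes, so -ENOSPC is expected only after
 * 1851 * 54 = 99954 bytes, i.e. a per-stream capacity of roughly
 * 100000 bytes (the 1852nd 54-byte write no longer fits).
 */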
+
+SEC("syscall")
+__success __retval(0)
+int stream_cond_break(void *ctx)
+{
+ while (can_loop)
+ ;
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int stream_deadlock(void *ctx)
+{
+ struct bpf_res_spin_lock *lock, *nlock;
+
+ lock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!lock)
+ return 1;
+ nlock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!nlock)
+ return 1;
+ if (bpf_res_spin_lock(lock))
+ return 1;
+ if (bpf_res_spin_lock(nlock)) {
+ bpf_res_spin_unlock(lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(nlock);
+ bpf_res_spin_unlock(lock);
+ return 1;
+}
+
+SEC("syscall")
+__success __retval(0)
+int stream_syscall(void *ctx)
+{
+ bpf_stream_printk(BPF_STDOUT, "foo");
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
new file mode 100644
index 000000000000..b4a0d0cc8ec8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+SEC("syscall")
+__failure __msg("Possibly NULL pointer passed")
+int stream_vprintk_null_arg(void *ctx)
+{
+ bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("R3 type=scalar expected=")
+int stream_vprintk_scalar_arg(void *ctx)
+{
+ bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("arg#1 doesn't point to a const string")
+int stream_vprintk_string_arg(void *ctx)
+{
+ bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
new file mode 100644
index 000000000000..53af438bd998
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char *user_ptr = (char *)1;
+char *invalid_kern_ptr = (char *)-1;
+
+/*
+ * When passing userspace pointers, the error code differs based on arch:
+ * -ERANGE on arches with non-overlapping address spaces
+ * -EFAULT on other arches
+ */
+#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \
+ defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86)
+#define USER_PTR_ERR -ERANGE
+#else
+#define USER_PTR_ERR -EFAULT
+#endif
+
+/*
+ * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and
+ * user_ptr (instead of causing an exception) so the below two groups of tests
+ * are not applicable.
+ */
+#ifndef __TARGET_ARCH_s390
+
+/* Passing NULL to string kfuncs (treated as a userspace ptr) */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); }
+
+/* Passing userspace ptr to string kfuncs */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); }
+
+#endif /* __TARGET_ARCH_s390 */
+
+/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
new file mode 100644
index 000000000000..89fb4669b0e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+
+char long_str[XATTR_SIZE_MAX + 1];
+
+SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
+SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
+SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
+SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
+SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); }
+SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); }
+SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); }
+SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); }
+SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); }
+SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); }
+SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); }
+SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); }
+SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
new file mode 100644
index 000000000000..46697f381878
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char str[] = "hello world";
+
+#define __test(retval) SEC("syscall") __success __retval(retval)
+
+/* Functional tests */
+__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); }
+__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); }
+__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
+__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
+__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
+__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); }
+__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); }
+__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); }
+__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); }
+__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); }
+__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); }
+__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); }
+__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); }
+__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); }
+__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); }
+__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); }
+__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
+__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
+__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
+__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
+__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); }
+__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
+
+char _license[] SEC("license") = "GPL";
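A note on the __test() helper used throughout this file: it is plain shorthand for the usual annotations from bpf_misc.h, so every line above is a separate BPF program whose return value the runner checks. As a rough hand expansion (illustrative only), one entry becomes:

SEC("syscall") __success __retval(11)
int test_strlen(void *ctx) { return bpf_strlen(str); }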
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
index 0e4d2ff63ab8..dbe646013811 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
index 58d5d8dc2235..3d89ad7cbe2a 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
index 31e58389bb8b..b1f6d7e5a8e5 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
index cdbbb12f1491..1f7e1e59b073 100644
--- a/tools/testing/selftests/bpf/progs/test_lookup_key.c
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -14,11 +14,11 @@
char _license[] SEC("license") = "GPL";
__u32 monitored_pid;
-__u32 key_serial;
+__s32 key_serial;
__u32 key_id;
__u64 flags;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
index 350513c0e4c9..f063a0013f85 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx)
if (cur_pid != pid)
return 0;
- sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample1)
return 0;
/* first one can pass */
- sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample2) {
bpf_ringbuf_discard(sample1, 0);
__sync_fetch_and_add(&discarded, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
index 8ef6b39335b6..34b30e2603f0 100644
--- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
__u32 monitored_pid;
char sig[MAX_SIG_SIZE];
__u32 sig_size;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
index 2796dd8545eb..1c7941a4ad00 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
@@ -1,8 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 ByteDance */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
+
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 1);
@@ -31,7 +36,7 @@ int prog_skb_verdict(struct __sk_buff *skb)
change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
return SK_PASS;
} else if (data[0] == 'E') { /* Error */
- change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
return SK_PASS;
}
return SK_PASS;
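The 'E' branch now requests a tail of BPF_SKB_MAX_LEN (PAGE_SIZE << 2) instead of a hard-coded 65535, presumably so the request keeps exceeding the kernel's page-size-dependent limit on 64 KiB-page systems as well; the same substitution appears in test_tc_change_tail.c further below. A minimal user-space sketch of the arithmetic, assuming nothing beyond the definition added above:

#include <stdio.h>

int main(void)
{
	/* "Too large" tail length for two common page sizes. */
	unsigned long page_sizes[] = { 4096, 65536 };

	for (int i = 0; i < 2; i++)
		printf("PAGE_SIZE=%lu -> BPF_SKB_MAX_LEN=%lu\n",
		       page_sizes[i], page_sizes[i] << 2);
	return 0;
}

This prints 16384 for 4 KiB pages and 262144 for 64 KiB pages, whereas the old constant 65535 exceeded the limit only on the former.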
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
index 8bdb9987c0c7..83df4919c224 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -7,6 +7,8 @@ int cork_byte;
int push_start;
int push_end;
int apply_bytes;
+int pop_start;
+int pop_end;
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
@@ -22,6 +24,8 @@ int prog_sk_policy(struct sk_msg_md *msg)
bpf_msg_cork_bytes(msg, cork_byte);
if (push_start > 0 && push_end > 0)
bpf_msg_push_data(msg, push_start, push_end, 0);
+ if (pop_start >= 0 && pop_end > 0)
+ bpf_msg_pop_data(msg, pop_start, pop_end, 0);
return SK_PASS;
}
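The new pop_start/pop_end globals mirror the existing cork/push knobs, so the user-space half of the test can exercise bpf_msg_pop_data() the same way. A hypothetical driver snippet (the skeleton name and field placement are assumptions for illustration; zero-initialized globals normally land in .bss):

#include "test_sockmap_ktls.skel.h"	/* assumed generated skeleton */

static void configure_pop(struct test_sockmap_ktls *skel)
{
	/* Set before traffic flows; the sk_msg program reads them on each message. */
	skel->bss->pop_start = 1;
	skel->bss->pop_end = 2;
}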
diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
index 28edafe803f0..fcba8299f0bc 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/pkt_cls.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
long change_tail_ret = 1;
@@ -94,7 +94,7 @@ int change_tail(struct __sk_buff *skb)
bpf_skb_change_tail(skb, len, 0);
return TCX_PASS;
} else if (payload[0] == 'E') { /* Error */
- change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
return TCX_PASS;
} else if (payload[0] == 'Z') { /* Zero */
change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index e96d09e11115..ff8d755548b9 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -17,7 +17,7 @@
#define MAX_SIG_SIZE 1024
__u32 monitored_pid;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
__u64 system_keyring_id;
struct data {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index dc74d8cf9e3f..5904f45cfbc4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -19,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
/* Data length determine test case */
if (data_len == 54) { /* sizeof(pkt_v4) */
- offset = 4096; /* test too large offset */
+ offset = 4096; /* test too large offset, 4k page size */
+ } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */
+ offset = 65536; /* test too large offset, 64k page size */
} else if (data_len == 74) { /* sizeof(pkt_v6) */
offset = 40;
} else if (data_len == 64) {
@@ -31,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
offset = 10;
} else if (data_len == 9001) {
offset = 4096;
+ } else if (data_len == 90000) {
+ offset = 10; /* test a small offset, 64k page size */
+ } else if (data_len == 90001) {
+ offset = 65536; /* test too large offset, 64k page size */
} else {
return XDP_ABORTED; /* No matching test */
}
diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c
index d41665d2ec8c..65e485c4468c 100644
--- a/tools/testing/selftests/bpf/progs/tracing_failure.c
+++ b/tools/testing/selftests/bpf/progs/tracing_failure.c
@@ -18,3 +18,15 @@ int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock)
{
return 0;
}
+
+SEC("?fentry/__rcu_read_lock")
+int BPF_PROG(tracing_deny)
+{
+ return 0;
+}
+
+SEC("?fexit/do_exit")
+int BPF_PROG(fexit_noreturns)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c
index e97e518516b6..2b4fdca162be 100644
--- a/tools/testing/selftests/bpf/progs/verifier_and.c
+++ b/tools/testing/selftests/bpf/progs/verifier_and.c
@@ -85,8 +85,14 @@ l0_%=: r0 = r0; \
SEC("socket")
__description("check known subreg with unknown reg")
-__success __failure_unpriv __msg_unpriv("R1 !read_ok")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w0 < 0x1 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */
+__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void known_subreg_with_unknown_reg(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 67509c5d3982..7f4827eede3c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -3,6 +3,7 @@
#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -114,6 +115,111 @@ int basic_alloc3(void *ctx)
return 0;
}
+SEC("syscall")
+__success __retval(0)
+int basic_reserve1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ page += __PAGE_SIZE;
+
+ /* Reserve the second page */
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 2;
+
+ /* Try to explicitly allocate the reserved page. */
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 3;
+
+ /* Try to implicitly allocate the page (since there's only 2 of them). */
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 4;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_reserve2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if ((u64)page)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Reserve the same page twice, should return -EBUSY. */
+SEC("syscall")
+__success __retval(0)
+int reserve_twice(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret != -EBUSY)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Try invalid reservations: a NULL pointer and ranges past the end of the arena. */
+SEC("syscall")
+__success __retval(0)
+int reserve_invalid_region(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ /* Try a NULL pointer. */
+ ret = bpf_arena_reserve_pages(&arena, NULL, 3);
+ if (ret != -EINVAL)
+ return 1;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 3);
+ if (ret != -EINVAL)
+ return 2;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 4096);
+ if (ret != -EINVAL)
+ return 3;
+
+ ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
+ if (ret != -EINVAL)
+ return 4;
+#endif
+ return 0;
+}
+
SEC("iter.s/bpf_map")
__success __log_level(2)
int iter_maps1(struct bpf_iter__bpf_map *ctx)
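Taken together, the new tests pin down the observable contract of bpf_arena_reserve_pages(): a reserved range can no longer be handed out by either explicit or implicit allocation, reserving the same range twice fails with -EBUSY, and NULL or out-of-range requests fail with -EINVAL. A hypothetical use relying only on that behaviour (the helper below is an illustration, not part of the patch):

/* Keep later allocations away from the first arena page by reserving it up front. */
static __noinline int reserve_guard_page(void)
{
	char __arena *page = arena_base(&arena);

	return bpf_arena_reserve_pages(&arena, page, 1) ? -1 : 0;
}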
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index f94f30cf1bb8..9dbdf123542d 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -67,6 +67,104 @@ int big_alloc1(void *ctx)
return 0;
}
+/* Try to access a reserved page. Behavior should be identical to accessing unallocated pages. */
+SEC("syscall")
+__success __retval(0)
+int access_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ const size_t len = 4;
+ int ret, i;
+
+ /* Get a separate region of the arena. */
+ page = base = arena_base(&arena) + 16384 * PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base, len);
+ if (ret)
+ return 1;
+
+ /* Try to dirty reserved memory. */
+ for (i = 0; i < len && can_loop; i++)
+ *page = 0x5a;
+
+ for (i = 0; i < len && can_loop; i++) {
+ page = (volatile char __arena *)(base + i * PAGE_SIZE);
+
+ /*
+ * Error out in case either the write went through,
+ * or the address has random garbage.
+ */
+ if (*page == 0x5a)
+ return 2 + 2 * i;
+
+ if (*page)
+ return 2 + 2 * i + 1;
+ }
+#endif
+ return 0;
+}
+
+/* Try to allocate a region overlapping with a reservation. */
+SEC("syscall")
+__success __retval(0)
+int request_partially_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
+ if ((u64)page != 0ULL)
+ return 2;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int free_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *addr;
+ char __arena *page;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ addr = arena_base(&arena) + 32768 * __PAGE_SIZE;
+
+ page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2);
+ if (ret)
+ return 2;
+
+ /*
+ * Reserved and allocated pages should be interchangeable for
+ * bpf_arena_free_pages(). Free a reserved and an allocated
+ * page with a single call.
+ */
+	bpf_arena_free_pages(&arena, addr + __PAGE_SIZE, 2);
+
+ /* The free call above should have succeeded, so this allocation should too. */
+ page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 3;
+#endif
+ return 0;
+}
+
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
#define PAGE_CNT 100
__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 0eb33bb801b5..87a2c60d86e6 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -2,6 +2,7 @@
/* Converted from tools/testing/selftests/bpf/verifier/bounds.c */
#include <linux/bpf.h>
+#include <../../../include/linux/filter.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -620,8 +621,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test1")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test1(void)
{
asm volatile (" \
@@ -643,8 +650,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test2(void)
{
asm volatile (" \
@@ -691,9 +704,14 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 0, reg xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_1(void)
{
asm volatile (" \
@@ -719,9 +737,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 = 0, reg32 xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_1(void)
{
asm volatile (" \
@@ -747,9 +770,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 2, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 > 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_2_reg_xor_3(void)
{
asm volatile (" \
@@ -829,9 +857,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg > 0, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_3(void)
{
asm volatile (" \
@@ -858,9 +891,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 > 0, reg32 xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_3(void)
{
asm volatile (" \
@@ -1028,7 +1066,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
__success __retval(0)
-__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
+__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void crossing_64_bit_signed_boundary_2(void)
{
asm volatile (" \
@@ -1334,4 +1372,300 @@ __naked void mult_sign_ovf(void)
__imm(bpf_skb_store_bytes)
: __clobber_all);
}
+
+SEC("socket")
+__description("64-bit addition, all outcomes overflow")
+__success __log_level(2)
+__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
+__retval(0)
+__naked void add64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 0xa000000000000000 ll;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
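The bounds expected by the __msg() annotation above follow from ordinary wrapping arithmetic: after the OR, r3 lies in [0xa000000000000000, U64_MAX], and doubling wraps exactly once for every value in that range, so the wrapped interval is itself a sound bound. A stand-alone sketch of the computation (illustrative only, not verifier code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t umin = 0xa000000000000000ULL, umax = UINT64_MAX;

	/* Every value in [umin, umax] overflows exactly once when doubled. */
	printf("umin+umin = %#llx\n", (unsigned long long)(umin + umin));
	printf("umax+umax = %#llx\n", (unsigned long long)(umax + umax));
	return 0;
}

This prints 0x4000000000000000 and 0xfffffffffffffffe, matching the umin/umax in the expected verifier log.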
+SEC("socket")
+__description("64-bit addition, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()")
+__retval(0)
+__naked void add64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 2;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition overflow, all outcomes overflow")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 0xa0000000;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 2;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, all outcomes underflow")
+__success __log_level(2)
+__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)")
+__retval(0)
+__naked void sub64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 0x8000000000000000 ll;"
+ "r1 |= r2;"
+ "r3 = 0;"
+ "r3 -= r1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()")
+__retval(0)
+__naked void sub64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r3 = r0;"
+ "r2 = 1;"
+ "r3 -= r2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction overflow, all outcomes underflow")
+__success __log_level(2)
+__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w1 = w0;"
+ "w2 = 0x80000000;"
+ "w1 |= w2;"
+ "w3 = 0;"
+ "w3 -= w1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w3 = w0;"
+ "w2 = 1;"
+ "w3 -= w2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead branch on jset, does not result in invariants violation error")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_range_analysis(void)
+{
+ asm volatile (" \
+ call %[bpf_get_netns_cookie]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 & 0xffffffff goto +0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+/* This test covers the 64-bit bounds deduction when the s64 and u64 ranges
+ * overlap on the negative side. At instruction 7, the ranges look as follows:
+ *
+ * 0 umin=0xfffffcf1 umax=0xff..ff6e U64_MAX
+ * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxxxx] [xxxxxxxxxxxx|
+ * 0 smax=0xeffffeee smin=-655 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0xff..ffd71;0xff..ff6e] U64_MAX
+ * | [xxx] |
+ * |----------------------------|------------------------------|
+ * | [xxx] |
+ * 0 s64=[-655;-146] -1
+ *
+ * Without the deduction across the sign boundary, we end up with an invariant
+ * violation error.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, negative overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))")
+__retval(0)
+__naked void bounds_deduct_negative_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w3 = w0; \
+ w6 = (s8)w0; \
+ r0 = (s8)r0; \
+ if w6 >= 0xf0000000 goto l0_%=; \
+ r0 += r6; \
+ r6 += 400; \
+ r0 -= r6; \
+ if r3 < r0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
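The deduction spelled out in the comment above bounds_deduct_negative_overlap can be reproduced with plain interval arithmetic on the two views of the register. A stand-alone sketch (it mirrors the reasoning in that comment, not the verifier's actual implementation):

#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	/* Bounds at instruction 7 of bounds_deduct_negative_overlap. */
	uint64_t umin = 0xfffffcf1ULL, umax = 0xffffffffffffff6eULL;
	int64_t smin = -655, smax = 0xeffffeee;

	/* Seen as u64, the s64 range is [0, smax] U [(u64)smin, U64_MAX].
	 * Here the u64 range cannot touch the first segment (umin > (u64)smax),
	 * so both views shrink to the intersection with the second one.
	 */
	if (smin < 0 && smax >= 0 && umin > (uint64_t)smax) {
		if ((uint64_t)smin > umin)
			umin = (uint64_t)smin;
		smin = (int64_t)umin;
		smax = (int64_t)umax;
	}

	printf("u64=[%#" PRIx64 ";%#" PRIx64 "] s64=[%" PRId64 ";%" PRId64 "]\n",
	       umin, umax, smin, smax);
	return 0;
}

This prints u64=[0xfffffffffffffd71;0xffffffffffffff6e] s64=[-655;-146], i.e. the tightened bounds shown in that comment's second diagram.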
+/* This test covers the 64-bit bounds deduction when the s64 and u64 ranges
+ * overlap on the positive side. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff00 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0;127] U64_MAX
+ * [xxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * [xxxxxxxx] |
+ * 0 s64=[0;127] -1
+ *
+ * Without the deduction across the sign boundary, the program is rejected due to
+ * the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, positive overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
+__retval(0)
+__naked void bounds_deduct_positive_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff00; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test is the same as above, but the s64 and u64 ranges overlap in two
+ * places. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff80 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * 0xffffffffffffff80 = (u64)-128. We therefore can't deduce anything new and
+ * the program should fail due to the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, two overlaps")
+__failure __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
+__msg("frame pointer is read only")
+__naked void bounds_deduct_two_overlaps(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff80; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
index c506afbdd936..260a6df264e3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -159,13 +159,16 @@ __failure_unpriv
__naked void deducing_bounds_from_const_10(void)
{
asm volatile (" \
+ r6 = r1; \
r0 = 0; \
if r0 s<= 0 goto l0_%=; \
-l0_%=: /* Marks reg as unknown. */ \
- r0 = -r0; \
- r0 -= r1; \
+l0_%=: /* Marks r0 as unknown. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 -= r6; \
exit; \
-" ::: __clobber_all);
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index a83809a1dbbf..0450840c92d9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -218,4 +218,29 @@ __naked void null_check_8_null_bind(void)
: __clobber_all);
}
+#define narrow_load(type, ctx, field) \
+ SEC(type) \
+ __description("narrow load on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void invalid_narrow_load##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u32 *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(off, offsetof(struct ctx, field) + 4) \
+ : __clobber_all); \
+ }
+
+narrow_load("cgroup/getsockopt", bpf_sockopt, sk);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end);
+narrow_load("tc", __sk_buff, sk);
+narrow_load("cgroup/bind4", bpf_sock_addr, sk);
+narrow_load("sockops", bpf_sock_ops, sk);
+narrow_load("sockops", bpf_sock_ops, skb_data);
+narrow_load("sockops", bpf_sock_ops, skb_data_end);
+narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
+
char _license[] SEC("license") = "GPL";
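Each narrow_load() instantiation performs a 4-byte load at offsetof(field) + 4, i.e. into the second half of an 8-byte context field, which the verifier must reject as an invalid narrow context access. For reference, a rough hand expansion of one instantiation (illustrative only):

SEC("tc")
__description("narrow load on field sk of __sk_buff")
__failure __msg("invalid bpf_context access")
__naked void invalid_narrow_load__sk_buffsk(void)
{
	asm volatile ("r1 = *(u32 *)(r1 + %[off]); r0 = 0; exit;"
		      :
		      : __imm_const(off, offsetof(struct __sk_buff, sk) + 4)
		      : __clobber_all);
}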
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
index 458984da804c..34e0c012ee76 100644
--- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
+++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
@@ -77,7 +77,7 @@ l0_%=: exit; \
SEC("tc")
__description("MOD32 overflow, check 1")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_1(void)
{
asm volatile (" \
@@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void)
SEC("tc")
__description("MOD32 overflow, check 2")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 4ab0ef18d7eb..181da86ba5f0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -179,4 +179,132 @@ int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags)
return subprog_trusted_acq_rel(task);
}
+__weak int subprog_untrusted_bad_tags(struct task_struct *task __arg_untrusted __arg_nullable)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 untrusted cannot be combined with any other tags")
+int untrusted_bad_tags(void *ctx)
+{
+ return subprog_untrusted_bad_tags(0);
+}
+
+struct local_type_wont_be_accepted {};
+
+__weak int subprog_untrusted_bad_type(struct local_type_wont_be_accepted *p __arg_untrusted)
+{
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 reference type('STRUCT local_type_wont_be_accepted') has no matches")
+int untrusted_bad_type(void *ctx)
+{
+ return subprog_untrusted_bad_type(bpf_rdonly_cast(0, 0));
+}
+
+__weak int subprog_untrusted(const volatile struct task_struct *restrict task __arg_untrusted)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_untrusted') is global and assumed valid.")
+__msg("Validating subprog_untrusted() func#1...")
+__msg(": R1=untrusted_ptr_task_struct")
+int trusted_to_untrusted(void *ctx)
+{
+ return subprog_untrusted(bpf_get_current_task_btf());
+}
+
+char mem[16];
+u32 off;
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted(void *ctx)
+{
+ /* untrusted to untrusted */
+ subprog_untrusted(bpf_core_cast(0, struct task_struct));
+ /* wrong type to untrusted */
+ subprog_untrusted((void *)bpf_core_cast(0, struct bpf_verifier_env));
+ /* map value to untrusted */
+ subprog_untrusted((void *)mem);
+ /* scalar to untrusted */
+ subprog_untrusted(0);
+ /* variable offset to untrusted (map) */
+ subprog_untrusted((void *)mem + off);
+ /* variable offset to untrusted (trusted) */
+ subprog_untrusted((void *)bpf_get_current_task_btf() + off);
+ return 0;
+}
+
+__weak int subprog_untrusted2(struct task_struct *task __arg_untrusted)
+{
+ return subprog_trusted_task_nullable(task);
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("R1 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#{{.*}} ('subprog_trusted_task_nullable')")
+int untrusted_to_trusted(void *ctx)
+{
+ return subprog_untrusted2(bpf_get_current_task_btf());
+}
+
+__weak int subprog_void_untrusted(void *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_char_untrusted(char *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.")
+__msg("Validating subprog_void_untrusted() func#1...")
+__msg(": R1=rdonly_untrusted_mem(sz=0)")
+int trusted_to_untrusted_mem(void *ctx)
+{
+ return subprog_void_untrusted(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted_mem(void *ctx)
+{
+ /* untrusted to untrusted mem */
+ subprog_void_untrusted(bpf_core_cast(0, struct task_struct));
+ /* map value to untrusted mem */
+ subprog_void_untrusted(mem);
+ /* scalar to untrusted mem */
+ subprog_void_untrusted(0);
+ /* variable offset to untrusted mem (map) */
+ subprog_void_untrusted((void *)mem + off);
+ /* variable offset to untrusted mem (trusted) */
+ subprog_void_untrusted(bpf_get_current_task_btf() + off);
+ /* variable offset to untrusted char/enum (map) */
+ subprog_char_untrusted(mem + off);
+ subprog_enum_untrusted((void *)mem + off);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
index 7d088ba99ea5..16b761e510f0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void)
: __clobber_all);
}
+SEC("socket")
+__description("map_ptr is never null")
+__success
+__naked void map_ptr_is_never_null(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = %[map_in_map] ll; \
+ if r1 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner")
+__success
+__naked void map_ptr_is_never_null_inner(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner spill fill")
+__success
+__naked void map_ptr_is_never_null_inner_spill_fill(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: *(u64 *)(r10 -16) = r0; \
+ r1 = *(u64 *)(r10 -16); \
+ if r1 == 0 goto l1_%=; \
+ exit; \
+l1_%=: r10 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 64 * 1024);
+ });
+} rb_in_map SEC(".maps");
+
+struct rb_ctx {
+ void *rb;
+ struct bpf_dynptr dptr;
+};
+
+static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz)
+{
+ struct rb_ctx rb_ctx = {};
+ void *rb;
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u32 rb_slot = cpu & 1;
+
+ rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot);
+ if (!rb)
+ return rb_ctx;
+
+ rb_ctx.rb = rb;
+ bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr);
+
+ return rb_ctx;
+}
+
+static __noinline void __rb_event_submit(struct rb_ctx *ctx)
+{
+ if (!ctx->rb)
+ return;
+
+ /* If the verifier (incorrectly) concludes that ctx->rb can be
+	 * NULL at this point, we'll get a "BPF_EXIT instruction in main
+	 * prog would lead to reference leak" error.
+ */
+ bpf_ringbuf_submit_dynptr(&ctx->dptr, 0);
+}
+
+SEC("socket")
+int map_ptr_is_never_null_rb(void *ctx)
+{
+ struct rb_ctx event_ctx = __rb_event_reserve(256);
+ __rb_event_submit(&event_ctx);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index 994bbc346d25..a4d8814eb5ed 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -245,7 +245,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_2(void)
{
asm volatile (" \
@@ -267,7 +273,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_3(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 9fe5d255ee37..73fee2aec698 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -231,4 +231,74 @@ __naked void bpf_cond_op_not_r10(void)
::: __clobber_all);
}
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0_w=1")
+__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_2(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed
+	 * Returning -1 is allowed.
+ asm volatile (
+ "r0 = 1;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_3(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0_w=1")
+__msg("1: (87) r0 = -r0 ; R0_w=-1")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_4(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+	 * Returning -1 is allowed.
+ */
+ asm volatile (
+ "r0 = 1;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_5(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
index fc91b414364e..1ecd34ebde19 100644
--- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -8,7 +8,7 @@
/* From include/linux/filter.h */
#define MAX_BPF_STACK 512
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
struct elem {
struct bpf_timer t;
@@ -30,6 +30,18 @@ __jited(" movabsq $0x{{.*}}, %r9")
__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x100(%r9)")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_single_prog(void)
{
asm volatile (" \
@@ -45,6 +57,9 @@ __description("No private stack")
__success
__arch_x86_64
__jited(" subq $0x8, %rsp")
+__arch_arm64
+__jited(" mov x25, sp")
+__jited(" sub sp, sp, #0x10")
__naked void no_private_stack_nested(void)
{
asm volatile (" \
@@ -81,6 +96,19 @@ __jited(" pushq %r9")
__jited(" callq 0x{{.*}}")
__jited(" popq %r9")
__jited(" xorl %eax, %eax")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl {{.*}}")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_nested_1(void)
{
asm volatile (" \
@@ -131,6 +159,24 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited("func #1")
+__jited("...")
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x7, #0x0")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_callback(void)
{
asm volatile (" \
@@ -154,6 +200,28 @@ __arch_x86_64
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x29, x30, [sp, #-0x10]!")
+__jited(" mov x29, sp")
+__jited(" stp xzr, x26, [sp, #-0x10]!")
+__jited(" mov x26, sp")
+__jited(" stp x19, x20, [sp, #-0x10]!")
+__jited(" stp x21, x22, [sp, #-0x10]!")
+__jited(" stp x23, x24, [sp, #-0x10]!")
+__jited(" stp x25, x26, [sp, #-0x10]!")
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" mov x0, #0x0")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_main_prog(void)
{
asm volatile (" \
@@ -179,6 +247,19 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_sub_prog(void)
{
asm volatile (" \
@@ -220,6 +301,10 @@ __description("Private stack, async callback, not nested")
__success __retval(0)
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
+__arch_arm64
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
int private_stack_async_callback_1(void)
{
struct bpf_timer *arr_timer;
@@ -241,6 +326,8 @@ __description("Private stack, async callback, potential nesting")
__success __retval(0)
__arch_x86_64
__jited(" subq $0x100, %rsp")
+__arch_arm64
+__jited(" sub sp, sp, #0x100")
int private_stack_async_callback_2(void)
{
struct bpf_timer *arr_timer;
diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
index 683a882b3e6d..910365201f68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
@@ -27,7 +27,7 @@ struct bpf_key {} __attribute__((preserve_access_index));
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
/* BTF FUNC records are not generated for kfuncs referenced
* from inline assembly. These records are necessary for
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall.c b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
new file mode 100644
index 000000000000..b4acce60fb9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+SEC("socket")
+__description("invalid map type for tail call")
+__failure __msg("expected prog array map for tail call")
+__failure_unpriv
+__naked void invalid_map_for_tail_call(void)
+{
+ asm volatile (" \
+ r2 = %[map_array] ll; \
+ r3 = 0; \
+ call %[bpf_tail_call]; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index a4a5e2071604..28b4f7035ceb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -572,8 +572,14 @@ l0_%=: exit; \
SEC("socket")
__description("alu32: mov u32 const")
-__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 == 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void alu32_mov_u32_const(void)
{
asm volatile (" \
@@ -619,12 +625,11 @@ __naked void pass_pointer_to_tail_call(void)
SEC("socket")
__description("unpriv: cmp map pointer with zero")
-__success __failure_unpriv __msg_unpriv("R1 pointer comparison")
+__success __success_unpriv
__retval(0)
__naked void cmp_map_pointer_with_zero(void)
{
asm volatile (" \
- r1 = 0; \
r1 = %[map_hash_8b] ll; \
if r1 == 0 goto l0_%=; \
l0_%=: r0 = 0; \
@@ -635,6 +640,22 @@ l0_%=: r0 = 0; \
}
SEC("socket")
+__description("unpriv: cmp map pointer with const")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited")
+__retval(0)
+__naked void cmp_map_pointer_with_const(void)
+{
+ asm volatile (" \
+ r1 = %[map_hash_8b] ll; \
+ if r1 == 0x0000beef goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("unpriv: write into frame pointer")
__failure __msg("frame pointer is read only")
__failure_unpriv
@@ -723,4 +744,210 @@ l0_%=: r0 = 0; \
" ::: __clobber_all);
}
+SEC("socket")
+__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr")
+__success __success_unpriv __retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 != 0x1 goto pc+2")
+/* This nospec prevents the exploit because it forces the mispredicted (not
+ * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer.
+ * This causes the CPU to realize that `r6 = r9` should have never executed. It
+ * ensures that r6 always contains a readable stack slot ptr when the insn after
+ * the nospec executes.
+ */
+__xlated_unpriv("nospec")
+__xlated_unpriv("r9 = *(u8 *)(r6 +0)")
+#endif
+__naked void unpriv_spec_v1_type_confusion(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* r0: pointer to a map array entry */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ /* r1, r2: prepared call args */ \
+ r6 = r10; \
+ r6 += -8; \
+ /* r6: pointer to readable stack slot */ \
+ r9 = 0xffffc900; \
+ r9 <<= 32; \
+ /* r9: scalar controlled by attacker */ \
+ r0 = *(u64 *)(r0 + 0); /* cache miss */ \
+ if r0 != 0x0 goto l0_%=; \
+ r6 = r9; \
+l0_%=: if r0 != 0x1 goto l1_%=; \
+ r9 = *(u8 *)(r6 + 0); \
+l1_%=: /* leak r9 */ \
+ r9 &= 1; \
+ r9 <<= 9; \
+ *(u64*)(r10 - 8) = r9; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* leak secret into is_cached(map[0|512]): */ \
+ r0 = *(u64 *)(r0 + 0); \
+l2_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V4
+__xlated_unpriv("r1 = 0x2020200005642020") /* should not matter */
+__xlated_unpriv("*(u64 *)(r10 -8) = r1")
+__xlated_unpriv("nospec")
+#endif
+__naked void unpriv_ldimm64_spectre_v4(void)
+{
+ asm volatile (" \
+ r1 = 0x2020200005642020 ll; \
+ *(u64 *)(r10 -8) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+/* starts with r0 == r8 == r9 == 0 */
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("if r9 == 0x0 goto pc+4")
+__xlated_unpriv("r2 = r0")
+/* The following nospec is required to prevent the dangerous `*(u64 *)(NOT_FP -64)
+ * = r1` in case `if r9 == 0 goto pc+4` was mispredicted because of Spectre v1.
+ * The test therefore ensures the Spectre-v4-induced nospec does not prevent the
+ * Spectre-v1-induced speculative path from being fully analyzed.
+ */
+__xlated_unpriv("nospec") /* Spectre v1 */
+__xlated_unpriv("*(u64 *)(r2 -64) = r1") /* could be used to leak r2 */
+__xlated_unpriv("nospec") /* Spectre v4 */
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r8 = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+4")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("goto pc-1") /* r1 = 0x2020200005642020 ll */
+__xlated_unpriv("goto pc-1") /* second part of ldimm64 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+ r1 = 0x2020200005642020 ll; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
index 5ba6e53571c8..af7938ce56cb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -231,6 +231,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_lt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -259,7 +263,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -271,6 +276,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_gt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -299,7 +308,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -398,7 +408,8 @@ l2_%=: r0 = 1; \
SEC("socket")
__description("map access: mixing value pointer and scalar, 1")
-__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_1(void)
{
@@ -433,6 +444,7 @@ l2_%=: /* common instruction */ \
l3_%=: /* branch B */ \
r0 = 0x13371337; \
/* verifier follows fall-through */ \
+ /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\
if r2 != 0x100000 goto l4_%=; \
r0 = 0; \
exit; \
@@ -450,7 +462,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \
SEC("socket")
__description("map access: mixing value pointer and scalar, 2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_2(void)
{
@@ -492,6 +505,7 @@ l4_%=: /* fake-dead code; targeted from branch A to \
* prevent dead code sanitization, rejected \
* via branch B however \
*/ \
+ /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\
r0 = *(u8*)(r0 + 0); \
r0 = 0; \
exit; \
@@ -1296,9 +1310,13 @@ l0_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr -= unknown scalar, 2")
-__success __failure_unpriv
-__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__success __success_unpriv
__retval(1)
+#ifdef SPEC_V1
+__xlated_unpriv("r1 &= 7")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */
+__xlated_unpriv("r0 -= r1")
+#endif
__naked void value_ptr_unknown_scalar_2_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh
new file mode 100755
index 000000000000..515b1df0501e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_map.sh
@@ -0,0 +1,398 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME="bpftool_map"
+BPF_FILE="security_bpf_map.bpf.o"
+BPF_ITER_FILE="bpf_iter_map_elem.bpf.o"
+PROTECTED_MAP_NAME="prot_map"
+NOT_PROTECTED_MAP_NAME="not_prot_map"
+BPF_FS_TMP_PARENT="/tmp"
+BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT}
+# bpftool will mount bpf file system under BPF_DIR if it is not mounted
+# under BPF_FS_PARENT.
+BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME"
+SCRIPT_DIR=$(dirname $(realpath "$0"))
+BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE"
+BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE"
+BPFTOOL_PATH="bpftool"
+# Assume the script is located under tools/testing/selftests/bpf/
+KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../)
+
+_cleanup()
+{
+ set +eu
+
+ # If BPF_DIR is a mount point this will not remove the mount point itself.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null
+
+ # Unmount if BPF filesystem was temporarily created.
+ if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then
+ # A loop and a recursive unmount are required because bpftool might
+ # create multiple mounts, for example a bind mount of the directory
+ # to itself. The bind mount is created to change mount propagation
+ # flags on an actual mount point.
+ max_attempts=3
+ attempt=0
+ while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do
+ umount -R "$BPF_DIR" 2>/dev/null
+ attempt=$((attempt+1))
+ done
+
+ # If the directory still exists, remove it now.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null
+ fi
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+check_root_privileges() {
+ if [ $(id -u) -ne 0 ]; then
+ echo "Need root privileges"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify bpftool path.
+# Parameters:
+# $1: bpftool path
+verify_bpftool_path() {
+ local bpftool_path="$1"
+ if ! "$bpftool_path" version > /dev/null 2>&1; then
+ echo "Could not run test without bpftool"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify BTF support.
+# The test requires BTF support for fmod_ret programs.
+verify_btf_support() {
+ if [ ! -f /sys/kernel/btf/vmlinux ]; then
+ echo "Could not run test without BTF support"
+ exit $ksft_skip
+ fi
+}
+
+# Function to initialize map entries with keys [0..2] and values set to 0.
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+initialize_map_entries() {
+ local map_name="$1"
+ local bpftool_path="$2"
+
+ for key in 0 1 2; do
+ "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key
+ done
+}
+
+# Test read access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+access_for_read() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+
+ # Test read access to the map.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Read access to $key in $map_name failed"
+ exit 1
+ fi
+
+ # Test read access to map's BTF data.
+ if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then
+ echo " Read access to $map_name for BTF data failed"
+ exit 1
+ fi
+}
+
+# Test write access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_write() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test entry deletion for the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_deletion() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ # Test deletion by key for the map.
+ # Before deleting, check the key exists.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Key $key does not exist in $map_name"
+ exit 1
+ fi
+
+ # Delete by key.
+ if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Deletion for $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Deletion for $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+
+ # After deleting, check the entry existence according to the expected status.
+ if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Key $key for $map_name was not deleted but should have been deleted"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "false" ]; then
+ echo "Key $key for $map_name was deleted but should have not been deleted"
+ exit 1
+ fi
+ fi
+
+ # Test creation of map's deleted entry, if deletion was successful.
+ # Otherwise, the entry exists.
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test map elements iterator.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: BPF_DIR
+# $5: bpf iterator object file path
+iterate_map_elem() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local bpf_dir="$4"
+ local bpf_file="$5"
+ local pin_path="$bpf_dir/map_iterator"
+
+ "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name"
+ if [ ! -f "$pin_path" ]; then
+ echo " Failed to pin iterator to $pin_path"
+ exit 1
+ fi
+
+ cat "$pin_path" 1>/dev/null
+ rm "$pin_path" 2>/dev/null
+}
+
+# Function to test map access with configurable write expectations
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key for rw
+# $5: key to delete
+# $6: Whether write should succeed (true/false)
+# $7: BPF_DIR
+# $8: bpf iterator object file path
+access_map() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key_for_rw="$4"
+ local key_to_del="$5"
+ local write_should_succeed="$6"
+ local bpf_dir="$7"
+ local bpf_iter_file_path="$8"
+
+ access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw"
+ access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \
+ "$write_should_succeed"
+ access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \
+ "$write_should_succeed"
+ iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \
+ "$bpf_iter_file_path"
+}
+
+# Function to test access to a map by name and by pinned path, with configurable write expectations
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+# $3: BPF_DIR
+# $4: Whether write should succeed (true/false)
+# $5: bpf iterator object file path
+test_map_access() {
+ local map_name="$1"
+ local bpftool_path="$2"
+ local bpf_dir="$3"
+ local pin_path="$bpf_dir/${map_name}_pinned"
+ local write_should_succeed="$4"
+ local bpf_iter_file_path="$5"
+
+ # Test access to the map by name.
+ access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+
+ # Pin the map to the BPF filesystem
+ "$bpftool_path" map pin name "$map_name" "$pin_path"
+ if [ ! -e "$pin_path" ]; then
+ echo " Failed to pin $map_name"
+ exit 1
+ fi
+
+ # Test access to the pinned map.
+ access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+}
+
+# Function to test map creation and map-of-maps
+# Parameters:
+# $1: bpftool path
+# $2: BPF_DIR
+test_map_creation_and_map_of_maps() {
+ local bpftool_path="$1"
+ local bpf_dir="$2"
+ local outer_map_name="outer_map_tt"
+ local inner_map_name="inner_map_tt"
+
+ "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \
+ value 4 entries 4 name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$inner_map_name" ]; then
+ echo " Failed to create inner map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \
+ key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$outer_map_name" ]; then
+ echo " Failed to create outer map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ # Add entries to the outer map by name and by pinned path.
+ "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \
+ value pinned "$bpf_dir/$inner_map_name"
+ "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \
+ name "$inner_map_name"
+
+ # The outer map should be full by now.
+ # The following map update command is expected to fail.
+ if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \
+ "$inner_map_name" 2>/dev/null; then
+ echo " Update for $outer_map_name succeeded but should have failed"
+ exit 1
+ fi
+}
+
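
For reference, the inner/outer map relationship the function above drives through bpftool can also be set up directly with libbpf: the outer hash_of_maps is created against a template inner map whose fd fixes the key/value layout. A minimal sketch, assuming libbpf and sufficient privileges; all names are illustrative:

#include <unistd.h>
#include <bpf/bpf.h>

/* Create a hash-of-maps whose inner maps must match the template's layout. */
static int create_outer_map(void)
{
        LIBBPF_OPTS(bpf_map_create_opts, opts);
        int inner_fd, outer_fd;

        inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "inner_tmpl", 4, 4, 4, NULL);
        if (inner_fd < 0)
                return -1;

        opts.inner_map_fd = inner_fd;   /* fixes key/value layout of inner maps */
        outer_fd = bpf_map_create(BPF_MAP_TYPE_HASH_OF_MAPS, "outer", 4, 4, 2, &opts);
        close(inner_fd);                /* template fd is not needed after creation */
        return outer_fd;
}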
+# Function to test map access with the btf list command
+# Parameters:
+# $1: bpftool path
+test_map_access_with_btf_list() {
+ local bpftool_path="$1"
+
+ # The btf list command iterates over maps for
+ # loaded BPF programs.
+ if ! "$bpftool_path" btf list 1>/dev/null; then
+ echo " Failed to access btf data"
+ exit 1
+ fi
+}
+
+set -eu
+
+trap cleanup_skip EXIT
+
+check_root_privileges
+
+verify_bpftool_path "$BPFTOOL_PATH"
+
+verify_btf_support
+
+trap cleanup EXIT
+
+# Load and attach the BPF programs to control maps access.
+"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach
+
+initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+
+# Activate the map protection mechanism. Protection status is controlled
+# by a value stored in the prot_status_map at index 0.
+"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0
+
+# Test protected map (write should fail).
+test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \
+ "$BPF_ITER_FILE_PATH"
+
+# Test not protected map (write should succeed).
+test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \
+ "$BPF_ITER_FILE_PATH"
+
+test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR"
+
+test_map_access_with_btf_list "$BPFTOOL_PATH"
+
+exit 0
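
More generally, the create/update/pin/lookup cycle the script exercises through the bpftool CLI maps onto a handful of libbpf calls. A rough user-space sketch, assuming a bpffs mounted at /sys/fs/bpf and root privileges; the map name and pin path are illustrative:

#include <stdio.h>
#include <bpf/bpf.h>

int main(void)
{
        __u32 key = 0, val = 42, out = 0;
        int fd, pinned_fd;

        /* "bpftool map create ... type array key 4 value 4 entries 4" */
        fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "demo_map", sizeof(key), sizeof(val), 4, NULL);
        if (fd < 0)
                return 1;

        /* "bpftool map update name demo_map key 0 0 0 0 value 42 0 0 0" */
        if (bpf_map_update_elem(fd, &key, &val, BPF_ANY))
                return 1;

        /* "bpftool map pin name demo_map /sys/fs/bpf/demo_map" */
        if (bpf_obj_pin(fd, "/sys/fs/bpf/demo_map"))
                return 1;

        /* Re-open via the pinned path and read the value back. */
        pinned_fd = bpf_obj_get("/sys/fs/bpf/demo_map");
        if (pinned_fd < 0 || bpf_map_lookup_elem(pinned_fd, &key, &out))
                return 1;

        printf("key %u -> %u\n", key, out);
        return 0;
}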
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 9551d8d5f8f9..78423cf89e01 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -40,7 +40,7 @@
#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
/* Warning: duplicated in bpf_misc.h */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -318,20 +318,14 @@ static int parse_caps(const char *str, __u64 *val, const char *name)
static int parse_retval(const char *str, int *val, const char *name)
{
- struct {
- char *name;
- int val;
- } named_values[] = {
- { "INT_MIN" , INT_MIN },
- { "POINTER_VALUE", POINTER_VALUE },
- { "TEST_DATA_LEN", TEST_DATA_LEN },
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(named_values); ++i) {
- if (strcmp(str, named_values[i].name) != 0)
- continue;
- *val = named_values[i].val;
+ /*
+ * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a
+ * single int and cannot be parsed with strtol, so we handle it
+ * separately here. In addition, it expands to different expressions in
+ * different compilers so we use a prefixed _INT_MIN instead.
+ */
+ if (strcmp(str, "_INT_MIN") == 0) {
+ *val = INT_MIN;
return 0;
}
@@ -1103,9 +1097,9 @@ void run_subtest(struct test_loader *tester,
}
}
- do_prog_test_run(bpf_program__fd(tprog), &retval,
- bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
- if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+ err = do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
+ if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
}
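
The point of the hunk above is that INT_MIN expands to (-INT_MAX - 1) rather than a single literal, so the loader now matches a dedicated "_INT_MIN" token and parses all other expected return values numerically. A stand-alone sketch of that parsing pattern (the function name is illustrative, not the loader's actual code):

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

/* Parse an expected-retval token: "_INT_MIN" or a plain number. */
static int parse_expected_retval(const char *str, int *val)
{
        long long v;
        char *end;

        if (strcmp(str, "_INT_MIN") == 0) {
                *val = INT_MIN; /* handled specially, see the comment above */
                return 0;
        }

        errno = 0;
        v = strtoll(str, &end, 0);
        if (errno || *end != '\0' || v < INT_MIN || v > INT_MAX)
                return -EINVAL;
        *val = (int)v;
        return 0;
}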
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 986ce32b113a..3fae9ce46ca9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -535,7 +535,7 @@ static void test_devmap_hash(unsigned int task, void *data)
static void test_queuemap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
@@ -591,7 +591,7 @@ static void test_queuemap(unsigned int task, void *data)
static void test_stackmap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 870694f2a359..df2222a1806f 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -460,6 +460,34 @@ static inline void *u64_to_ptr(__u64 ptr)
return (void *) (unsigned long) ptr;
}
+static inline __u32 id_from_prog_fd(int fd)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "id_from_prog_fd"))
+ return 0;
+
+ ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
+ return prog_info.id;
+}
+
+static inline __u32 id_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ ASSERT_NEQ(link_info.id, 0, "link_info.id");
+ return link_info.id;
+}
+
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
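
The id_from_prog_fd()/id_from_link_fd() helpers above are typically paired with a get-fd-by-id call to show the object is globally visible. A hedged usage sketch, assuming the usual test_progs.h environment (ASSERT_* macros, <bpf/bpf.h>); the wrapper name is illustrative:

static void check_prog_visible_by_id(int prog_fd)
{
        __u32 id = id_from_prog_fd(prog_fd);
        int fd2;

        if (!ASSERT_NEQ(id, 0, "prog id"))
                return;

        /* A privileged process can re-open the program through its ID. */
        fd2 = bpf_prog_get_fd_by_id(id);
        if (!ASSERT_GE(fd2, 0, "bpf_prog_get_fd_by_id"))
                return;
        close(fd2);
}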
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
index 220f6a963813..f997d7ec8fd0 100644
--- a/tools/testing/selftests/bpf/unpriv_helpers.c
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -1,15 +1,76 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <sys/utsname.h>
#include <unistd.h>
#include <fcntl.h>
+#include <zlib.h>
#include "unpriv_helpers.h"
-static bool get_mitigations_off(void)
+static gzFile open_config(void)
+{
+ struct utsname uts;
+ char buf[PATH_MAX];
+ gzFile config;
+
+ if (uname(&uts)) {
+ perror("uname");
+ goto config_gz;
+ }
+
+ snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release);
+ config = gzopen(buf, "rb");
+ if (config)
+ return config;
+ fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno));
+
+config_gz:
+ config = gzopen("/proc/config.gz", "rb");
+ if (!config)
+ perror("gzopen /proc/config.gz");
+ return config;
+}
+
+static int config_contains(const char *pat)
+{
+ const char *msg;
+ char buf[1024];
+ gzFile config;
+ int n, err;
+
+ config = open_config();
+ if (!config)
+ return -1;
+
+ for (;;) {
+ if (!gzgets(config, buf, sizeof(buf))) {
+ msg = gzerror(config, &err);
+ if (err == Z_ERRNO)
+ perror("gzgets /proc/config.gz");
+ else if (err != Z_OK)
+ fprintf(stderr, "gzgets /proc/config.gz: %s", msg);
+ gzclose(config);
+ return -1;
+ }
+ n = strlen(buf);
+ if (buf[n - 1] == '\n')
+ buf[n - 1] = 0;
+ if (strcmp(buf, pat) == 0) {
+ gzclose(config);
+ return 1;
+ }
+ }
+ gzclose(config);
+ return 0;
+}
+
+static bool cmdline_contains(const char *pat)
{
char cmdline[4096], *c;
int fd, ret = false;
@@ -27,7 +88,7 @@ static bool get_mitigations_off(void)
cmdline[sizeof(cmdline) - 1] = '\0';
for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
- if (strncmp(c, "mitigations=off", strlen(c)))
+ if (strncmp(c, pat, strlen(c)))
continue;
ret = true;
break;
@@ -37,8 +98,21 @@ out:
return ret;
}
+static int get_mitigations_off(void)
+{
+ int enabled_in_config;
+
+ if (cmdline_contains("mitigations=off"))
+ return 1;
+ enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y");
+ if (enabled_in_config < 0)
+ return -1;
+ return !enabled_in_config;
+}
+
bool get_unpriv_disabled(void)
{
+ int mitigations_off;
bool disabled;
char buf[2];
FILE *fd;
@@ -52,5 +126,19 @@ bool get_unpriv_disabled(void)
disabled = true;
}
- return disabled ? true : get_mitigations_off();
+ if (disabled)
+ return true;
+
+ /*
+ * Some unpriv tests rely on spectre mitigations being on.
+ * If mitigations are off or status can't be determined
+ * assume that unpriv tests are disabled.
+ */
+ mitigations_off = get_mitigations_off();
+ if (mitigations_off < 0) {
+ fprintf(stderr,
+ "Can't determine if mitigations are enabled, disabling unpriv tests.");
+ return true;
+ }
+ return mitigations_off;
}
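
With this rework, get_unpriv_disabled() folds the kernel.unprivileged_bpf_disabled sysctl and the mitigation state into a single answer, so callers only need one check. The intended call pattern is roughly the following sketch, assuming the test_progs environment where test__skip() is available; the test name is illustrative:

void test_something_unpriv(void)
{
        if (get_unpriv_disabled()) {
                /* sysctl set, mitigations off, or state unknown: skip */
                test__skip();
                return;
        }
        /* ... drop privileges and run the unprivileged part of the test ... */
}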
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 18596ae0b0c1..f3492efc8834 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -2409,3 +2409,27 @@
.errstr_unpriv = "",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
+{
+ "calls: several args with ref_obj_id",
+ .insns = {
+ /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer.
+ * With a smaller size, the verifier would reject the call to
+ * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the
+ * ref_obj_id error.
+ */
+ BPF_MOV64_IMM(BPF_REG_2, 20),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* if r0 == 0 goto <exit> */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 2 },
+ .result = REJECT,
+ .errstr = "more than one arg with ref_obj_id",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
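
The raw-insn test above passes the same ringbuf reservation as two pointer arguments so that both carry the same ref_obj_id. In restricted C the same rejection can be provoked roughly as below; this is a sketch only, the section, map name and sizes are illustrative, and the helper signatures are the ones generated in bpf_helper_defs.h:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_RINGBUF);
        __uint(max_entries, 4096);
} rb SEC(".maps");

SEC("tc")
int ref_obj_id_twice(struct __sk_buff *skb)
{
        /* Reserve at least sizeof(struct iphdr) bytes, as in the test. */
        void *buf = bpf_ringbuf_reserve(&rb, 20, 0);

        if (!buf)
                return 0;
        /* Both pointer arguments alias the same referenced object: rejected. */
        bpf_tcp_raw_gen_syncookie_ipv4(buf, buf, sizeof(struct tcphdr));
        bpf_ringbuf_discard(buf, 0);
        return 0;
}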
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index ee454327e5c6..77207b498c6f 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -2,14 +2,13 @@
"dead code: start",
.insns = {
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 7),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index 43776f6f92f4..91d83e9cb148 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -84,11 +84,10 @@
BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -149,11 +148,10 @@
BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -214,11 +212,10 @@
BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -283,11 +280,10 @@
BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -354,11 +350,10 @@
BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -425,11 +420,10 @@
BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -496,11 +490,10 @@
BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -567,11 +560,10 @@
BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -638,11 +630,10 @@
BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -709,11 +700,10 @@
BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -780,11 +770,10 @@
BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 11fc68da735e..e901eefd774a 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -78,12 +78,11 @@
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.retval = 1,
.result = ACCEPT,
},
@@ -136,13 +135,12 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -154,16 +152,16 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3),
BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index b2bb20b00952..d532dd82a3a8 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -23,6 +23,7 @@
#include <float.h>
#include <math.h>
#include <limits.h>
+#include <assert.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -49,6 +50,7 @@ enum stat_id {
STACK,
PROG_TYPE,
ATTACH_TYPE,
+ MEMORY_PEAK,
FILE_NAME,
PROG_NAME,
@@ -155,13 +157,27 @@ struct filter {
bool abs;
};
-struct var_preset {
- char *name;
+struct rvalue {
enum { INTEGRAL, ENUMERATOR } type;
union {
long long ivalue;
char *svalue;
};
+};
+
+struct field_access {
+ enum { FIELD_NAME, ARRAY_INDEX } type;
+ union {
+ char *name;
+ struct rvalue index;
+ };
+};
+
+struct var_preset {
+ struct field_access *atoms;
+ int atom_count;
+ char *full_name;
+ struct rvalue value;
bool applied;
};
@@ -208,6 +224,9 @@ static struct env {
int top_src_lines;
struct var_preset *presets;
int npresets;
+ char orig_cgroup[PATH_MAX];
+ char stat_cgroup[PATH_MAX];
+ int memory_peak_fd;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -219,6 +238,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va
return vfprintf(stderr, format, args);
}
+#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+__attribute__((format(printf, 3, 4)))
+static int log_errno_aux(const char *file, int line, const char *fmt, ...)
+{
+ int err = -errno;
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "%s:%d: ", file, line);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, " failed with error '%s'.\n", strerror(errno));
+ va_end(ap);
+ return err;
+}
+
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif
@@ -344,6 +379,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
fprintf(stderr, "invalid top N specifier: %s\n", arg);
argp_usage(state);
}
+ break;
case 'C':
env.comparison_mode = true;
break;
@@ -734,13 +770,13 @@ cleanup:
}
static const struct stat_specs default_csv_output_spec = {
- .spec_cnt = 14,
+ .spec_cnt = 15,
.ids = {
FILE_NAME, PROG_NAME, VERDICT, DURATION,
TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
- STACK,
+ STACK, MEMORY_PEAK,
},
};
@@ -781,6 +817,7 @@ static struct stat_def {
[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
[PROG_TYPE] = { "Program type", {"prog_type"}, },
[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
+ [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, },
};
static bool parse_stat_id_var(const char *name, size_t len, int *id,
@@ -854,6 +891,18 @@ static bool is_desc_sym(char c)
return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
}
+static char *rtrim(char *str)
+{
+ int i;
+
+ for (i = strlen(str) - 1; i > 0; --i) {
+ if (!isspace(str[i]))
+ break;
+ str[i] = '\0';
+ }
+ return str;
+}
+
static int parse_stat(const char *stat_name, struct stat_specs *specs)
{
int id;
@@ -1182,6 +1231,7 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
case BPF_MAP_TYPE_TASK_STORAGE:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
break;
case BPF_MAP_TYPE_STRUCT_OPS:
mask_unrelated_struct_ops_progs(obj, map, prog);
@@ -1278,16 +1328,243 @@ static int max_verifier_log_size(void)
return log_size;
}
+static bool output_stat_enabled(int id)
+{
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++)
+ if (env.output_spec.ids[i] == id)
+ return true;
+ return false;
+}
+
+__attribute__((format(printf, 2, 3)))
+static int write_one_line(const char *file, const char *fmt, ...)
+{
+ int err, saved_errno;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "w");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ errno = 0;
+ err = vfprintf(f, fmt, ap);
+ saved_errno = errno;
+ va_end(ap);
+ fclose(f);
+ errno = saved_errno;
+ return err < 0 ? -1 : 0;
+}
+
+__attribute__((format(scanf, 3, 4)))
+static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...)
+{
+ int res = 0, saved_errno = 0;
+ char *line = NULL;
+ size_t line_len;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "r");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ while (getline(&line, &line_len, f) > 0) {
+ res = vsscanf(line, fmt, ap);
+ if (res == fields_expected)
+ goto out;
+ }
+ if (ferror(f)) {
+ saved_errno = errno;
+ res = -1;
+ }
+
+out:
+ va_end(ap);
+ free(line);
+ fclose(f);
+ errno = saved_errno;
+ return res;
+}
+
+static void destroy_stat_cgroup(void)
+{
+ char buf[PATH_MAX];
+ int err;
+
+ close(env.memory_peak_fd);
+
+ if (env.orig_cgroup[0]) {
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0)
+ log_errno("moving self to original cgroup %s\n", env.orig_cgroup);
+ }
+
+ if (env.stat_cgroup[0]) {
+ err = rmdir(env.stat_cgroup);
+ if (err < 0)
+ log_errno("deletion of cgroup %s", env.stat_cgroup);
+ }
+
+ env.memory_peak_fd = -1;
+ env.orig_cgroup[0] = 0;
+ env.stat_cgroup[0] = 0;
+}
+
+/*
+ * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>,
+ * moves current process to this cgroup.
+ */
+static void create_stat_cgroup(void)
+{
+ char cgroup_fs_mount[4096];
+ char buf[4096];
+ int err;
+
+ env.memory_peak_fd = -1;
+
+ if (!output_stat_enabled(MEMORY_PEAK))
+ return;
+
+ err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s",
+ cgroup_fs_mount, buf);
+ if (err != 2) {
+ if (err < 0)
+ log_errno("reading /proc/self/mounts");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't find cgroupfs v2 mount point.\n");
+ goto err_out;
+ }
+
+ /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */
+ err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf);
+ if (err != 1) {
+ if (err < 0)
+ log_errno("reading /proc/self/cgroup");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't infer veristat process cgroup.");
+ goto err_out;
+ }
+
+ snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf);
+
+ snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid());
+ err = mkdir(buf, 0777);
+ if (err < 0) {
+ log_errno("creation of cgroup %s", buf);
+ goto err_out;
+ }
+ strcpy(env.stat_cgroup, buf);
+
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0) {
+ log_errno("entering cgroup %s", buf);
+ goto err_out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup);
+ env.memory_peak_fd = open(buf, O_RDWR | O_APPEND);
+ if (env.memory_peak_fd < 0) {
+ log_errno("opening %s", buf);
+ goto err_out;
+ }
+
+ return;
+
+err_out:
+ if (!env.quiet)
+ fprintf(stderr, "Memory usage metric unavailable.\n");
+ destroy_stat_cgroup();
+}
+
+/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */
+static long cgroup_memory_peak(void)
+{
+ long err, memory_peak;
+ char buf[32];
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0);
+ if (err <= 0) {
+ log_errno("pread(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+
+ buf[err] = 0;
+ errno = 0;
+ memory_peak = strtoll(buf, NULL, 10);
+ if (errno) {
+ log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf);
+ return -1;
+ }
+
+ return memory_peak;
+}
+
+static int reset_stat_cgroup(void)
+{
+ char buf[] = "r\n";
+ int err;
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0);
+ if (err <= 0) {
+ log_errno("pwrite(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+ return 0;
+}
+
+static int parse_rvalue(const char *val, struct rvalue *rvalue)
+{
+ long long value;
+ char *val_end;
+
+ if (val[0] == '-' || isdigit(val[0])) {
+ /* must be a number */
+ errno = 0;
+ value = strtoll(val, &val_end, 0);
+ if (errno == ERANGE) {
+ errno = 0;
+ value = strtoull(val, &val_end, 0);
+ }
+ if (errno || *val_end != '\0') {
+ fprintf(stderr, "Failed to parse value '%s'\n", val);
+ return -EINVAL;
+ }
+ rvalue->ivalue = value;
+ rvalue->type = INTEGRAL;
+ } else {
+ /* if not a number, consider it enum value */
+ rvalue->svalue = strdup(val);
+ if (!rvalue->svalue)
+ return -ENOMEM;
+ rvalue->type = ENUMERATOR;
+ }
+ return 0;
+}
+
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
{
const char *base_filename = basename(strdupa(filename));
const char *prog_name = bpf_program__name(prog);
+ long mem_peak_a, mem_peak_b, mem_peak = -1;
char *buf;
int buf_sz, log_level;
struct verif_stats *stats;
struct bpf_prog_info info;
__u32 info_len = sizeof(info);
- int err = 0;
+ int err = 0, cgroup_err;
void *tmp;
int fd;
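
The load below is bracketed by a reset and two reads of the private cgroup's memory.peak file; newer kernels allow resetting the watermark by writing to it, which is what reset_stat_cgroup() relies on. Stripped of veristat's plumbing, the measurement cycle looks roughly like this sketch (error handling trimmed, kernel support for the reset assumed):

#include <stdlib.h>
#include <unistd.h>

/* Current peak in bytes, or -1 on error; fd points at a cgroup v2 memory.peak. */
static long read_peak(int peak_fd)
{
        char buf[32];
        ssize_t n = pread(peak_fd, buf, sizeof(buf) - 1, 0);

        if (n <= 0)
                return -1;
        buf[n] = '\0';
        return strtol(buf, NULL, 10);
}

static long measure_peak_delta(int peak_fd)
{
        long before, after;

        pwrite(peak_fd, "r\n", 2, 0);   /* reset the watermark (recent kernels) */
        before = read_peak(peak_fd);
        /* ... do the memory-hungry work here, e.g. load the BPF object ... */
        after = read_peak(peak_fd);
        return (before >= 0 && after >= 0) ? after - before : -1;
}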
@@ -1332,7 +1609,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (env.force_reg_invariants)
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
- err = bpf_object__load(obj);
+ err = bpf_object__prepare(obj);
+ if (!err) {
+ cgroup_err = reset_stat_cgroup();
+ mem_peak_a = cgroup_memory_peak();
+ err = bpf_object__load(obj);
+ mem_peak_b = cgroup_memory_peak();
+ if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
+ mem_peak = mem_peak_b - mem_peak_a;
+ }
env.progs_processed++;
stats->file_name = strdup(base_filename);
@@ -1341,6 +1626,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
stats->stats[SIZE] = bpf_program__insn_cnt(prog);
stats->stats[PROG_TYPE] = bpf_program__type(prog);
stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
+ stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024);
memset(&info, 0, info_len);
fd = bpf_program__fd(prog);
@@ -1361,15 +1647,74 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
free(buf);
return 0;
-};
+}
+
+static int append_preset_atom(struct var_preset *preset, char *value, bool is_index)
+{
+ struct field_access *tmp;
+ int i = preset->atom_count;
+ int err;
+
+ tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms));
+ if (!tmp)
+ return -ENOMEM;
+
+ preset->atoms = tmp;
+ preset->atom_count++;
+
+ if (is_index) {
+ preset->atoms[i].type = ARRAY_INDEX;
+ err = parse_rvalue(value, &preset->atoms[i].index);
+ if (err)
+ return err;
+ } else {
+ preset->atoms[i].type = FIELD_NAME;
+ preset->atoms[i].name = strdup(value);
+ if (!preset->atoms[i].name)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int parse_var_atoms(const char *full_var, struct var_preset *preset)
+{
+ char expr[256], var[256], *name, *saveptr;
+ int n, len, off, err;
+
+ snprintf(expr, sizeof(expr), "%s", full_var);
+ preset->atom_count = 0;
+ while ((name = strtok_r(preset->atom_count ? NULL : expr, ".", &saveptr))) {
+ len = strlen(name);
+ /* parse variable name */
+ if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) {
+ fprintf(stderr, "Can't parse %s\n", name);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, false);
+ if (err)
+ return err;
+
+ /* parse optional array indexes */
+ while (off < len) {
+ if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) {
+ fprintf(stderr, "Can't parse %s as index\n", name + off);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, true);
+ if (err)
+ return err;
+ off += n;
+ }
+ }
+ return 0;
+}
static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr)
{
void *tmp;
struct var_preset *cur;
- char var[256], val[256], *val_end;
- long long value;
- int n;
+ char var[256], val[256];
+ int n, err;
tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets));
if (!tmp)
@@ -1379,37 +1724,25 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char *
memset(cur, 0, sizeof(*cur));
(*cnt)++;
- if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) {
+ if (sscanf(expr, " %[][a-zA-Z0-9_. ] = %s %n", var, val, &n) != 2 || n != strlen(expr)) {
fprintf(stderr, "Failed to parse expression '%s'\n", expr);
return -EINVAL;
}
+ /* Remove trailing spaces from var, as scanf may add those */
+ rtrim(var);
- if (val[0] == '-' || isdigit(val[0])) {
- /* must be a number */
- errno = 0;
- value = strtoll(val, &val_end, 0);
- if (errno == ERANGE) {
- errno = 0;
- value = strtoull(val, &val_end, 0);
- }
- if (errno || *val_end != '\0') {
- fprintf(stderr, "Failed to parse value '%s'\n", val);
- return -EINVAL;
- }
- cur->ivalue = value;
- cur->type = INTEGRAL;
- } else {
- /* if not a number, consider it enum value */
- cur->svalue = strdup(val);
- if (!cur->svalue)
- return -ENOMEM;
- cur->type = ENUMERATOR;
- }
+ err = parse_rvalue(val, &cur->value);
+ if (err)
+ return err;
- cur->name = strdup(var);
- if (!cur->name)
+ cur->full_name = strdup(var);
+ if (!cur->full_name)
return -ENOMEM;
+ err = parse_var_atoms(var, cur);
+ if (err)
+ return err;
+
return 0;
}
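
parse_var_atoms() above accepts dotted paths with optional array indexes, e.g. "cfg.tbl[2].len = 4", where an index may also be an enumerator name. The tokenization is strtok_r() on '.' plus a bracket scan per component; a stand-alone sketch of just that grammar, detached from the struct var_preset bookkeeping:

#include <stdio.h>
#include <string.h>

/* Print the components of a preset path like "cfg.tbl[2].len". */
static void dump_atoms(const char *full_var)
{
        char expr[256], var[256], *name, *saveptr;
        int off, n, first = 1;

        snprintf(expr, sizeof(expr), "%s", full_var);
        while ((name = strtok_r(first ? expr : NULL, ".", &saveptr))) {
                first = 0;
                /* leading field name */
                if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1)
                        return;
                printf("field  %s\n", var);
                /* zero or more [index] suffixes */
                while (off < (int)strlen(name)) {
                        if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1)
                                return;
                        printf("index  %s\n", var);
                        off += n;
                }
        }
}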
@@ -1486,22 +1819,96 @@ static bool is_preset_supported(const struct btf_type *t)
return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
}
-const int btf_find_member(const struct btf *btf,
- const struct btf_type *parent_type,
- __u32 parent_offset,
- const char *member_name,
- int *member_tid,
- __u32 *member_offset)
+static int find_enum_value(const struct btf *btf, const char *name, long long *value)
+{
+ const struct btf_type *t;
+ int cnt, i;
+ long long lvalue;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_any_enum(t))
+ continue;
+
+ if (enum_value_from_name(btf, t, name, &lvalue) == 0) {
+ *value = lvalue;
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result)
+{
+ int err = 0;
+
+ switch (rvalue->type) {
+ case INTEGRAL:
+ *result = rvalue->ivalue;
+ return 0;
+ case ENUMERATOR:
+ err = find_enum_value(btf, rvalue->svalue, result);
+ if (err) {
+ fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue);
+ return err;
+ }
+ return 0;
+ default:
+ fprintf(stderr, "Unknown rvalue type\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom,
+ const char *array_name, struct btf_var_secinfo *sinfo)
+{
+ const struct btf_type *t;
+ struct btf_array *barr;
+ long long idx;
+ int err;
+
+ tid = btf__resolve_type(btf, tid);
+ t = btf__type_by_id(btf, tid);
+ if (!btf_is_array(t)) {
+ fprintf(stderr, "Array index is not expected for %s\n",
+ array_name);
+ return -EINVAL;
+ }
+ barr = btf_array(t);
+ err = resolve_rvalue(btf, &atom->index, &idx);
+ if (err)
+ return err;
+ if (idx < 0 || idx >= barr->nelems) {
+ fprintf(stderr, "Array index %lld is out of bounds [0, %u): %s\n",
+ idx, barr->nelems, array_name);
+ return -EINVAL;
+ }
+ sinfo->size = btf__resolve_size(btf, barr->type);
+ sinfo->offset += sinfo->size * idx;
+ sinfo->type = btf__resolve_type(btf, barr->type);
+ return 0;
+}
+
+static int adjust_var_secinfo_member(const struct btf *btf,
+ const struct btf_type *parent_type,
+ __u32 parent_offset,
+ const char *member_name,
+ struct btf_var_secinfo *sinfo)
{
int i;
- if (!btf_is_composite(parent_type))
+ if (!btf_is_composite(parent_type)) {
+ fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name);
return -EINVAL;
+ }
for (i = 0; i < btf_vlen(parent_type); ++i) {
const struct btf_member *member;
const struct btf_type *member_type;
- int tid;
+ int tid, off;
member = btf_members(parent_type) + i;
tid = btf__resolve_type(btf, member->type);
@@ -1509,6 +1916,7 @@ const int btf_find_member(const struct btf *btf,
return -EINVAL;
member_type = btf__type_by_id(btf, tid);
+ off = parent_offset + member->offset;
if (member->name_off) {
const char *name = btf__name_by_offset(btf, member->name_off);
@@ -1518,48 +1926,62 @@ const int btf_find_member(const struct btf *btf,
name);
return -EINVAL;
}
- *member_offset = parent_offset + member->offset;
- *member_tid = tid;
+ sinfo->offset += off / 8;
+ sinfo->type = tid;
+ sinfo->size = member_type->size;
return 0;
}
} else if (btf_is_composite(member_type)) {
int err;
- err = btf_find_member(btf, member_type, parent_offset + member->offset,
- member_name, member_tid, member_offset);
+ err = adjust_var_secinfo_member(btf, member_type, off,
+ member_name, sinfo);
if (!err)
return 0;
}
}
- return -EINVAL;
+ return -ESRCH;
}
static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
- struct btf_var_secinfo *sinfo, const char *var)
+ struct btf_var_secinfo *sinfo, struct var_preset *preset)
{
- char expr[256], *saveptr;
- const struct btf_type *base_type, *member_type;
- int err, member_tid;
- char *name;
- __u32 member_offset = 0;
+ const struct btf_type *base_type;
+ const char *prev_name;
+ int err, i;
+ int tid;
- base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
- snprintf(expr, sizeof(expr), "%s", var);
- strtok_r(expr, ".", &saveptr);
+ assert(preset->atom_count > 0);
+ assert(preset->atoms[0].type == FIELD_NAME);
- while ((name = strtok_r(NULL, ".", &saveptr))) {
- err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset);
- if (err) {
- fprintf(stderr, "Could not find member %s for variable %s\n", name, var);
- return err;
+ tid = btf__resolve_type(btf, t->type);
+ base_type = btf__type_by_id(btf, tid);
+ prev_name = preset->atoms[0].name;
+
+ for (i = 1; i < preset->atom_count; ++i) {
+ struct field_access *atom = preset->atoms + i;
+
+ switch (atom->type) {
+ case ARRAY_INDEX:
+ err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo);
+ break;
+ case FIELD_NAME:
+ err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo);
+ if (err == -ESRCH)
+ fprintf(stderr, "Can't find '%s'\n", atom->name);
+ prev_name = atom->name;
+ break;
+ default:
+ fprintf(stderr, "Unknown field_access type\n");
+ return -EOPNOTSUPP;
}
- member_type = btf__type_by_id(btf, member_tid);
- sinfo->offset += member_offset / 8;
- sinfo->size = member_type->size;
- sinfo->type = member_tid;
- base_type = member_type;
+ if (err)
+ return err;
+ base_type = btf__type_by_id(btf, sinfo->type);
+ tid = sinfo->type;
}
+
return 0;
}
@@ -1569,7 +1991,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf,
{
const struct btf_type *base_type;
void *ptr;
- long long value = preset->ivalue;
+ long long value = preset->value.ivalue;
size_t size;
base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type));
@@ -1578,22 +2000,23 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf,
return -EINVAL;
}
if (!is_preset_supported(base_type)) {
- fprintf(stderr, "Setting value for type %s is not supported\n",
- btf__name_by_offset(btf, base_type->name_off));
+ fprintf(stderr, "Can't set %s. Only ints and enums are supported\n",
+ preset->full_name);
return -EINVAL;
}
- if (preset->type == ENUMERATOR) {
+ if (preset->value.type == ENUMERATOR) {
if (btf_is_any_enum(base_type)) {
- if (enum_value_from_name(btf, base_type, preset->svalue, &value)) {
+ if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) {
fprintf(stderr,
"Failed to find integer value for enum element %s\n",
- preset->svalue);
+ preset->value.svalue);
return -EINVAL;
}
} else {
fprintf(stderr, "Value %s is not supported for type %s\n",
- preset->svalue, btf__name_by_offset(btf, base_type->name_off));
+ preset->value.svalue,
+ btf__name_by_offset(btf, base_type->name_off));
return -EINVAL;
}
}
@@ -1660,20 +2083,16 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
for (j = 0; j < n; ++j, ++sinfo) {
const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
const char *var_name;
- int var_len;
if (!btf_is_var(var_type))
continue;
var_name = btf__name_by_offset(btf, var_type->name_off);
- var_len = strlen(var_name);
for (k = 0; k < npresets; ++k) {
struct btf_var_secinfo tmp_sinfo;
- if (strncmp(var_name, presets[k].name, var_len) != 0 ||
- (presets[k].name[var_len] != '\0' &&
- presets[k].name[var_len] != '.'))
+ if (strcmp(var_name, presets[k].atoms[0].name) != 0)
continue;
if (presets[k].applied) {
@@ -1683,7 +2102,7 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
}
tmp_sinfo = *sinfo;
err = adjust_var_secinfo(btf, var_type,
- &tmp_sinfo, presets[k].name);
+ &tmp_sinfo, presets + k);
if (err)
return err;
@@ -1698,7 +2117,8 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
for (i = 0; i < npresets; ++i) {
if (!presets[i].applied) {
fprintf(stderr, "Global variable preset %s has not been applied\n",
- presets[i].name);
+ presets[i].full_name);
+ err = -EINVAL;
}
presets[i].applied = false;
}
@@ -1824,6 +2244,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
case TOTAL_STATES:
case PEAK_STATES:
case MAX_STATES_PER_INSN:
+ case MEMORY_PEAK:
case MARK_READ_MAX_LEN: {
long v1 = s1->stats[id];
long v2 = s2->stats[id];
@@ -2053,6 +2474,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
case STACK:
case SIZE:
case JITED_SIZE:
+ case MEMORY_PEAK:
*val = s ? s->stats[id] : 0;
break;
default:
@@ -2139,6 +2561,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
case MARK_READ_MAX_LEN:
case SIZE:
case JITED_SIZE:
+ case MEMORY_PEAK:
case STACK: {
long val;
int err, n;
@@ -2776,7 +3199,7 @@ static void output_prog_stats(void)
static int handle_verif_mode(void)
{
- int i, err;
+ int i, err = 0;
if (env.filename_cnt == 0) {
fprintf(stderr, "Please provide path to BPF object file!\n\n");
@@ -2784,11 +3207,12 @@ static int handle_verif_mode(void)
return -EINVAL;
}
+ create_stat_cgroup();
for (i = 0; i < env.filename_cnt; i++) {
err = process_obj(env.filenames[i]);
if (err) {
fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
- return err;
+ goto out;
}
}
@@ -2796,7 +3220,9 @@ static int handle_verif_mode(void)
output_prog_stats();
- return 0;
+out:
+ destroy_stat_cgroup();
+ return err;
}
static int handle_replay_mode(void)
@@ -2826,7 +3252,7 @@ static int handle_replay_mode(void)
int main(int argc, char **argv)
{
- int err = 0, i;
+ int err = 0, i, j;
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
return 1;
@@ -2885,9 +3311,19 @@ int main(int argc, char **argv)
}
free(env.deny_filters);
for (i = 0; i < env.npresets; ++i) {
- free(env.presets[i].name);
- if (env.presets[i].type == ENUMERATOR)
- free(env.presets[i].svalue);
+ free(env.presets[i].full_name);
+ for (j = 0; j < env.presets[i].atom_count; ++j) {
+ switch (env.presets[i].atoms[j].type) {
+ case FIELD_NAME:
+ free(env.presets[i].atoms[j].name);
+ break;
+ case ARRAY_INDEX:
+ if (env.presets[i].atoms[j].index.type == ENUMERATOR)
+ free(env.presets[i].atoms[j].index.svalue);
+ break;
+ }
+ }
+ free(env.presets[i].atoms);
}
free(env.presets);
return -err;
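
Several of the veristat hunks above reduce to walking BTF to find a member and accumulate its offset. The minimal libbpf pattern for that walk is sketched below, flattened to a single level and ignoring anonymous members, arrays and bitfield encoding:

#include <string.h>
#include <bpf/btf.h>

/* Bit offset of @member in struct/union @t, or -1 if it is not present. */
static int member_bit_offset(const struct btf *btf, const struct btf_type *t,
                             const char *member)
{
        const struct btf_member *m;
        int i;

        if (!btf_is_composite(t))
                return -1;
        m = btf_members(t);
        for (i = 0; i < btf_vlen(t); i++, m++) {
                const char *name = btf__name_by_offset(btf, m->name_off);

                if (name && strcmp(name, member) == 0)
                        return m->offset;       /* bit offset within the struct */
        }
        return -1;
}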
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 79505d294c44..2f869daf8a06 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -43,6 +43,15 @@ riscv64)
BZIMAGE="arch/riscv/boot/Image"
ARCH="riscv"
;;
+ppc64el)
+ QEMU_BINARY=qemu-system-ppc64
+ QEMU_CONSOLE="hvc0"
+ # KVM has not been tested for powerpc, so it is not enabled for now.
+ HOST_FLAGS=(-machine pseries -cpu POWER9)
+ CROSS_FLAGS=(-machine pseries -cpu POWER9)
+ BZIMAGE="vmlinux"
+ ARCH="powerpc"
+ ;;
*)
echo "Unsupported architecture"
exit 1
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 0ced4026ee44..a29de0713f19 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -109,6 +109,8 @@
#include <network_helpers.h>
+#define MAX_TX_BUDGET_DEFAULT 32
+
static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
@@ -1091,11 +1093,45 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
return true;
}
+static u32 load_value(u32 *counter)
+{
+ return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+ u32 max_budget = MAX_TX_BUDGET_DEFAULT;
+ u32 cons, ready_to_send;
+ int delta;
+
+ cons = load_value(xsk->tx.consumer);
+ ready_to_send = load_value(xsk->tx.producer) - cons;
+ *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+ delta = load_value(xsk->tx.consumer) - cons;
+ /* By default, xsk should consume exactly @max_budget descriptors in
+ * one send when the max-budget limit of the while loop in
+ * __xsk_generic_xmit() is hit. Make sure that the number of
+ * descriptors to be sent is larger than @max_budget, or else
+ * tx.consumer is updated in time by xskq_cons_peek_desc(), which
+ * hides the issue we try to verify.
+ */
+ if (ready_to_send > max_budget && delta != max_budget)
+ return false;
+
+ return true;
+}
+
static int kick_tx(struct xsk_socket_info *xsk)
{
int ret;
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (xsk->check_consumer) {
+ if (!kick_tx_with_check(xsk, &ret))
+ return TEST_FAILURE;
+ } else {
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ }
if (ret >= 0)
return TEST_PASS;
if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
@@ -2613,6 +2649,23 @@ static int testapp_adjust_tail_grow_mb(struct test_spec *test)
XSK_UMEM__LARGE_FRAME_SIZE * 2);
}
+static int testapp_tx_queue_consumer(struct test_spec *test)
+{
+ int nr_packets;
+
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+ pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE);
+ test->ifobj_tx->xsk->batch_size = nr_packets;
+ test->ifobj_tx->xsk->check_consumer = true;
+
+ return testapp_validate_traffic(test);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2723,6 +2776,7 @@ static const struct test_spec tests[] = {
{.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
{.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
{.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+ {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
};
static void print_tests(void)
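
kick_tx_with_check() infers how many descriptors one sendto() consumed by sampling the shared consumer index with acquire loads before and after the kick. Detached from xskxceiver's structs, the bookkeeping is just the following sketch; the consumer pointer is the one the TX ring exposes and the socket fd comes from xsk_socket__fd():

#include <sys/socket.h>
#include <linux/types.h>

/* Number of TX descriptors the kernel consumed across a single kick. */
static unsigned int consumed_by_kick(const __u32 *consumer, int sock_fd)
{
        __u32 before, after;

        before = __atomic_load_n(consumer, __ATOMIC_ACQUIRE);
        sendto(sock_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);        /* kick generic XDP TX */
        after = __atomic_load_n(consumer, __ATOMIC_ACQUIRE);
        return after - before;
}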
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 67fc44b2813b..4df3a5d329ac 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -95,6 +95,7 @@ struct xsk_socket_info {
u32 batch_size;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
+ bool check_consumer;
};
struct pkt {
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 8d275f03e977..8d233ac95696 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -127,22 +127,42 @@ int run_test(int cpu)
return KSFT_PASS;
}
+/*
+ * Reads the suspend success count from sysfs.
+ * Returns the count on success or exits on failure.
+ */
+static int get_suspend_success_count_or_fail(void)
+{
+ FILE *fp;
+ int val;
+
+ fp = fopen("/sys/power/suspend_stats/success", "r");
+ if (!fp)
+ ksft_exit_fail_msg(
+ "Failed to open suspend_stats/success: %s\n",
+ strerror(errno));
+
+ if (fscanf(fp, "%d", &val) != 1) {
+ fclose(fp);
+ ksft_exit_fail_msg(
+ "Failed to read suspend success count\n");
+ }
+
+ fclose(fp);
+ return val;
+}
+
void suspend(void)
{
- int power_state_fd;
int timerfd;
int err;
+ int count_before;
+ int count_after;
struct itimerspec spec = {};
if (getuid() != 0)
ksft_exit_skip("Please run the test as root - Exiting.\n");
- power_state_fd = open("/sys/power/state", O_RDWR);
- if (power_state_fd < 0)
- ksft_exit_fail_msg(
- "open(\"/sys/power/state\") failed %s)\n",
- strerror(errno));
-
timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
if (timerfd < 0)
ksft_exit_fail_msg("timerfd_create() failed\n");
@@ -152,14 +172,15 @@ void suspend(void)
if (err < 0)
ksft_exit_fail_msg("timerfd_settime() failed\n");
+ count_before = get_suspend_success_count_or_fail();
+
system("(echo mem > /sys/power/state) 2> /dev/null");
- timerfd_gettime(timerfd, &spec);
- if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0)
+ count_after = get_suspend_success_count_or_fail();
+ if (count_after <= count_before)
ksft_exit_fail_msg("Failed to enter Suspend state\n");
close(timerfd);
- close(power_state_fd);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
index ed210037b29d..77b3665c73c7 100644
--- a/tools/testing/selftests/coredump/Makefile
+++ b/tools/testing/selftests/coredump/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS = $(KHDR_INCLUDES)
+CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
TEST_GEN_PROGS := stackdump_test
TEST_FILES := stackdump
diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config
new file mode 100644
index 000000000000..a05ef112b4f9
--- /dev/null
+++ b/tools/testing/selftests/coredump/config
@@ -0,0 +1,3 @@
+CONFIG_COREDUMP=y
+CONFIG_NET=y
+CONFIG_UNIX=y
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 68f8e479ac36..5a5a7a5f7e1d 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -1,12 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libgen.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
#include <linux/limits.h>
#include <pthread.h>
#include <string.h>
#include <sys/mount.h>
+#include <poll.h>
+#include <sys/epoll.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -14,16 +20,23 @@
#include <unistd.h>
#include "../kselftest_harness.h"
+#include "../filesystems/wrappers.h"
#include "../pidfd/pidfd.h"
#define STACKDUMP_FILE "stack_values"
#define STACKDUMP_SCRIPT "stackdump"
#define NUM_THREAD_SPAWN 128
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
static void *do_nothing(void *)
{
while (1)
pause();
+
+ return NULL;
}
static void crashing_child(void)
@@ -42,16 +55,32 @@ FIXTURE(coredump)
{
char original_core_pattern[256];
pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
};
+static int create_detached_tmpfs(void)
+{
+ int fd_context, fd_tmpfs;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ if (fd_context < 0)
+ return -1;
+
+ if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
+ return -1;
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ close(fd_context);
+ return fd_tmpfs;
+}
+
FIXTURE_SETUP(coredump)
{
- char buf[PATH_MAX];
FILE *file;
- char *dir;
int ret;
self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
file = fopen("/proc/sys/kernel/core_pattern", "r");
ASSERT_NE(NULL, file);
@@ -60,6 +89,8 @@ FIXTURE_SETUP(coredump)
ASSERT_LT(ret, sizeof(self->original_core_pattern));
self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
ret = fclose(file);
ASSERT_EQ(0, ret);
@@ -98,6 +129,15 @@ FIXTURE_TEARDOWN(coredump)
goto fail;
}
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
return;
fail:
/* This should never happen */
@@ -106,11 +146,10 @@ fail:
TEST_F_TIMEOUT(coredump, stackdump, 120)
{
- struct sigaction action = {};
unsigned long long stack;
char *test_dir, *line;
size_t line_length;
- char buf[PATH_MAX];
+ char buf[PAGE_SIZE];
int ret, i, status;
FILE *file;
pid_t pid;
@@ -169,153 +208,166 @@ TEST_F_TIMEOUT(coredump, stackdump, 120)
fclose(file);
}
+static int create_and_listen_unix_socket(const char *path)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ assert(strlen(path) < sizeof(addr.sun_path) - 1);
+ strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+ size_t addr_len =
+ offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ goto out;
+
+ ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
+ if (ret < 0)
+ goto out;
+
+ ret = listen(fd, 128);
+ if (ret < 0)
+ goto out;
+
+ return fd;
+
+out:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+static bool set_core_pattern(const char *pattern)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = write(fd, pattern, strlen(pattern));
+ close(fd);
+ if (ret < 0)
+ return false;
+
+ fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
+ return ret == strlen(pattern);
+}
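The helper above is the only place the tests touch core_pattern. For orientation, a short illustrative sketch (not part of the patch) of the socket addressing modes exercised below; the prefix semantics are inferred from those tests rather than stated anywhere in this file:

	/* Plain coredump socket tests: the server only receives the dump. */
	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));

	/*
	 * Request/ack tests: the kernel first sends a coredump_req and waits
	 * for a coredump_ack before dumping (see the helpers further below).
	 */
	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));

	/* More than two leading '@' characters is rejected. */
	ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));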
+
+static int get_peer_pidfd(int fd)
+{
+ int fd_peer_pidfd;
+ socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
+ &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
+ return -1;
+ }
+ return fd_peer_pidfd;
+}
+
+static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0;
+}
+
+static void
+wait_and_check_coredump_server(pid_t pid_coredump_server,
+ struct __test_metadata *const _metadata,
+ FIXTURE_DATA(coredump) *self)
+{
+ int status;
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
TEST_F(coredump, socket)
{
- int fd, pidfd, ret, status;
- FILE *file;
+ int pidfd, ret, status;
pid_t pid, pid_coredump_server;
struct stat st;
- char core_file[PATH_MAX];
struct pidfd_info info = {};
int ipc_sockets[2];
char c;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
-
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
- fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
if (fd_server < 0)
- _exit(EXIT_FAILURE);
-
- ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ goto out;
- ret = listen(fd_server, 1);
- if (ret < 0) {
- fprintf(stderr, "Failed to listen on coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
close(ipc_sockets[1]);
fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- fprintf(stderr, "Failed to accept coredump socket connection\n");
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ if (fd_coredump < 0)
+ goto out;
- fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
- ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
- &fd_peer_pidfd, &fd_peer_pidfd_len);
- if (ret < 0) {
- fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
- memset(&info, 0, sizeof(info));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
- if (ret < 0) {
- fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
- if (!(info.mask & PIDFD_INFO_COREDUMP)) {
- fprintf(stderr, "Missing coredump information from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
- if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
- fprintf(stderr, "Received connection from non-coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
fd_core_file = creat("/tmp/coredump.file", 0644);
- if (fd_core_file < 0) {
- fprintf(stderr, "Failed to create coredump file\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (fd_core_file < 0)
+ goto out;
for (;;) {
char buffer[4096];
ssize_t bytes_read, bytes_write;
bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read < 0) {
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_FAILURE);
- }
+ if (bytes_read < 0)
+ goto out;
if (bytes_read == 0)
break;
bytes_write = write(fd_core_file, buffer, bytes_read);
- if (bytes_read != bytes_write) {
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_FAILURE);
- }
+ if (bytes_read != bytes_write)
+ goto out;
}
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_SUCCESS);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -335,48 +387,27 @@ TEST_F(coredump, socket)
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_TRUE(WCOREDUMP(status));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
ASSERT_GT(st.st_size, 0);
- /*
- * We should somehow validate the produced core file.
- * For now just allow for visual inspection
- */
system("file /tmp/coredump.file");
}
TEST_F(coredump, socket_detect_userspace_client)
{
- int fd, pidfd, ret, status;
- FILE *file;
+ int pidfd, ret, status;
pid_t pid, pid_coredump_server;
struct stat st;
- char core_file[PATH_MAX];
struct pidfd_info info = {};
int ipc_sockets[2];
char c;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
@@ -384,93 +415,49 @@ TEST_F(coredump, socket_detect_userspace_client)
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
- fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
if (fd_server < 0)
- _exit(EXIT_FAILURE);
+ goto out;
- ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- ret = listen(fd_server, 1);
- if (ret < 0) {
- fprintf(stderr, "Failed to listen on coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
close(ipc_sockets[1]);
fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- fprintf(stderr, "Failed to accept coredump socket connection\n");
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
-
- fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
- ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
- &fd_peer_pidfd, &fd_peer_pidfd_len);
- if (ret < 0) {
- fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ if (fd_coredump < 0)
+ goto out;
- memset(&info, 0, sizeof(info));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
- if (ret < 0) {
- fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
- if (!(info.mask & PIDFD_INFO_COREDUMP)) {
- fprintf(stderr, "Missing coredump information from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
- if (info.coredump_mask & PIDFD_COREDUMPED) {
- fprintf(stderr, "Received unexpected connection from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
- ret = read(fd_coredump, &c, 1);
+ if (info.coredump_mask & PIDFD_COREDUMPED)
+ goto out;
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
+ if (read(fd_coredump, &c, 1) < 1)
+ goto out;
- if (ret < 1)
- _exit(EXIT_FAILURE);
- _exit(EXIT_SUCCESS);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -483,17 +470,22 @@ TEST_F(coredump, socket_detect_userspace_client)
if (pid == 0) {
int fd_socket;
ssize_t ret;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len =
+ offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd_socket < 0)
_exit(EXIT_FAILURE);
-
ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
if (ret < 0)
_exit(EXIT_FAILURE);
- (void *)write(fd_socket, &(char){ 0 }, 1);
close(fd_socket);
_exit(EXIT_SUCCESS);
}
@@ -505,15 +497,11 @@ TEST_F(coredump, socket_detect_userspace_client)
ASSERT_TRUE(WIFEXITED(status));
ASSERT_EQ(WEXITSTATUS(status), 0);
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
ASSERT_EQ(errno, ENOENT);
@@ -521,17 +509,10 @@ TEST_F(coredump, socket_detect_userspace_client)
TEST_F(coredump, socket_enoent)
{
- int pidfd, ret, status;
- FILE *file;
+ int pidfd, status;
pid_t pid;
- char core_file[PATH_MAX];
-
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
pid = fork();
ASSERT_GE(pid, 0);
@@ -549,7 +530,6 @@ TEST_F(coredump, socket_enoent)
TEST_F(coredump, socket_no_listener)
{
int pidfd, ret, status;
- FILE *file;
pid_t pid, pid_coredump_server;
int ipc_sockets[2];
char c;
@@ -560,45 +540,616 @@ TEST_F(coredump, socket_no_listener)
size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
sizeof("/tmp/coredump.socket");
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
-
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (fd_server < 0)
- _exit(EXIT_FAILURE);
+ goto out;
ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
+ if (ret < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
+ close(ipc_sockets[1]);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+static ssize_t recv_marker(int fd)
+{
+ enum coredump_mark mark = COREDUMP_MARK_REQACK;
+ ssize_t ret;
+
+ ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
+ if (ret != sizeof(mark))
+ return -1;
+
+ switch (mark) {
+ case COREDUMP_MARK_REQACK:
+ fprintf(stderr, "Received marker: ReqAck\n");
+ return COREDUMP_MARK_REQACK;
+ case COREDUMP_MARK_MINSIZE:
+ fprintf(stderr, "Received marker: MinSize\n");
+ return COREDUMP_MARK_MINSIZE;
+ case COREDUMP_MARK_MAXSIZE:
+ fprintf(stderr, "Received marker: MaxSize\n");
+ return COREDUMP_MARK_MAXSIZE;
+ case COREDUMP_MARK_UNSUPPORTED:
+ fprintf(stderr, "Received marker: Unsupported\n");
+ return COREDUMP_MARK_UNSUPPORTED;
+ case COREDUMP_MARK_CONFLICTING:
+ fprintf(stderr, "Received marker: Conflicting\n");
+ return COREDUMP_MARK_CONFLICTING;
+ default:
+ fprintf(stderr, "Received unknown marker: %u\n", mark);
+ break;
+ }
+ return -1;
+}
+
+static bool read_marker(int fd, enum coredump_mark mark)
+{
+ ssize_t ret;
+
+ ret = recv_marker(fd);
+ if (ret < 0)
+ return false;
+ return ret == mark;
+}
+
+static bool read_coredump_req(int fd, struct coredump_req *req)
+{
+ ssize_t ret;
+ size_t field_size, user_size, ack_size, kernel_size, remaining_size;
+
+ memset(req, 0, sizeof(*req));
+ field_size = sizeof(req->size);
+
+ /* Peek the size of the coredump request. */
+ ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
+ if (ret != field_size)
+ return false;
+ kernel_size = req->size;
+
+ if (kernel_size < COREDUMP_ACK_SIZE_VER0)
+ return false;
+ if (kernel_size >= PAGE_SIZE)
+ return false;
+
+ /* Use the minimum of user and kernel size to read the full request. */
+ user_size = sizeof(struct coredump_req);
+ ack_size = user_size < kernel_size ? user_size : kernel_size;
+ ret = recv(fd, req, ack_size, MSG_WAITALL);
+ if (ret != ack_size)
+ return false;
+
+ fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
+ req->size, (unsigned long long)req->mask);
+
+ if (kernel_size > user_size)
+ remaining_size = kernel_size - user_size;
+ else
+ remaining_size = 0;
+
+ if (PAGE_SIZE <= remaining_size)
+ return false;
+
+ /*
+ * Discard any additional data if the kernel's request was larger than
+ * what we knew about or cared about.
+ */
+ if (remaining_size) {
+ char buffer[PAGE_SIZE];
+
+ ret = recv(fd, buffer, remaining_size, MSG_WAITALL);
+ if (ret != remaining_size)
+ return false;
+ fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
+ }
+
+ return true;
+}
+
+static bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack)
+{
+ ssize_t ret;
+ /*
+ * Wrap struct coredump_ack in a larger struct so we can
+ * simulate sending to much data to the kernel.
+ */
+ struct large_ack_for_size_testing {
+ struct coredump_ack ack;
+ char buffer[PAGE_SIZE];
+ } large_ack = {};
+
+ if (!size_ack)
+ size_ack = sizeof(struct coredump_ack) < req->size_ack ?
+ sizeof(struct coredump_ack) :
+ req->size_ack;
+ large_ack.ack.mask = mask;
+ large_ack.ack.size = size_ack;
+ ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
+ if (ret != size_ack)
+ return false;
+
+ fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
+ size_ack, (unsigned long long)mask);
+ return true;
+}
+
+static bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask)
+{
+ if (req->size < min_size)
+ return false;
+ if ((req->mask & required_mask) != required_mask)
+ return false;
+ if (req->mask & ~required_mask)
+ return false;
+ return true;
+}
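Taken together, recv_marker/read_marker, read_coredump_req, send_coredump_ack and check_coredump_req implement the server side of the request/ack handshake used by the "@@" tests. A condensed sketch (illustrative only, error handling trimmed) of how the tests below compose them on one accepted connection; every identifier is either defined above or already used elsewhere in this file:

static int handle_one_coredump_connection(int fd_coredump, int fd_core_file)
{
	struct coredump_req req = {};
	char buffer[4096];
	ssize_t n;

	/* 1. Read and sanity-check the kernel's versioned request. */
	if (!read_coredump_req(fd_coredump, &req))
		return -1;
	if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
				COREDUMP_KERNEL | COREDUMP_USERSPACE |
				COREDUMP_REJECT | COREDUMP_WAIT))
		return -1;

	/* 2. Ask the kernel to write the dump to this socket and to wait. */
	if (!send_coredump_ack(fd_coredump, &req,
			       COREDUMP_KERNEL | COREDUMP_WAIT, 0))
		return -1;

	/* 3. The kernel confirms the ack with a ReqAck marker ... */
	if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
		return -1;

	/* 4. ... then streams the core image until EOF. */
	while ((n = read(fd_coredump, buffer, sizeof(buffer))) > 0)
		if (write(fd_core_file, buffer, n) != n)
			return -1;

	return n == 0 ? 0 : -1;
}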
+
+TEST_F(coredump, socket_request_kernel)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0)
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write)
+ goto out;
}
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_request_userspace)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_USERSPACE | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0)
+ goto out;
+
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_reject)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0)
+ goto out;
+
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
}
- close(fd_server);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_flag_combination)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
close(ipc_sockets[1]);
- _exit(EXIT_SUCCESS);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -618,10 +1169,662 @@ TEST_F(coredump, socket_no_listener)
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_FALSE(WCOREDUMP(status));
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_unknown_flag)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_small)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 / 2))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_large)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+static int open_coredump_tmpfile(int fd_tmpfs_detached)
+{
+ return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+}
+
+#define NUM_CRASHING_COREDUMPS 5
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+ struct coredump_req req = {};
+
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "Failed to create and listen on unix socket\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "Failed to notify parent via ipc socket\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
+ goto out;
+ }
+ }
+
+ close(fd_core_file);
+ close(fd_peer_pidfd);
+ close(fd_coredump);
+ fd_peer_pidfd = -1;
+ fd_coredump = -1;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ waitpid(pid[i], &status[i], 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+#define MAX_EVENTS 128
+
+static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
+{
+ int epfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ epfd = epoll_create1(0);
+ if (epfd < 0)
+ goto out;
+
+ struct epoll_event ev;
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+ ev.data.fd = fd_coredump;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0)
+ goto out;
+
+ for (;;) {
+ struct epoll_event events[1];
+ int n = epoll_wait(epfd, events, 1, -1);
+ if (n < 0)
+ break;
+
+ if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ break;
+ goto out;
+ }
+ if (bytes_read == 0)
+ goto done;
+ ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_write != bytes_read)
+ goto out;
+ }
+ }
+ }
+
+done:
+ exit_code = EXIT_SUCCESS;
+out:
+ if (epfd >= 0)
+ close(epfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ _exit(exit_code);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+ close(ipc_sockets[1]);
+
+ while (n_conns < NUM_CRASHING_COREDUMPS) {
+ int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ struct coredump_req req = {};
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+ goto out;
+ }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+ if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+ if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0))
+ goto out;
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0)
+ goto out;
+ pid_t worker = fork();
+ if (worker == 0) {
+ close(fd_server);
+ process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
+ }
+ worker_pids[n_conns] = worker;
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ n_conns++;
+ }
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+
+ /* Reap all worker processes */
+ for (int i = 0; i < n_conns; i++) {
+ int wstatus;
+ if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
+ fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
+ } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
+ fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
+ exit_code = EXIT_FAILURE;
+ }
+ }
+
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_invalid_paths)
+{
+ ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index d5dc7e0dc726..6232a46ca6e1 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -67,7 +67,7 @@ hotpluggable_cpus()
done
}
-hotplaggable_offline_cpus()
+hotpluggable_offline_cpus()
{
hotpluggable_cpus 0
}
@@ -151,7 +151,7 @@ offline_cpu_expect_fail()
online_all_hot_pluggable_cpus()
{
- for cpu in `hotplaggable_offline_cpus`; do
+ for cpu in `hotpluggable_offline_cpus`; do
online_cpu_expect_success $cpu
done
}
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index be780bcb73a3..3556f3563e08 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -12,14 +12,17 @@ TEST_GEN_FILES := \
TEST_PROGS := \
napi_id.py \
netcons_basic.sh \
+ netcons_cmdline.sh \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
netcons_sysdata.sh \
+ netpoll_basic.py \
ping.py \
queues.py \
stats.py \
shaper.py \
hds.py \
+ xdp.py \
# end of TEST_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index df2c047ffa90..fdc97355588c 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -16,6 +16,7 @@ TEST_PROGS = \
irq.py \
loopback.sh \
pp_alloc_fail.py \
+ rss_api.py \
rss_ctx.py \
rss_input_xfrm.py \
tso.py \
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
new file mode 100755
index 000000000000..ead6784d1910
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -0,0 +1,465 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Devlink Rate TC Bandwidth Test Suite
+===================================
+
+This test suite verifies the functionality of devlink-rate traffic class (TC)
+bandwidth distribution in a virtualized environment. The tests validate that
+bandwidth can be properly allocated between different traffic classes and
+that TC mapping works as expected.
+
+Test Environment:
+----------------
+- Creates 1 VF
+- Establishes a bridge connecting the VF representor and the uplink representor
+- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
+- Configures different traffic classes (TC3 and TC4) for each VLAN
+
+Test Cases:
+----------
+1. test_no_tc_mapping_bandwidth:
+ - Verifies that without TC mapping, bandwidth is NOT distributed according to
+ the configured 80/20 split between TC4 and TC3
+ - This test should fail if bandwidth matches the 80/20 split without TC
+ mapping
+ - Expected: Bandwidth should NOT be distributed as 80/20
+
+2. test_tc_mapping_bandwidth:
+ - Configures TC mapping using mqprio qdisc
+ - Verifies that with TC mapping, bandwidth IS distributed according to the
+ configured 80/20 split between TC4 and TC3
+ - Expected: Bandwidth should be distributed as 80/20
+
+Bandwidth Distribution:
+----------------------
+- TC3 (VLAN 101): Configured for 20% of total bandwidth
+- TC4 (VLAN 102): Configured for 80% of total bandwidth
+- Total bandwidth: 1Gbps
+- Tolerance: +-12%
+
+Hardware-Specific Behavior (mlx5):
+--------------------------
+mlx5 hardware enforces traffic class separation by ensuring that each transmit
+queue (SQ) is associated with a single TC. If a packet is sent on a queue that
+doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
+mapping), the hardware moves the queue to the correct TC scheduler to preserve
+traffic isolation.
+
+This behavior means that even without explicit TC-to-queue mapping, bandwidth
+enforcement may still appear to work—because the hardware dynamically adjusts
+the scheduling context. However, this can lead to performance issues in high
+rates and HOL blocking if traffic from different TCs is mixed on the same queue.
+"""
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit
+from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+from lib.py import NetDrvEpEnv, DevlinkFamily
+from lib.py import NlError
+from lib.py import cmd, defer, ethtool, ip
+
+
+class BandwidthValidator:
+ """
+ Validates bandwidth totals and per-TC shares against expected values
+ with a tolerance.
+ """
+
+ def __init__(self):
+ self.tolerance_percent = 12
+ self.expected_total_gbps = 1.0
+ self.total_min_expected = self.min_expected(self.expected_total_gbps)
+ self.total_max_expected = self.max_expected(self.expected_total_gbps)
+ self.tc_expected_percent = {
+ 3: 20.0,
+ 4: 80.0,
+ }
+
+ def min_expected(self, value):
+ """Calculates the minimum acceptable value based on tolerance."""
+ return value - (value * self.tolerance_percent / 100)
+
+ def max_expected(self, value):
+ """Calculates the maximum acceptable value based on tolerance."""
+ return value + (value * self.tolerance_percent / 100)
+
+ def bound(self, expected, value):
+ """Returns True if value is within expected tolerance."""
+ return self.min_expected(expected) <= value <= self.max_expected(expected)
+
+ def tc_bandwidth_bound(self, value, tc_ix):
+ """
+ Returns True if the given bandwidth value is within tolerance
+ for the TC's expected bandwidth.
+ """
+ expected = self.tc_expected_percent[tc_ix]
+ return self.bound(expected, value)
+
+
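A minimal illustrative sketch (not part of the test) of how the validator above can be applied to measured per-VLAN throughput; only BandwidthValidator and its methods come from this file, the function and argument names are hypothetical:

def example_check_shares(tc3_gbps, tc4_gbps):
    """Hypothetical helper: check a measured 20/80 (TC3/TC4) split."""
    validator = BandwidthValidator()
    total = tc3_gbps + tc4_gbps
    tc3_share = tc3_gbps / total * 100  # expected ~20% for TC3
    tc4_share = tc4_gbps / total * 100  # expected ~80% for TC4
    return (validator.bound(validator.expected_total_gbps, total) and
            validator.tc_bandwidth_bound(tc3_share, 3) and
            validator.tc_bandwidth_bound(tc4_share, 4))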
+def setup_vf(cfg, set_tc_mapping=True):
+ """
+ Sets up a VF on the given network interface.
+
+ Enables SR-IOV and switchdev mode, brings the VF interface up,
+ and optionally configures TC mapping using mqprio.
+ """
+ try:
+ cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
+ defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
+ try:
+ cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+ defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
+
+ time.sleep(2)
+ vf_ifc = (os.listdir(
+ f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0]
+ if vf_ifc:
+ ip(f"link set dev {vf_ifc} up")
+ else:
+ raise KsftSkipEx("VF interface not found")
+ if set_tc_mapping:
+ cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")
+
+ return vf_ifc
+
+
+def setup_vlans_on_vf(vf_ifc):
+ """
+ Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
+ """
+ vlan_configs = [
+ {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"},
+ {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"},
+ ]
+
+ for config in vlan_configs:
+ vlan_dev = f"{vf_ifc}.{config['vlan_id']}"
+ ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}")
+ ip(f"addr add {config['ip']}/29 dev {vlan_dev}")
+ ip(f"link set dev {vlan_dev} up")
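+        # egress-qos-map 0:<tc> tags default-priority (0) traffic with VLAN
+        # PCP <tc>, which the hardware uses to select the traffic class.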
+ ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}")
+ ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}")
+
+
+def get_vf_info(cfg):
+ """
+ Finds the VF representor interface and devlink port index
+ for the given PCI device used in the test environment.
+ """
+ cfg.vf_representor = None
+ cfg.vf_port_index = None
+ out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
+ ports = json.loads(out)["port"]
+
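+    # Find the eswitch port backing VF 0 on this PF; its netdev is the VF
+    # representor and its devlink port index is used for rate configuration.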
+ for port_name, props in ports.items():
+ netdev = props.get("netdev")
+
+ if (port_name.startswith(f"pci/{cfg.pci}/") and
+ props.get("vfnum") == 0):
+ cfg.vf_representor = netdev
+ cfg.vf_port_index = int(port_name.split("/")[-1])
+ break
+
+
+def setup_bridge(cfg):
+ """
+ Creates and configures a Linux bridge, with both the uplink
+ and VF representor interfaces attached to it.
+ """
+ bridge_name = f"br_{os.getpid()}"
+ ip(f"link add name {bridge_name} type bridge")
+ defer(cmd, f"ip link del name {bridge_name} type bridge")
+
+ ip(f"link set dev {cfg.ifname} master {bridge_name}")
+
+ rep_name = cfg.vf_representor
+ if rep_name:
+ ip(f"link set dev {rep_name} master {bridge_name}")
+ ip(f"link set dev {rep_name} up")
+ ksft_pr(f"Set representor {rep_name} up and added to bridge")
+ else:
+ raise KsftSkipEx("Could not find representor for the VF")
+
+ ip(f"link set dev {bridge_name} up")
+
+
+def setup_devlink_rate(cfg):
+ """
+ Configures devlink rate tx_max and traffic class bandwidth for the VF.
+ """
+ port_index = cfg.vf_port_index
+ if port_index is None:
+ raise KsftSkipEx("Could not find VF port index")
+ try:
+ cfg.devnl.rate_set({
+ "bus-name": "pci",
+ "dev-name": cfg.pci,
+ "port-index": port_index,
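+            # rate-tx-max is in bytes/s: 125000000 B/s == 1 Gbit/s, matching
+            # BandwidthValidator.expected_total_gbps.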
+ "rate-tx-max": 125000000,
+ "rate-tc-bws": [
+ {"index": 0, "bw": 0},
+ {"index": 1, "bw": 0},
+ {"index": 2, "bw": 0},
+ {"index": 3, "bw": 20},
+ {"index": 4, "bw": 80},
+ {"index": 5, "bw": 0},
+ {"index": 6, "bw": 0},
+ {"index": 7, "bw": 0},
+ ]
+ })
+ except NlError as exc:
+ if exc.error == 95: # EOPNOTSUPP
+ raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
+ raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
+
+
+def setup_remote_server(cfg):
+ """
+ Sets up VLAN interfaces and starts iperf3 servers on the remote side.
+ """
+ remote_dev = cfg.remote_ifname
+ vlan_ids = [101, 102]
+ remote_ips = ["198.51.100.1", "198.51.100.9"]
+
+ for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
+ vlan_dev = f"{remote_dev}.{vlan_id}"
+ cmd(f"ip link add link {remote_dev} name {vlan_dev} "
+ f"type vlan id {vlan_id}", host=cfg.remote)
+ cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
+ cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
+        cmd(f"iperf3 -s -1 -B {ip_addr}", background=True, host=cfg.remote)
+ defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
+
+
+def setup_test_environment(cfg, set_tc_mapping=True):
+ """
+ Sets up the complete test environment including VF creation, VLANs,
+ bridge configuration, devlink rate setup, and the remote server.
+ """
+ vf_ifc = setup_vf(cfg, set_tc_mapping)
+ ksft_pr(f"Created VF interface: {vf_ifc}")
+
+ setup_vlans_on_vf(vf_ifc)
+
+ get_vf_info(cfg)
+ setup_bridge(cfg)
+
+ setup_devlink_rate(cfg)
+ setup_remote_server(cfg)
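+    # Let the remote iperf3 servers start and bridge forwarding settle.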
+ time.sleep(2)
+
+
+def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1):
+ """
+ Runs a single iperf3 client instance, binding to the given local IP.
+ Waits on a barrier to synchronize with other threads.
+ """
+ try:
+ barrier.wait(timeout=10)
+ except Exception as exc:
+        raise KsftFailEx("iperf3 barrier wait timed out") from exc
+
+ iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"]
+ result = subprocess.run(iperf_cmd, capture_output=True, text=True,
+ check=True)
+
+ try:
+ output = json.loads(result.stdout)
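+        # Use the server-side (sum_received) totals, i.e. what actually
+        # arrived at the remote end.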
+ bits_per_second = output["end"]["sum_received"]["bits_per_second"]
+ gbps = bits_per_second / 1e9
+ if gbps < min_expected_gbps:
+ ksft_pr(
+ f"iperf3 bandwidth too low: {gbps:.2f} Gbps "
+ f"(expected ≥ {min_expected_gbps} Gbps)"
+ )
+ return None
+ return gbps
+ except json.JSONDecodeError as exc:
+ ksft_pr(f"Failed to parse iperf3 JSON output: {exc}")
+ return None
+
+
+def run_bandwidth_test():
+ """
+ Launches iperf3 client threads for each VLAN/TC pair and collects results.
+ """
+ def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix):
+ results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier)
+
+ vf_vlan_data = [
+ # (local_ip, remote_ip, TC)
+ ("198.51.100.2", "198.51.100.1", 3),
+ ("198.51.100.10", "198.51.100.9", 4),
+ ]
+
+ results = {}
+ threads = []
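+    # Start both clients at the same time so the TCs actually compete for
+    # the VF's tx_max bandwidth.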
+ start_barrier = threading.Barrier(len(vf_vlan_data))
+
+ for local_ip, remote_ip, tc_ix in vf_vlan_data:
+ thread = threading.Thread(
+ target=_run_iperf_client_thread,
+ args=(remote_ip, local_ip, results, start_barrier, tc_ix)
+ )
+ thread.start()
+ threads.append(thread)
+
+ for thread in threads:
+ thread.join()
+
+ for tc_ix, tc_bw in results.items():
+ if tc_bw is None:
+ raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth")
+
+ return results
+
+
+def calculate_bandwidth_percentages(results):
+ """
+ Calculates the percentage of total bandwidth received by TC3 and TC4.
+ """
+ if 3 not in results or 4 not in results:
+ raise KsftFailEx(f"Missing expected TC results in {results}")
+
+ tc3_bw = results[3]
+ tc4_bw = results[4]
+ total_bw = tc3_bw + tc4_bw
+ tc3_percentage = (tc3_bw / total_bw) * 100
+ tc4_percentage = (tc4_bw / total_bw) * 100
+
+ return {
+ 'tc3_bw': tc3_bw,
+ 'tc4_bw': tc4_bw,
+ 'tc3_percentage': tc3_percentage,
+ 'tc4_percentage': tc4_percentage,
+ 'total_bw': total_bw
+ }
+
+
+def print_bandwidth_results(bw_data, test_name):
+ """
+ Prints bandwidth measurements and TC usage summary for a given test.
+ """
+ ksft_pr(f"Bandwidth check results {test_name}:")
+ ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec")
+ ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%")
+ ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%")
+
+
+def verify_total_bandwidth(bw_data, validator):
+ """
+ Ensures the total measured bandwidth falls within the acceptable tolerance.
+ """
+ total = bw_data['total_bw']
+
+ if validator.bound(validator.expected_total_gbps, total):
+ return
+
+ if total < validator.total_min_expected:
+ raise KsftSkipEx(
+ f"Total bandwidth {total:.2f} Gbps < minimum "
+ f"{validator.total_min_expected:.2f} Gbps; "
+ f"parent tx_max ({validator.expected_total_gbps:.1f} G) "
+ f"not reached, cannot validate share"
+ )
+
+ raise KsftFailEx(
+ f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
+ f"{validator.total_max_expected:.2f} Gbps "
+ f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)"
+ )
+
+
+def check_bandwidth_distribution(bw_data, validator):
+ """
+ Checks whether the measured TC3 and TC4 bandwidth percentages
+ fall within their expected tolerance ranges.
+
+ Returns:
+ bool: True if both TC3 and TC4 percentages are within bounds.
+ """
+ tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3)
+ tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4)
+
+ return tc3_valid and tc4_valid
+
+
+def run_bandwidth_distribution_test(cfg, set_tc_mapping):
+ """
+ Runs parallel iperf3 tests for both TCs and collects results.
+ """
+ setup_test_environment(cfg, set_tc_mapping)
+ bandwidths = run_bandwidth_test()
+ bw_data = calculate_bandwidth_percentages(bandwidths)
+ test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
+ print_bandwidth_results(bw_data, test_name)
+
+ verify_total_bandwidth(bw_data, cfg.bw_validator)
+
+ return check_bandwidth_distribution(bw_data, cfg.bw_validator)
+
+
+def test_no_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is not split 80/20 without traffic class mapping.
+ """
+ pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping"
+ fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping"
+ is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
+
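+    # mlx5 re-schedules queues per TC even without an explicit mapping (see
+    # the module docstring), so a matching split is expected there (xfail).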
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
+ if is_mlx5:
+ raise KsftXfailEx(fail_bw_msg)
+ raise KsftFailEx(fail_bw_msg)
+ if is_mlx5:
+        raise KsftFailEx("mlx5 behavior changed: " + pass_bw_msg)
+ ksft_pr(pass_bw_msg)
+
+
+def test_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is correctly split 80/20 between TC3 and TC4
+ when traffic class mapping is set.
+ """
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
+ ksft_pr("Bandwidth is distributed as 80/20 with TC mapping")
+ else:
+ raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping")
+
+
+def main() -> None:
+ """
+ Main entry point for running the test cases.
+ """
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.devnl = DevlinkFamily()
+
+ cfg.pci = os.path.basename(
+ os.path.realpath(f"/sys/class/net/{cfg.ifname}/device")
+ )
+ if not cfg.pci:
+ raise KsftSkipEx("Could not get PCI address of the interface")
+ cfg.require_cmd("iperf3", local=True, remote=True)
+
+ cfg.bw_validator = BandwidthValidator()
+
+ cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
+
+ ksft_run(cases=cases, args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 7947650210a0..baa2f24240ba 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -51,15 +51,14 @@ def check_tx(cfg) -> None:
@ksft_disruptive
def check_tx_chunks(cfg) -> None:
- cfg.require_ipver("6")
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
with bkg(listen_cmd, exit_wait=True) as socat:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 9c03fd777f3d..712c806508b5 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -3,37 +3,37 @@
import re
from os import path
-from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
from lib.py import NetDrvEpEnv
from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
def _get_current_settings(cfg):
- output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0]
+ output = ethtool(f"-g {cfg.ifname}", json=True)[0]
return (output['rx'], output['hds-thresh'])
def _get_combined_channels(cfg):
- output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout
+ output = ethtool(f"-l {cfg.ifname}").stdout
values = re.findall(r'Combined:\s+(\d+)', output)
return int(values[1])
def _create_rss_ctx(cfg, chan):
- output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
values = re.search(r'New RSS context is (\d+)', output).group(1)
ctx_id = int(values)
- return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote))
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
def _set_flow_rule(cfg, port, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
def _set_flow_rule_rss(cfg, port, ctx_id):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
@@ -47,26 +47,26 @@ def test_zcrx(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def test_zcrx_oneshot(cfg) -> None:
@@ -78,26 +78,26 @@ def test_zcrx_oneshot(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def test_zcrx_rss(cfg) -> None:
@@ -109,27 +109,27 @@ def test_zcrx_rss(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
(ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index b582885786f5..1462a339a74b 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -7,8 +7,25 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
+
from net.lib.py import *
from drivers.net.lib.py import *
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \
+ rand_port, tool, wait_port_listen
+ from net.lib.py import fd_read_timeout
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true
+ from net.lib.py import NetNSEnter
+ from drivers.net.lib.py import GenerateTraffic
+ from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 02e4d3d7ded2..72f828021f83 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -526,12 +526,10 @@ static struct netdev_queue_id *create_queues(void)
struct netdev_queue_id *queues;
size_t i = 0;
- queues = calloc(num_queues, sizeof(*queues));
+ queues = netdev_queue_id_alloc(num_queues);
for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
+ netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
+ netdev_queue_id_set_id(&queues[i], start_queue + i);
}
return queues;
@@ -852,7 +850,6 @@ static int do_client(struct memory_buffer *mem)
ssize_t line_size = 0;
struct cmsghdr *cmsg;
char *line = NULL;
- unsigned long mid;
size_t len = 0;
int socket_fd;
__u32 ddmabuf;
diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py
new file mode 100755
index 000000000000..19847f3d4a00
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_api.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+API level tests for RSS (mostly Netlink vs IOCTL).
+"""
+
+import errno
+import glob
+import random
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import defer, ethtool, CmdExitFailure
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _require_2qs(cfg):
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+ return qcnt
+
+
+def _ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+    # "Added rule with ID 7"; we want the integer from the end
+
+def _ethtool_get_cfg(cfg, fl_type, to_nl=False):
+ descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
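+    # Translate the ethtool -n text output either into Netlink flow-hash
+    # field names (to_nl=True) or into the compact IOCTL "sdfn" letters.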
+ if to_nl:
+ converter = {
+ "IP SA": "ip-src",
+ "IP DA": "ip-dst",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3",
+ }
+
+ ret = set()
+ else:
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ }
+
+ ret = ""
+
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ if to_nl:
+ ret.add(converter[line])
+ else:
+ ret += converter[line]
+ return ret
+
+
+def test_rxfh_nl_set_fail(cfg):
+ """
+ Test error path of Netlink SET.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-name": "lo"},
+ "indir": None})
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [100000]})
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ ksft_is(ntf, None)
+
+
+def test_rxfh_nl_set_indir(cfg):
+ """
+ Test setting indirection table via Netlink.
+ """
+ qcnt = _require_2qs(cfg)
+
+ # Test some SETs with a value
+ reset = defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex}, "indir": None})
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+ # Test reset back to default
+ reset.exec()
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt)))
+
+
+def test_rxfh_nl_set_indir_ctx(cfg):
+ """
+ Test setting indirection table for a custom context via Netlink.
+ """
+ _require_2qs(cfg)
+
+ # Get setting for ctx 0, we'll make sure they don't get clobbered
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+
+ # Create context
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+
+def test_rxfh_indir_ntf(cfg):
+ """
+    Check that Netlink notifications are generated when the RSS indirection
+    table is modified.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
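+    # Change the indirection table via the IOCTL path (--disable-netlink)
+    # and expect the kernel to emit a Netlink notification anyway.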
+ ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1")
+ reset = defer(ethtool, f"-X {cfg.ifname} default")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after reset")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_is(ntf["msg"].get("context"), None)
+ ksft_ne(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_indir_ctx_ntf(cfg):
+ """
+    Check that Netlink notifications are generated when the RSS indirection
+    table is modified on an additional RSS context.
+ """
+ _require_2qs(cfg)
+
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"].get("context"), ctx_id)
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_nl_set_key(cfg):
+ """
+ Test setting hashing key via Netlink.
+ """
+
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "hkey": dflt["hkey"], "indir": None})
+
+ # Empty key should error out
+ with ksft_raises(NlError) as cm:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": None})
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey')
+
+ # Set key to random
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+
+ # Set key to random and indir tbl to something at once
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1], "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+
+def test_rxfh_fields(cfg):
+ """
+ Test reading Rx Flow Hash over Netlink.
+ """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+ ethnl = EthtoolFamily()
+
+ cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ one = _ethtool_get_cfg(cfg, fl_type, to_nl=True)
+ ksft_eq(one, cfg_nl["flow-hash"][fl_type],
+ comment="Config for " + fl_type)
+
+
+def test_rxfh_fields_set(cfg):
+ """ Test configuring Rx Flow Hash over Netlink. """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+
+ # Collect current settings
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    # symmetric hashing is config-order-sensitive; make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # symmetric hashing prevents some of the configs below
+ if cfg_old.get("input-xfrm"):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": {}})
+
+ for fl_type in flow_types:
+ cur = _ethtool_get_cfg(cfg, fl_type)
+ if cur == "sdfn":
+ change_nl = {"ip-src", "ip-dst"}
+ change_ic = "sd"
+ else:
+ change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+ change_ic = "sdfn"
+
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {fl_type: change_nl}
+ })
+ reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} "
+ f"rx-flow-hash {fl_type} {cur}")
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type],
+ comment=f"Config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(change_ic, cfg_ic,
+ comment=f"Config for {fl_type} over IOCTL")
+
+ reset.exec()
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type],
+ comment=f"Un-config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL")
+
+    # Try to set multiple at once; the defer was already installed at the start
+ change = {"ip-src"}
+ if change == cfg_old["flow-hash"]["tcp4"]:
+ change = {"ip-dst"}
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {x: change for x in flow_types}
+ })
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ ksft_eq(change, cfg_nl["flow-hash"][fl_type],
+ comment=f"multi-config for {fl_type} over Netlink")
+
+
+def test_rxfh_fields_set_xfrm(cfg):
+ """ Test changing Rx Flow Hash vs xfrm_input at once. """
+
+ def set_rss(cfg, xfrm, fh):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": xfrm, "flow-hash": fh})
+
+ # Install the reset handler
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    # symmetric hashing is config-order-sensitive; make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # Make sure we start with input-xfrm off, and tcp4 config non-sym
+ set_rss(cfg, {}, {})
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+ # Setting sym and fixing tcp4 config not expected to pass right now
+ with ksft_raises(NlError):
+ set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}})
+ # One at a time should work, hopefully
+ set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}})
+ no_support = False
+ try:
+ set_rss(cfg, {"sym-xor"}, {})
+ except NlError:
+ try:
+ set_rss(cfg, {"sym-or-xor"}, {})
+ except NlError:
+ no_support = True
+ if no_support:
+ raise KsftSkipEx("no input-xfrm supported")
+ # Disabling two at once should not work either without kernel changes
+ with ksft_raises(NlError):
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+
+def test_rxfh_fields_ntf(cfg):
+ """ Test Rx Flow Hash notifications. """
+
+ cur = _ethtool_get_cfg(cfg, "tcp4")
+ if cur == "sdfn":
+ change = {"ip-src", "ip-dst"}
+ else:
+ change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {"tcp4": change}
+ })
+ reset = defer(ethtool,
+ f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after IOCTL change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after Netlink change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+
+def test_rss_ctx_add(cfg):
+ """ Test creating an additional RSS context via Netlink """
+
+ _require_2qs(cfg)
+
+ # Test basic creation
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_ne(ctx.get("context", 0), 0)
+ ksft_ne(set(ctx.get("indir", [0])), {0},
+ comment="Driver should init the indirection table")
+
+ # Try requesting the ID we just got allocated
+ with ksft_raises(NlError) as cm:
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx.get("context"),
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ d.exec()
+ ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY)
+
+ # Test creating with a specified RSS table, and context ID
+ ctx_id = ctx.get("context")
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx_id,
+ "indir": [1],
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_eq(ctx.get("context"), ctx_id)
+ ksft_eq(set(ctx.get("indir", [0])), {1})
+
+
+def test_rss_ctx_ntf(cfg):
+ """ Test notifications for creating additional RSS contexts """
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ # Create / delete via Netlink
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ cfg.ethnl.rss_delete_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx["context"],
+ })
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+ ksft_eq(ctx, ntf["msg"])
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+    # Create / delete via IOCTL
+ ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new")
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete")
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+
+def main() -> None:
+    """ Ksft boilerplate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 648ff50bc1c3..72880e388478 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,16 +32,16 @@ def test_rss_input_xfrm(cfg, ipver):
if multiprocessing.cpu_count() < 2:
raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
if not hasattr(socket, "SO_INCOMING_CPU"):
raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
- input_xfrm = cfg.ethnl.rss_get(
- {'header': {'dev-name': cfg.ifname}}).get('input-xfrm')
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
# Check for symmetric xor/or-xor
- if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2):
+ if not input_xfrm:
raise KsftSkipEx("Symmetric RSS hash not requested")
cpus = set()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index 3370827409aa..c13dd5efa27a 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -34,7 +34,7 @@ def tcp_sock_get_retrans(sock):
def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
port = rand_port()
listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
@@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info):
remote_addr = cfg.remote_addr_v[outer_ipver]
tun_type = tun_info[0]
- tun_arg = tun_info[2]
+ tun_arg = tun_info[1]
ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
defer(ip, f"link del {tun_type}-ksft")
ip(f"link set dev {tun_type}-ksft up")
@@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info):
return remote_v4, remote_v6
+def restore_wanted_features(cfg):
+ features_cmd = ""
+ for feature in cfg.hw_features:
+ setting = "on" if feature in cfg.wanted_features else "off"
+ features_cmd += f" {feature} {setting}"
+ try:
+ ethtool(f"-K {cfg.ifname} {features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
"""Construct specific tests from the common template."""
def f(cfg):
cfg.require_ipver(outer_ipver)
+ defer(restore_wanted_features, cfg)
if not cfg.have_stat_super_count and \
not cfg.have_stat_wire_count:
raise KsftSkipEx(f"Device does not support LSO queue stats")
+ if feature not in cfg.hw_features:
+ raise KsftSkipEx(f"Device does not support {feature}")
+
ipver = outer_ipver
if tun:
remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
@@ -136,36 +151,21 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
remote_v4 = cfg.remote_addr_v["4"]
remote_v6 = cfg.remote_addr_v["6"]
- tun_partial = tun and tun[1]
- # Tunnel which can silently fall back to gso-partial
- has_gso_partial = tun and 'tx-gso-partial' in cfg.features
-
- # For TSO4 via partial we need mangleid
- if ipver == "4" and feature in cfg.partial_features:
- ksft_pr("Testing with mangleid enabled")
- if 'tx-tcp-mangleid-segmentation' not in cfg.features:
- ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
- defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
-
# First test without the feature enabled.
ethtool(f"-K {cfg.ifname} {feature} off")
- if has_gso_partial:
- ethtool(f"-K {cfg.ifname} tx-gso-partial off")
run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
- # Now test with the feature enabled.
- # For compatible tunnels only - just GSO partial, not specific feature.
- if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+ if feature in cfg.partial_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6,
- should_lso=tun_partial)
+ if ipver == "4":
+ ksft_pr("Testing with mangleid enabled")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
# Full feature enabled.
- if feature in cfg.features:
- ethtool(f"-K {cfg.ifname} {feature} on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
- else:
- raise KsftXfailEx(f"Device does not support {feature}")
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
return f
@@ -176,23 +176,39 @@ def query_nic_features(cfg) -> None:
cfg.have_stat_super_count = False
cfg.have_stat_wire_count = False
- cfg.features = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
- for f in features["active"]["bits"]["bit"]:
- cfg.features.add(f["name"])
+
+ cfg.wanted_features = set()
+ for f in features["wanted"]["bits"]["bit"]:
+ cfg.wanted_features.add(f["name"])
+
+ cfg.hw_features = set()
+ hw_all_features_cmd = ""
+ for f in features["hw"]["bits"]["bit"]:
+ if f.get("value", False):
+ feature = f["name"]
+ cfg.hw_features.add(feature)
+ hw_all_features_cmd += f" {feature} on"
+ try:
+ ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+ ksft_pr("partial gso feature detection may be impacted")
# Check which features are supported via GSO partial
cfg.partial_features = set()
- if 'tx-gso-partial' in cfg.features:
+ if 'tx-gso-partial' in cfg.hw_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial off")
no_partial = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
for f in features["active"]["bits"]["bit"]:
no_partial.add(f["name"])
- cfg.partial_features = cfg.features - no_partial
+ cfg.partial_features = cfg.hw_features - no_partial
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ restore_wanted_features(cfg)
+
stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
if stats:
if 'tx-hw-gso-packets' in stats[0]:
@@ -211,13 +227,14 @@ def main() -> None:
query_nic_features(cfg)
test_info = (
- # name, v4/v6 ethtool_feature tun:(type, partial, args)
- ("", "4", "tx-tcp-segmentation", None),
- ("", "6", "tx-tcp6-segmentation", None),
- ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
- ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
- ("gre", "4", "tx-gre-segmentation", ("gre", False, "")),
- ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
+ # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+ ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+ ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))),
)
cases = []
@@ -227,11 +244,13 @@ def main() -> None:
if info[1] and outer_ipver != info[1]:
continue
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="4"))
if info[3]:
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="6"))
+ cases += [
+ test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+ for inner_ipver in info[3][2]
+ ]
+ else:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
ksft_run(cases=cases, args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index 401e70f7f136..8711c67ad658 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -7,7 +7,21 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
+
from net.lib.py import *
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, tool, wait_port_listen
+ from net.lib.py import fd_read_timeout
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 3bccddf8cbc5..1b8bd648048f 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -259,7 +259,7 @@ class NetDrvEpEnv(NetDrvEnvBase):
if not self._require_cmd(comm, "local"):
raise KsftSkipEx("Test requires command: " + comm)
if remote:
- if not self._require_cmd(comm, "remote"):
+ if not self._require_cmd(comm, "remote", host=self.remote):
raise KsftSkipEx("Test requires (remote) command: " + comm)
def wait_hw_stats_settle(self):
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index d9c10613ae67..c4e808407cc4 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -1,21 +1,21 @@
# SPDX-License-Identifier: GPL-2.0
+import re
import time
from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
class GenerateTraffic:
def __init__(self, env, port=None):
- env.require_cmd("iperf3", remote=True)
+ env.require_cmd("iperf3", local=True, remote=True)
self.env = env
- if port is None:
- port = rand_port()
- self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True)
- wait_port_listen(port)
+ self.port = rand_port() if port is None else port
+ self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True)
+ wait_port_listen(self.port)
time.sleep(0.1)
- self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400",
+ self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400",
background=True, host=env.remote)
# Wait for traffic to ramp up
@@ -56,3 +56,16 @@ class GenerateTraffic:
ksft_pr(">> Server:")
ksft_pr(self._iperf_server.stdout)
ksft_pr(self._iperf_server.stderr)
+ self._wait_client_stopped()
+
+ def _wait_client_stopped(self, sleep=0.005, timeout=5):
+ end = time.monotonic() + timeout
+
+ live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ")
+
+ while time.monotonic() < end:
+ data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
+ if not live_port_pattern.search(data):
+ return
+ time.sleep(sleep)
+ raise Exception(f"Waiting for client to stop timed out after {timeout}s")
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 29b01b8e2215..b6071e80ebbb 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -11,9 +11,11 @@ set -euo pipefail
LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
SRCIF="" # to be populated later
-SRCIP=192.0.2.1
+SRCIP4="192.0.2.1"
+SRCIP6="fc00::1"
DSTIF="" # to be populated later
-DSTIP=192.0.2.2
+DSTIP4="192.0.2.2"
+DSTIP6="fc00::2"
PORT="6666"
MSG="netconsole selftest"
@@ -80,7 +82,23 @@ function configure_ip() {
ip link set "${SRCIF}" up
}
+function select_ipv4_or_ipv6()
+{
+ local VERSION=${1}
+
+ if [[ "$VERSION" == "ipv6" ]]
+ then
+ DSTIP="${DSTIP6}"
+ SRCIP="${SRCIP6}"
+ else
+ DSTIP="${DSTIP4}"
+ SRCIP="${SRCIP4}"
+ fi
+}
+
function set_network() {
+ local IP_VERSION=${1:-"ipv4"}
+
# setup_ns function is coming from lib.sh
setup_ns NAMESPACE
@@ -91,10 +109,13 @@ function set_network() {
# Link both interfaces back to back
link_ifaces
+ select_ipv4_or_ipv6 "${IP_VERSION}"
configure_ip
}
function create_dynamic_target() {
+ local FORMAT=${1:-"extended"}
+
DSTMAC=$(ip netns exec "${NAMESPACE}" \
ip link show "${DSTIF}" | awk '/ether/ {print $2}')
@@ -106,7 +127,33 @@ function create_dynamic_target() {
echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+ if [ "${FORMAT}" == "basic" ]
+ then
+ # Basic target does not support release
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 0 > "${NETCONS_PATH}"/extended
+ elif [ "${FORMAT}" == "extended" ]
+ then
+ echo 1 > "${NETCONS_PATH}"/extended
+ fi
+
echo 1 > "${NETCONS_PATH}"/enabled
+
+ # This will make sure that the kernel was able to
+ # load the netconsole driver configuration. The console message
+ # gets more organized/sequential as well.
+ sleep 1
+}
+
+# Generate the command line argument for netconsole in the following format:
+# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+function create_cmdline_str() {
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+ SRCPORT="1514"
+ TGTPORT="6666"
+
+ echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\""
}
# Do not append the release to the header of the message
@@ -116,16 +163,9 @@ function disable_release_append() {
echo 1 > "${NETCONS_PATH}"/enabled
}
-function cleanup() {
+function do_cleanup() {
local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
- # delete netconsole dynamic reconfiguration
- echo 0 > "${NETCONS_PATH}"/enabled
- # Remove all the keys that got created during the selftest
- find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
- # Remove the configfs entry
- rmdir "${NETCONS_PATH}"
-
# Delete netdevsim devices
echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
@@ -137,6 +177,17 @@ function cleanup() {
echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
}
+function cleanup() {
+ # delete netconsole dynamic reconfiguration
+ echo 0 > "${NETCONS_PATH}"/enabled
+ # Remove all the keys that got created during the selftest
+ find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+
+ do_cleanup
+}
+
function set_user_data() {
if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
then
@@ -152,18 +203,24 @@ function set_user_data() {
function listen_port_and_save_to() {
local OUTPUT=${1}
+ local IPVERSION=${2:-"ipv4"}
+
+ if [ "${IPVERSION}" == "ipv4" ]
+ then
+ SOCAT_MODE="UDP-LISTEN"
+ else
+ SOCAT_MODE="UDP6-LISTEN"
+ fi
+
# Just wait for 2 seconds
timeout 2 ip netns exec "${NAMESPACE}" \
- socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+ socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}"
}
-function validate_result() {
+# Only validate that the message arrived properly
+function validate_msg() {
local TMPFILENAME="$1"
- # TMPFILENAME will contain something like:
- # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
- # key=value
-
# Check if the file exists
if [ ! -f "$TMPFILENAME" ]; then
echo "FAIL: File was not generated." >&2
@@ -175,17 +232,32 @@ function validate_result() {
cat "${TMPFILENAME}" >&2
exit "${ksft_fail}"
fi
+}
- if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
- echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
+# Validate the message and userdata
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # TMPFILENAME will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # key=value
+
+ validate_msg "${TMPFILENAME}"
+
+ # userdata is not supported on basic format target,
+ # thus, do not validate it.
+ if [ "${FORMAT}" != "basic" ];
+ then
+ if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+ echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
fi
# Delete the file once it is validated, otherwise keep it
# for debugging purposes
rm "${TMPFILENAME}"
- exit "${ksft_pass}"
}
function check_for_dependencies() {
@@ -209,6 +281,11 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
+ if [ ! -f /proc/net/if_inet6 ]; then
+ echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
exit "${ksft_skip}"
@@ -224,8 +301,15 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
- if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
- echo "SKIP: IPs already in use. Skipping it" >&2
+ REGEXP4="inet.*(${SRCIP4}|${DSTIP4})"
+ REGEXP6="inet.*(${SRCIP6}|${DSTIP6})"
+ if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then
+ echo "SKIP: IPv4s already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then
+ echo "SKIP: IPv6s already in use. Skipping it" >&2
exit "${ksft_skip}"
fi
}
@@ -239,10 +323,41 @@ function check_for_taskset() {
# This is necessary if running multiple tests in a row
function pkill_socat() {
- PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}"
# socat runs under timeout(1), kill it if it is still alive
# do not fail if socat doesn't exist anymore
set +e
- pkill -f "${PROCESS_NAME}"
+ pkill -f "${PROCESS_NAME4}"
+ pkill -f "${PROCESS_NAME6}"
set -e
}
+
+# Check if netconsole was compiled as a module, otherwise exit
+function check_netconsole_module() {
+ if modinfo netconsole | grep filename: | grep -q builtin
+ then
+ echo "SKIP: netconsole should be compiled as a module" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# A wrapper to translate protocol version to udp version
+function wait_for_port() {
+ local NAMESPACE=${1}
+ local PORT=${2}
+	local IP_VERSION=${3}
+
+ if [ "${IP_VERSION}" == "ipv6" ]
+ then
+ PROTOCOL="udp6"
+ else
+ PROTOCOL="udp"
+ fi
+
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}"
+	# Even after the port is open, wait 1 second before writing;
+	# otherwise the packet could be missed and the test will fail.
+	# This happens more frequently on IPv6.
+ sleep 1
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 899b6892603f..d7505b933aef 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource changed
# following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 482ebb744eba..7b98cdd0580d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
continue
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource
# changed following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
index 356bac46ba04..d05eddcad539 100755
--- a/tools/testing/selftests/drivers/net/napi_id.py
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -7,10 +7,10 @@ from lib.py import bkg, cmd, rand_port, NetNSEnter
def test_napi_id(cfg) -> None:
port = rand_port()
- listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}"
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}"
with bkg(listen_cmd, ksft_wait=3) as server:
- cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True)
+ cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True)
ksft_eq(0, server.ret)
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
index eecd610c2109..7f49ca6c8637 100644
--- a/tools/testing/selftests/drivers/net/napi_id_helper.c
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -7,41 +7,58 @@
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
+#include <netdb.h>
#include "../../net/lib/ksft.h"
int main(int argc, char *argv[])
{
- struct sockaddr_in address;
+ struct sockaddr_storage address;
+ struct addrinfo *result;
+ struct addrinfo hints;
unsigned int napi_id;
- unsigned int port;
+ socklen_t addr_len;
socklen_t optlen;
char buf[1024];
int opt = 1;
+ int family;
int server;
int client;
int ret;
- server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ ret = getaddrinfo(argv[1], argv[2], &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret));
+ return 1;
+ }
+
+ family = result->ai_family;
+ addr_len = result->ai_addrlen;
+
+ server = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (server < 0) {
perror("socket creation failed");
+ freeaddrinfo(result);
if (errno == EAFNOSUPPORT)
return -1;
return 1;
}
- port = atoi(argv[2]);
-
if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
perror("setsockopt");
+ freeaddrinfo(result);
return 1;
}
- address.sin_family = AF_INET;
- inet_pton(AF_INET, argv[1], &address.sin_addr);
- address.sin_port = htons(port);
+ memcpy(&address, result->ai_addr, result->ai_addrlen);
+ freeaddrinfo(result);
- if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) {
+ if (bind(server, (struct sockaddr *)&address, addr_len) < 0) {
perror("bind failed");
return 1;
}
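The getaddrinfo() conversion above is the usual pattern for making a listener family-agnostic. A rough Python equivalent of that dual-stack listen, purely illustrative and not part of the patch:

import socket

def open_listener(host, port):
    # AF_UNSPEC + AI_PASSIVE resolves to either an IPv4 or IPv6 sockaddr,
    # so the same code path handles both address families.
    family, socktype, proto, _, sockaddr = socket.getaddrinfo(
        host, port, socket.AF_UNSPEC, socket.SOCK_STREAM,
        socket.IPPROTO_TCP, socket.AI_PASSIVE)[0]
    sock = socket.socket(family, socktype, proto)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(sockaddr)
    sock.listen(1)
    return sock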
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index fe765da498e8..a3446b569976 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -32,21 +32,42 @@ check_for_dependencies
echo "6 5" > /proc/sys/kernel/printk
# Remove the namespace, interfaces and netconsole target on exit
trap cleanup EXIT
-# Create one namespace and two interfaces
-set_network
-# Create a dynamic target for netconsole
-create_dynamic_target
-# Set userdata "key" with the "value" value
-set_user_data
-# Listed for netconsole port inside the namespace and destination interface
-listen_port_and_save_to "${OUTPUT_FILE}" &
-# Wait for socat to start and listen to the port.
-wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
-# Send the message
-echo "${MSG}: ${TARGET}" > /dev/kmsg
-# Wait until socat saves the file to disk
-busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
-
-# Make sure the message was received in the dst part
-# and exit
-validate_result "${OUTPUT_FILE}"
+
+# Run the test twice, with different format modes
+for FORMAT in "basic" "extended"
+do
+ for IP_VERSION in "ipv6" "ipv4"
+ do
+ echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+ # Create one namespace and two interfaces
+ set_network "${IP_VERSION}"
+ # Create a dynamic target for netconsole
+ create_dynamic_target "${FORMAT}"
+ # Only set userdata for extended format
+ if [ "$FORMAT" == "extended" ]
+ then
+ # Set userdata "key" with the "value" value
+ set_user_data
+ fi
+ # Listen on the netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+ cleanup
+ echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2
+ done
+done
+
+trap - EXIT
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
new file mode 100755
index 000000000000..ad2fb8b1c463
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a selftest to test cmdline arguments on netconsole.
+# It exercises loading of netconsole from cmdline instead of the dynamic
+# reconfiguration. This includes parsing the long netconsole= line and all the
+# flow through init_netconsole().
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+check_netconsole_module
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+# The content of kmsg will be saved to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependencies and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace and network interfaces
+trap do_cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create the command line for netconsole, with the configuration from the
+# function above
+CMDLINE="$(create_cmdline_str)"
+
+# Load the module, with the cmdline set
+modprobe netconsole "${CMDLINE}"
+
+# Listen on the netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Make sure the message was received in the dst part
+# and exit
+validate_msg "${OUTPUT_FILE}"
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
index a737e377bf08..baf69031089e 100755
--- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -53,6 +53,17 @@ function set_release() {
echo 1 > "${NETCONS_PATH}/userdata/release_enabled"
}
+# Enable the msgid to be appended to sysdata
+function set_msgid() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]]
+ then
+ echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
# Disable the sysdata cpu_nr feature
function unset_cpu_nr() {
echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
@@ -67,6 +78,10 @@ function unset_release() {
echo 0 > "${NETCONS_PATH}/userdata/release_enabled"
}
+function unset_msgid() {
+ echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
# Test if MSG contains sysdata
function validate_sysdata() {
# OUTPUT_FILE will contain something like:
@@ -74,6 +89,7 @@ function validate_sysdata() {
# userdatakey=userdatavalue
# cpu=X
# taskname=<taskname>
+ # msgid=<id>
# Echo is what this test uses to create the message. See runtest()
# function
@@ -104,6 +120,12 @@ function validate_sysdata() {
exit "${ksft_fail}"
fi
+ if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
pkill_socat
}
@@ -155,6 +177,12 @@ function validate_no_sysdata() {
exit "${ksft_fail}"
fi
+ if grep -q "msgid=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
}
@@ -206,6 +234,7 @@ set_cpu_nr
# Enable taskname to be appended to sysdata
set_taskname
set_release
+set_msgid
runtest
# Make sure the message was received in the dst part
# and exit
@@ -235,6 +264,7 @@ MSG="Test #3 from CPU${CPU}"
unset_cpu_nr
unset_taskname
unset_release
+unset_msgid
runtest
# At this time, cpu= shouldn't be present in the msg
validate_no_sysdata
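The new msgid checks grep for a trailing msgid=<id> in the captured output. An equivalent check, sketched in Python for illustration only:

import re

def has_msgid(output_file):
    # Mirrors the grep above: a line ending in "msgid=<decimal id>".
    with open(output_file, encoding="utf-8") as f:
        return re.search(r"msgid=[0-9]+$", f.read(), re.MULTILINE) is not None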
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index b5ea2526f23c..030762b203d7 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -40,6 +40,8 @@ fw_flash_test()
return
fi
+ echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms
+
devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates on"
@@ -608,6 +610,46 @@ rate_attr_parent_check()
check_err $? "Unexpected parent attr value $api_value != $parent"
}
+rate_attr_tc_bw_check()
+{
+ local handle=$1
+ local tc_bw=$2
+ local debug_file=$3
+
+ local tc_bw_str=""
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ tc_bw_str="$tc_bw_str $tc:$value"
+ done
+ tc_bw_str=${tc_bw_str# }
+
+ rate_attr_set "$handle" tc-bw "$tc_bw_str"
+ check_err $? "Failed to set tc-bw values"
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ local debug_value
+ debug_value=$(cat "$debug_file"/tc"${tc}"_bw)
+ check_err $? "Failed to read tc-bw value from debugfs for tc$tc"
+ [ "$debug_value" == "$value" ]
+ check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value"
+ done
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local expected_value=${bw##*:}
+ local api_value
+ api_value=$(rate_attr_get "$handle" tc_"$tc")
+ if [ "$api_value" = "null" ]; then
+ api_value=0
+ fi
+ [ "$api_value" == "$expected_value" ]
+ check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value"
+ done
+}
+
rate_node_add()
{
local handle=$1
@@ -649,6 +691,13 @@ rate_test()
rate=$(($rate+100))
done
+ local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0"
+ for r_obj in $leafs
+ do
+ rate_attr_tc_bw_check "$r_obj" "$tc_bw" \
+ "$DEBUGFS_DIR"/ports/"${r_obj##*/}"
+ done
+
local node1_name='group1'
local node1="$DL_HANDLE/$node1_name"
rate_node_add "$node1"
@@ -666,6 +715,12 @@ rate_test()
rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0"
+ rate_attr_tc_bw_check $node1 "$tc_bw" \
+ "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}"
+
+
rate_node_del "$node1"
check_err $? "Failed to delete node $node1"
local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
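rate_attr_tc_bw_check() takes the bandwidth shares as space-separated tc:value pairs. A small parsing sketch, illustrative only (the function name is made up):

def parse_tc_bw(spec):
    # "0:0 1:40 2:0 ... 6:60 7:0" -> {0: 0, 1: 40, ..., 6: 60, 7: 0}
    shares = {}
    for pair in spec.split():
        tc, value = pair.split(":")
        shares[int(tc)] = int(value)
    return shares

# Both specs used in the test allocate a full 100% across the 8 TCs:
assert sum(parse_tc_bw("0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0").values()) == 100
assert sum(parse_tc_bw("0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0").values()) == 100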
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 92c2f0376c08..4c859ecdad94 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -266,7 +266,6 @@ for port in 0 1; do
echo $NSIM_ID > /sys/bus/netdevsim/new_device
else
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
@@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net)
port=0
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
-echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
echo 0 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
-new_vxlan vxlan0 10000 $NSIM_NETDEV
-
-modprobe -r vxlan
-modprobe -r udp_tunnel
-
-msg="remove tunnels"
-exp0=( 0 0 0 0 )
-check_tables
-
-msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
+exp0=( `mke 10000 1` 0 0 0 )
new_vxlan vxlan0 10000 $NSIM_NETDEV
exp0=( 0 0 0 0 )
@@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
-echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_DFS/udp_ports_shared
old_netdevs=$(ls /sys/class/net)
diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py
new file mode 100755
index 000000000000..408bd54d6779
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netpoll_basic.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Author: Breno Leitao <leitao@debian.org>
+"""
+ This test aims to evaluate the netpoll polling mechanism (as in
+ netpoll_poll_dev()). It presents a complex scenario where the network
+ attempts to send a packet but fails, prompting it to poll the NIC from within
+ the netpoll TX side.
+
+ This has been a crucial path in netpoll that was previously untested. Jakub
+ suggested using a single RX/TX queue, pushing traffic to the NIC, and then
+ sending netpoll messages (via netconsole) to trigger the poll.
+
+ In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
+ so, the test passes, otherwise it will be skipped. This test is very dependent on
+ the driver and environment, given we are trying to trigger a tricky scenario.
+"""
+
+import errno
+import logging
+import os
+import random
+import string
+import threading
+import time
+from typing import Optional
+
+from lib.py import (
+ bpftrace,
+ CmdExitFailure,
+ defer,
+ ethtool,
+ GenerateTraffic,
+ ksft_exit,
+ ksft_pr,
+ ksft_run,
+ KsftFailEx,
+ KsftSkipEx,
+ NetDrvEpEnv,
+ KsftXfailEx,
+)
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
+NETCONS_REMOTE_PORT: int = 6666
+NETCONS_LOCAL_PORT: int = 1514
+
+# Max number of netcons messages to send. Each iteration will set up
+# netconsole and send MAX_WRITES messages
+ITERATIONS: int = 20
+# Number of writes to /dev/kmsg per iteration
+MAX_WRITES: int = 40
+# MAPS contains the information coming from bpftrace. It will have only one
+# key: "hits", which tells how many times netpoll_poll_dev() was called
+MAPS: dict[str, int] = {}
+# Thread to run bpftrace in parallel
+BPF_THREAD: Optional[threading.Thread] = None
+# Time bpftrace will be running in parallel.
+BPFTRACE_TIMEOUT: int = 10
+
+
+def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
+ """
+ Read the ring size using ethtool, so it can be restored after the test
+ """
+ try:
+ ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
+ rxs = ethtool_result["rx"]
+ txs = ethtool_result["tx"]
+ except (KeyError, IndexError) as exception:
+ raise KsftSkipEx(
+ f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxs, txs
+
+
+def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
+ """Try to the number of RX and TX ringsize."""
+ rxs = ring_size[0]
+ txs = ring_size[1]
+
+ logging.debug("Setting ring size to %d/%d", rxs, txs)
+ try:
+ ethtool(f"-G {interface_name} rx {rxs} tx {txs}")
+ except CmdExitFailure:
+ # This might fail on a real device; the caller retries with a higher
+ # value and, worst case, keeps the current setting.
+ return False
+
+ return True
+
+
+def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
+ """Read the number of RX, TX and combined queues using ethtool"""
+
+ try:
+ ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
+ rxq = ethtool_result.get("rx", -1)
+ txq = ethtool_result.get("tx", -1)
+ combined = ethtool_result.get("combined", -1)
+
+ except IndexError as exception:
+ raise KsftSkipEx(
+ f"Failed to read queues numbers: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxq, txq, combined
+
+
+def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
+ """Set the number of RX, TX and combined queues using ethtool"""
+ rxq, txq, combined = queues
+
+ cmdline = f"-L {interface_name}"
+
+ if rxq != -1:
+ cmdline += f" rx {rxq}"
+ if txq != -1:
+ cmdline += f" tx {txq}"
+ if combined != -1:
+ cmdline += f" combined {combined}"
+
+ logging.debug("calling: ethtool %s", cmdline)
+
+ try:
+ ethtool(cmdline)
+ except CmdExitFailure as exception:
+ raise KsftSkipEx(
+ f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
+ ) from exception
+
+
+def netcons_generate_random_target_name() -> str:
+ """Generate a random target name starting with 'netcons'"""
+ random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+ return f"netcons_{random_suffix}"
+
+
+def netcons_create_target(
+ config_data: dict[str, str],
+ target_name: str,
+) -> None:
+ """Create a netconsole dynamic target against the interfaces"""
+ logging.debug("Using netconsole name: %s", target_name)
+ try:
+ os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
+ logging.debug(
+ "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
+ )
+ except OSError as exception:
+ if exception.errno != errno.EEXIST:
+ raise KsftFailEx(
+ f"Failed to create netconsole target directory: {exception}"
+ ) from exception
+
+ try:
+ for key, value in config_data.items():
+ path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
+ logging.debug("Writing %s to %s", key, path)
+ with open(path, "w", encoding="utf-8") as file:
+ # Always convert to string to write to file
+ file.write(str(value))
+
+ # Read all configuration values for debugging purposes
+ for debug_key in config_data.keys():
+ with open(
+ f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
+ "r",
+ encoding="utf-8",
+ ) as file:
+ content = file.read()
+ logging.debug(
+ "%s/%s/%s : %s",
+ NETCONSOLE_CONFIGFS_PATH,
+ target_name,
+ debug_key,
+ content.strip(),
+ )
+
+ except Exception as exception:
+ raise KsftFailEx(
+ f"Failed to configure netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_configure_target(
+ cfg: NetDrvEpEnv, interface_name: str, target_name: str
+) -> None:
+ """Configure netconsole on the interface with the given target name"""
+ config_data = {
+ "extended": "1",
+ "dev_name": interface_name,
+ "local_port": NETCONS_LOCAL_PORT,
+ "remote_port": NETCONS_REMOTE_PORT,
+ "local_ip": cfg.addr,
+ "remote_ip": cfg.remote_addr,
+ "remote_mac": "00:00:00:00:00:00", # Not important for this test
+ "enabled": "1",
+ }
+
+ netcons_create_target(config_data, target_name)
+ logging.debug(
+ "Created netconsole target: %s on interface %s", target_name, interface_name
+ )
+
+
+def netcons_delete_target(name: str) -> None:
+ """Delete a netconsole dynamic target"""
+ target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
+ try:
+ if os.path.exists(target_path):
+ os.rmdir(target_path)
+ except OSError as exception:
+ raise KsftFailEx(
+ f"Failed to delete netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_load_module() -> None:
+ """Try to load the netconsole module"""
+ os.system("modprobe netconsole")
+
+
+def bpftrace_call() -> None:
+ """Call bpftrace to find how many times netpoll_poll_dev() is called.
+ Output is saved in the global variable `MAPS`"""
+
+ # This is going to update the global variable, that will be seen by the
+ # main function
+ global MAPS # pylint: disable=W0603
+
+ # This will be passed to bpftrace as in bpftrace -e "expr"
+ expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
+
+ MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
+ logging.debug("BPFtrace output: %s", MAPS)
+
+
+def bpftrace_start():
+ """Start a thread to call `call_bpf` in a parallel thread"""
+ global BPF_THREAD # pylint: disable=W0603
+
+ BPF_THREAD = threading.Thread(target=bpftrace_call)
+ BPF_THREAD.start()
+ if not BPF_THREAD.is_alive():
+ raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
+
+
+def bpftrace_stop() -> None:
+ """Stop the bpftrace thread"""
+ if BPF_THREAD:
+ BPF_THREAD.join()
+
+
+def bpftrace_any_hit(join: bool) -> bool:
+ """Check if netpoll_poll_dev() was called by checking the global variable `maps`"""
+ if not BPF_THREAD:
+ raise KsftFailEx("BPFtrace didn't start")
+
+ if BPF_THREAD.is_alive():
+ if join:
+ # Wait for bpftrace to finish
+ BPF_THREAD.join()
+ else:
+ # bpftrace is still running, so, we will not check the result yet
+ return False
+
+ logging.debug("MAPS coming from bpftrace = %s", MAPS)
+ if "hits" not in MAPS.keys():
+ raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")
+
+ logging.debug("Got a total of %d hits", MAPS["hits"])
+ return MAPS["hits"] > 0
+
+
+def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ # Start bpftrace in parallel, so, it is watching
+ # netpoll_poll_dev() while we are sending netconsole messages
+ bpftrace_start()
+ defer(bpftrace_stop)
+
+ do_netpoll_flush(cfg, ifname, target_name)
+
+ if bpftrace_any_hit(join=True):
+ ksft_pr("netpoll_poll_dev() was called. Success")
+ return
+
+ raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")
+
+
+def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ netcons_configure_target(cfg, ifname, target_name)
+ retry = 0
+
+ for i in range(int(ITERATIONS)):
+ if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
+ # bpftrace is done, stop sending messages
+ break
+
+ msg = f"netcons test #{i}"
+ with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
+ for j in range(MAX_WRITES):
+ try:
+ kmsg.write(f"{msg}-{j}\n")
+ except OSError as exception:
+ # in some cases, kmsg can be busy, so, we will retry
+ time.sleep(1)
+ retry += 1
+ if retry < 5:
+ logging.info("Failed to write to kmsg. Retrying")
+ # Just retry a few times
+ continue
+ raise KsftFailEx(
+ f"Failed to write to kmsg: {exception}"
+ ) from exception
+
+ netcons_delete_target(target_name)
+ netcons_configure_target(cfg, ifname, target_name)
+ # If we sleep here, we will have a better chance of triggering a poll.
+ # This number is based on a few tests run while developing this test
+ time.sleep(0.4)
+
+
+def configure_network(ifname: str) -> None:
+ """Configure ring size and queue numbers"""
+
+ # Set defined queues to 1 to force congestion
+ prev_queues = ethtool_get_queues_cnt(ifname)
+ logging.debug("RX/TX/combined queues: %s", prev_queues)
+ # Only set the queues to 1 if they exist in the device, i.e. they are > 0
+ ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues))
+ defer(ethtool_set_queues_cnt, ifname, prev_queues)
+
+ # Try to set the ring size to some low value.
+ # Do not fail if the hardware does not accept the desired values
+ prev_ring_size = ethtool_get_ringsize(ifname)
+ for size in [(1, 1), (128, 128), (256, 256)]:
+ if ethtool_set_ringsize(ifname, size):
+ # hardware accepted the desired ringsize
+ logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
+ break
+ defer(ethtool_set_ringsize, ifname, prev_ring_size)
+
+
+def test_netpoll(cfg: NetDrvEpEnv) -> None:
+ """
+ Test netpoll by sending traffic to the interface and then sending
+ netconsole messages to trigger a poll
+ """
+
+ ifname = cfg.ifname
+ configure_network(ifname)
+ target_name = netcons_generate_random_target_name()
+ traffic = None
+
+ try:
+ traffic = GenerateTraffic(cfg)
+ do_netpoll_flush_monitored(cfg, ifname, target_name)
+ finally:
+ if traffic:
+ traffic.stop()
+
+ # Remove the netconsole target (queue and ring settings are reverted via defer)
+ netcons_delete_target(target_name)
+
+
+def test_check_dependencies() -> None:
+ """Check if the dependencies are met"""
+ if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
+ raise KsftSkipEx(
+ f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301
+ )
+
+
+def main() -> None:
+ """Main function to run the test"""
+ netcons_load_module()
+ test_check_dependencies()
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(
+ [test_netpoll],
+ args=(cfg,),
+ )
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
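The bpftrace side of the test reduces to counting kprobe hits on netpoll_poll_dev(). A standalone sketch of that probe run via subprocess, assuming the bpftrace binary is available; the selftest goes through its lib.py wrapper instead, and the JSON layout may vary between bpftrace versions:

import json
import subprocess

def count_netpoll_polls(timeout_s=10):
    # Same probe expression as the test above; count() increments @hits
    # on every call to netpoll_poll_dev().
    expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
    proc = subprocess.run(
        ["timeout", "-s", "INT", str(timeout_s), "bpftrace", "-f", "json",
         "-e", expr],
        capture_output=True, text=True, check=False)
    hits = 0
    for line in proc.stdout.splitlines():
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Map dumps are emitted as {"type": "map", "data": {"@hits": N}}
        # (layout may differ slightly between bpftrace versions).
        if event.get("type") == "map":
            hits = event.get("data", {}).get("@hits", hits)
    return hits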
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index e0f114612c1a..da3623c5e8a9 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -30,7 +30,7 @@ def _test_v6(cfg) -> None:
cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
def _test_tcp(cfg) -> None:
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
port = rand_port()
listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index efcc1e10575b..c2bb5d3f1ca1 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -1,12 +1,16 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+"""
+Tests related to standard netdevice statistics.
+"""
+
import errno
import subprocess
import time
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
-from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import KsftSkipEx, KsftFailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
@@ -18,13 +22,16 @@ rtnl = RtnlFamily()
def check_pause(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support Pause config also report standard
+ pause stats.
+ """
try:
ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("pause not supported by the device")
+ raise KsftSkipEx("pause not supported by the device") from e
raise
data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
@@ -33,13 +40,16 @@ def check_pause(cfg) -> None:
def check_fec(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support FEC config also report standard
+ FEC stats.
+ """
try:
ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("FEC not supported by the device")
+ raise KsftSkipEx("FEC not supported by the device") from e
raise
data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
@@ -48,15 +58,17 @@ def check_fec(cfg) -> None:
def pkt_byte_sum(cfg) -> None:
- global netfam, rtnl
+ """
+ Check that qstat and interface stats match in value.
+ """
def get_qstat(test):
- global netfam
stats = netfam.qstats_get({}, dump=True)
if stats:
for qs in stats:
if qs["ifindex"]== test.ifindex:
return qs
+ return None
qstat = get_qstat(cfg)
if qstat is None:
@@ -77,15 +89,14 @@ def pkt_byte_sum(cfg) -> None:
for _ in range(10):
rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
if stat_cmp(rtstat, qstat) < 0:
- raise Exception("RTNL stats are lower, fetched later")
+ raise KsftFailEx("RTNL stats are lower, fetched later")
qstat = get_qstat(cfg)
if stat_cmp(rtstat, qstat) > 0:
- raise Exception("Qstats are lower, fetched later")
+ raise KsftFailEx("Qstats are lower, fetched later")
def qstat_by_ifindex(cfg) -> None:
- global netfam
- global rtnl
+ """ Qstats Netlink API tests - querying by ifindex. """
# Construct a map ifindex -> [dump, by-index, dump]
ifindexes = {}
@@ -93,7 +104,7 @@ def qstat_by_ifindex(cfg) -> None:
for entry in stats:
ifindexes[entry['ifindex']] = [entry, None, None]
- for ifindex in ifindexes.keys():
+ for ifindex in ifindexes:
entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
ksft_eq(len(entry), 1)
ifindexes[entry[0]['ifindex']][1] = entry[0]
@@ -145,7 +156,7 @@ def qstat_by_ifindex(cfg) -> None:
# Try to get stats for lowest unused ifindex but not 0
devs = rtnl.getlink({}, dump=True)
- all_ifindexes = set([dev["ifi-index"] for dev in devs])
+ all_ifindexes = set(dev["ifi-index"] for dev in devs)
lowest = 2
while lowest in all_ifindexes:
lowest += 1
@@ -158,18 +169,20 @@ def qstat_by_ifindex(cfg) -> None:
@ksft_disruptive
def check_down(cfg) -> None:
+ """ Test statistics (interface and qstat) are not impacted by ifdown """
+
try:
qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftSkipEx("qstats not supported by the device")
+ raise KsftSkipEx("qstats not supported by the device") from e
raise
ip(f"link set dev {cfg.dev['ifname']} down")
defer(ip, f"link set dev {cfg.dev['ifname']} up")
qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
- for k, v in qstat.items():
+ for k in qstat:
ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
# exercise per-queue API to make sure that "device down" state
@@ -263,6 +276,8 @@ def procfs_downup_hammer(cfg) -> None:
def main() -> None:
+ """ Ksft boiler plate main """
+
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
check_down, procfs_hammer, procfs_downup_hammer],
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
new file mode 100755
index 000000000000..1dd8bf3bf6c9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -0,0 +1,658 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This file contains tests to verify native XDP support in network drivers.
+The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib`
+directory, with each test focusing on a specific aspect of XDP functionality.
+"""
+import random
+import string
+from dataclasses import dataclass
+from enum import Enum
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr
+from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ip, bpftool, defer
+
+
+class TestConfig(Enum):
+ """Enum for XDP configuration options."""
+ MODE = 0 # Configures the BPF program for a specific test
+ PORT = 1 # Port configuration to communicate with the remote host
+ ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking
+ ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension
+
+
+class XDPAction(Enum):
+ """Enum for XDP actions."""
+ PASS = 0 # Pass the packet up to the stack
+ DROP = 1 # Drop the packet
+ TX = 2 # Route the packet to the remote host
+ TAIL_ADJST = 3 # Adjust the tail of the packet
+ HEAD_ADJST = 4 # Adjust the head of the packet
+
+
+class XDPStats(Enum):
+ """Enum for XDP statistics."""
+ RX = 0 # Count of valid packets received for testing
+ PASS = 1 # Count of packets passed up to the stack
+ DROP = 2 # Count of packets dropped
+ TX = 3 # Count of incoming packets routed to the remote host
+ ABORT = 4 # Count of packets that were aborted
+
+
+@dataclass
+class BPFProgInfo:
+ """Data class to store information about a BPF program."""
+ name: str # Name of the BPF program
+ file: str # BPF program object file
+ xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags")
+ mtu: int = 1500 # Maximum Transmission Unit, default is 1500
+
+
+def _exchg_udp(cfg, port, test_string):
+ """
+ Exchanges UDP packets between a local and remote host using the socat tool.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ test_string: String that the remote host will send.
+
+ Returns:
+ The string received by the test host.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp_cmd, exit_wait=True) as nc:
+ wait_port_listen(port, proto="udp")
+ cmd(tx_udp_cmd, host=cfg.remote, shell=True)
+
+ return nc.stdout.strip()
+
+
+def _test_udp(cfg, port, size=256):
+ """
+ Tests UDP packet exchange between a local and remote host.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ size: The length of the test string to be exchanged, default is 256 characters.
+
+ Returns:
+ bool: True if the received string matches the sent string, False otherwise.
+ """
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ return recvd_str == test_str
+
+
+def _load_xdp_prog(cfg, bpf_info):
+ """
+ Loads an XDP program onto a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+
+ Returns:
+ dict: A dictionary containing the XDP program ID, name, and associated map IDs.
+ """
+ abs_path = cfg.net_lib_dir / bpf_info.file
+ prog_info = {}
+
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+
+ cmd(
+ f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}",
+ shell=True
+ )
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+
+ xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
+ prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
+ prog_info["name"] = xdp_info["xdp"]["prog"]["name"]
+ prog_id = prog_info["id"]
+
+ map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+ prog_info["maps"] = {}
+ for map_id in map_ids:
+ name = bpftool(f"map show id {map_id}", json=True)["name"]
+ prog_info["maps"][name] = map_id
+
+ return prog_info
+
+
+def format_hex_bytes(value):
+ """
+ Helper function that converts an integer into a formatted hexadecimal byte string.
+
+ Args:
+ value: An integer representing the number to be converted.
+
+ Returns:
+ A string representing the hexadecimal equivalent of value, with bytes separated by spaces.
+ """
+ hex_str = value.to_bytes(4, byteorder='little', signed=True)
+ return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def _set_xdp_map(map_name, key, value):
+ """
+ Updates an XDP map with a given key-value pair using bpftool.
+
+ Args:
+ map_name: The name of the XDP map to update.
+ key: The key to update in the map, formatted as a hexadecimal string.
+ value: The value to associate with the key, formatted as a hexadecimal string.
+ """
+ key_formatted = format_hex_bytes(key)
+ value_formatted = format_hex_bytes(value)
+ bpftool(
+ f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+ )
+
+
+def _get_stats(xdp_map_id):
+ """
+ Retrieves and formats statistics from an XDP map.
+
+ Args:
+ xdp_map_id: The ID of the XDP map from which to retrieve statistics.
+
+ Returns:
+ A dictionary containing formatted packet statistics for various XDP actions.
+ The keys are based on the XDPStats Enum values.
+
+ Raises:
+ KsftFailEx: If the stats retrieval fails.
+ """
+ stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True)
+ if not stats_dump:
+ raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
+
+ stats_formatted = {}
+ for key in range(0, 5):
+ val = stats_dump[key]["formatted"]["value"]
+ if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
+ stats_formatted[XDPStats.RX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value:
+ stats_formatted[XDPStats.PASS.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
+ stats_formatted[XDPStats.DROP.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
+ stats_formatted[XDPStats.TX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value:
+ stats_formatted[XDPStats.ABORT.value] = val
+
+ return stats_formatted
+
+
+def _test_pass(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_PASS action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch")
+
+
+def test_xdp_native_pass_sb(cfg):
+ """
+ Tests the XDP_PASS action for the single-buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_pass(cfg, bpf_info, 256)
+
+
+def test_xdp_native_pass_mb(cfg):
+ """
+ Tests the XDP_PASS action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_pass(cfg, bpf_info, 8000)
+
+
+def _test_drop(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_DROP action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch")
+
+
+def test_xdp_native_drop_sb(cfg):
+ """
+ Tests the XDP_DROP action for a single-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_drop(cfg, bpf_info, 256)
+
+
+def test_xdp_native_drop_mb(cfg):
+ """
+ Tests the XDP_DROP action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_drop(cfg, bpf_info, 8000)
+
+
+def test_xdp_native_tx_mb(cfg):
+ """
+ Tests the XDP_TX action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000))
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+ wait_port_listen(port, proto="udp", host=cfg.remote)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ stats = _get_stats(prog_info['maps']['map_xdp_stats'])
+
+ ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+ ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch")
+
+
+def _validate_res(res, offset_lst, pkt_sz_lst):
+ """
+ Validates the result of a test.
+
+ Args:
+ res: The result of the test, which should be a dictionary with a "status" key.
+ offset_lst: List of offsets that were tested.
+ pkt_sz_lst: List of packet sizes that were tested.
+
+ Raises:
+ KsftFailEx: If the test fails to pass any combination of offset and packet size.
+ """
+ if "status" not in res:
+ raise KsftFailEx("Missing 'status' key in result dictionary")
+
+ # If the very first offset/packet-size combination failed, not a single case was successful
+ if res["status"] == "fail":
+ if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]:
+ raise KsftFailEx(f"{res['reason']}")
+
+ # Get the previous offset and packet size to report the successful run
+ tmp_idx = offset_lst.index(res["offset"])
+ prev_offset = offset_lst[tmp_idx - 1]
+ if tmp_idx == 0:
+ tmp_idx = pkt_sz_lst.index(res["pkt_sz"])
+ prev_pkt_sz = pkt_sz_lst[tmp_idx - 1]
+ else:
+ prev_pkt_sz = res["pkt_sz"]
+
+ # Use these values for error reporting
+ ksft_pr(
+ f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. "
+ f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. "
+ f"Reason: {res['reason']}"
+ )
+
+
+def _check_for_failures(recvd_str, stats):
+ """
+ Checks for common failures while adjusting headroom or tailroom.
+
+ Args:
+ recvd_str: The string received from the remote host after sending a test string.
+ stats: A dictionary containing formatted packet statistics for various XDP actions.
+
+ Returns:
+ str: A string describing the failure reason if a failure is detected, otherwise None.
+ """
+
+ # Any adjustment failure results in an abort, hence we track this counter
+ if stats[XDPStats.ABORT.value] != 0:
+ return "Adjustment failed"
+
+ # Since we are using aggregate stats for a single test across all offsets and packet sizes
+ # we can't use RX stats only to track data exchange failure without taking a previous
+ # snapshot. An easier way is to simply check for non-zero length of received string.
+ if len(recvd_str) == 0:
+ return "Data exchange failed"
+
+ # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run
+ if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]:
+ return "RX stats mismatch"
+
+ return None
+
+
+def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP tail adjustment functionality.
+
+ This function loads the appropriate XDP program based on the provided
+ program name and configures the XDP map for tail adjustment. It then
+ validates the tail adjustment by sending and receiving UDP packets
+ with specified packet sizes and offsets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ pkt_sz_lst: List of packet sizes to test.
+ offset_lst: List of offsets to validate support for tail adjustment.
+
+ Returns:
+ dict: A dictionary with test status and failure details if applicable.
+ """
+ port = rand_port()
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+
+ # Configure the XDP map for tail adjustment
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ for offset in offset_lst:
+ tag = format(random.randint(65, 90), "02x")
+
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+ if offset > 0:
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+
+ for pkt_sz in pkt_sz_lst:
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ failure = _check_for_failures(recvd_str, stats)
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset > 0:
+ expected_data = test_str + (offset * chr(int(tag, 16)))
+ else:
+ expected_data = test_str[0:pkt_sz + offset]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_tail_grow_data(cfg):
+ """
+ Tests the XDP tail adjustment by growing packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [1, 16, 32, 64, 128, 256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_tail_shrnk_data(cfg):
+ """
+ Tests the XDP tail adjustment by shrinking packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def get_hds_thresh(cfg):
+ """
+ Retrieves the header data split (HDS) threshold for a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ Returns:
+ The HDS threshold value. If the threshold is not supported or an error occurs,
+ a default value of 1500 is returned.
+ """
+ netnl = cfg.netnl
+ hds_thresh = 1500
+
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' not in rings:
+ ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
+ return hds_thresh
+ hds_thresh = rings['hds-thresh']
+ except NlError as e:
+ ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}")
+
+ return hds_thresh
+
+
+def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP head adjustment action for a multi-buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ prog: Name of the XDP program to load.
+ pkt_sz_lst: List of packet sizes to test.
+ offset_lst: List of offsets to validate support for head adjustment.
+
+ This function sets up the packet size and offset lists, then performs
+ the head adjustment test by sending and receiving UDP packets.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000))
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ hds_thresh = get_hds_thresh(cfg)
+ for offset in offset_lst:
+ for pkt_sz in pkt_sz_lst:
+ # The "head" buffer must contain at least the Ethernet header
+ # after we eat into it. We send large-enough packets, but if HDS
+ # is enabled head will only contain headers. Don't try to eat
+ # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14)
+ l2_cut_off = 28 if cfg.addr_ipver == 4 else 48
+ if pkt_sz > hds_thresh and offset > l2_cut_off:
+ ksft_pr(
+ f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and "
+ f"offset {offset} > {l2_cut_off}"
+ )
+ return {"status": "pass"}
+
+ test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ tag = format(random.randint(65, 90), '02x')
+
+ _set_xdp_map("map_xdp_setup",
+ TestConfig.ADJST_OFFSET.value,
+ offset)
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ # Check for failures around adjustment and data exchange
+ failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats']))
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset < 0:
+ expected_data = chr(int(tag, 16)) * (0 - offset) + test_str
+ else:
+ expected_data = test_str[offset:]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_head_grow_data(cfg):
+ """
+ Tests the XDP headroom growth support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully extended for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Negative values result in headroom shrinking, resulting in growing of payload
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_head_shrnk_data(cfg):
+ """
+ Tests the XDP headroom shrinking support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst function to perform the actual test. The
+ test is passed if the headroom is successfully shrunk for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Positive values result in headroom growing, resulting in shrinking of payload
+ offset_lst = [16, 32, 64, 128, 256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def main():
+ """
+ Main function to execute the XDP tests.
+
+ This function runs a series of tests to validate the XDP support for
+ both the single and multi-buffer. It uses the NetDrvEpEnv context
+ manager to manage the network driver environment and the ksft_run
+ function to execute the tests.
+ """
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.netnl = EthtoolFamily()
+ ksft_run(
+ [
+ test_xdp_native_pass_sb,
+ test_xdp_native_pass_mb,
+ test_xdp_native_drop_sb,
+ test_xdp_native_drop_mb,
+ test_xdp_native_tx_mb,
+ test_xdp_native_adjst_tail_grow_data,
+ test_xdp_native_adjst_tail_shrnk_data,
+ test_xdp_native_adjst_head_grow_data,
+ test_xdp_native_adjst_head_shrnk_data,
+ ],
+ args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
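A quick worked example of the little-endian, signed 32-bit byte formatting that format_hex_bytes() feeds to bpftool map update:

# 256 (a packet size) as a signed little-endian 32-bit value:
assert (256).to_bytes(4, byteorder="little", signed=True).hex(" ") == "00 01 00 00"
# -16 (a tail-shrink offset) wraps to 0xfffffff0:
assert (-16).to_bytes(4, byteorder="little", signed=True).hex(" ") == "f0 ff ff ff"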
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index 7afa58e2bb20..fcbdb1297e24 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -3,3 +3,4 @@ dnotify_test
devpts_pts
file_stressor
anon_inode_test
+kernfs_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index b02326193fee..73d4650af1a5 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test
+TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test
TEST_GEN_PROGS_EXTENDED := dnotify_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c
new file mode 100644
index 000000000000..16538b3b318e
--- /dev/null
+++ b/tools/testing/selftests/filesystems/kernfs_test.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "../kselftest_harness.h"
+#include "wrappers.h"
+
+TEST(kernfs_listxattr)
+{
+ int fd;
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(flistxattr(fd, NULL, 0), 0);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST(kernfs_getxattr)
+{
+ int fd;
+ char buf[1];
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0);
+ ASSERT_EQ(errno, ENODATA);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
index 73f6c6fcecab..2506f464811b 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -16,6 +16,18 @@ ocnt=`cat enabled_functions | wc -l`
echo "f:myevent1 $PLACE" >> dynamic_events
+echo "f:myevent2 $PLACE%return" >> dynamic_events
+
+# add another event
+echo "f:myevent3 $PLACE2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+grep -q myevent3 dynamic_events
+test -d events/fprobes/myevent1
+test -d events/fprobes/myevent2
+
+echo 1 > events/fprobes/myevent1/enable
# Make sure the event is attached and is the only one
grep -q $PLACE enabled_functions
cnt=`cat enabled_functions | wc -l`
@@ -23,29 +35,22 @@ if [ $cnt -ne $((ocnt + 1)) ]; then
exit_fail
fi
-echo "f:myevent2 $PLACE%return" >> dynamic_events
-
+echo 1 > events/fprobes/myevent2/enable
# It should still be the only attached function
cnt=`cat enabled_functions | wc -l`
if [ $cnt -ne $((ocnt + 1)) ]; then
exit_fail
fi
-# add another event
-echo "f:myevent3 $PLACE2" >> dynamic_events
-
+echo 1 > events/fprobes/myevent3/enable
+# If the function is different, the number of attached functions should increase
grep -q $PLACE2 enabled_functions
cnt=`cat enabled_functions | wc -l`
if [ $cnt -ne $((ocnt + 2)) ]; then
exit_fail
fi
-grep -q myevent1 dynamic_events
-grep -q myevent2 dynamic_events
-grep -q myevent3 dynamic_events
-test -d events/fprobes/myevent1
-test -d events/fprobes/myevent2
-
+echo 0 > events/fprobes/myevent2/enable
echo "-:myevent2" >> dynamic_events
grep -q myevent1 dynamic_events
@@ -57,6 +62,7 @@ if [ $cnt -ne $((ocnt + 2)) ]; then
exit_fail
fi
+echo 0 > events/fprobes/enable
echo > dynamic_events
# Should have none left
@@ -67,12 +73,14 @@ fi
echo "f:myevent4 $PLACE" >> dynamic_events
+echo 1 > events/fprobes/myevent4/enable
# Should only have one enabled
cnt=`cat enabled_functions | wc -l`
if [ $cnt -ne $((ocnt + 1)) ]; then
exit_fail
fi
+echo 0 > events/fprobes/enable
echo > dynamic_events
# Should have none left
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index b7c8f29c09a9..65916bb55dfb 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -14,11 +14,35 @@ fail() { #msg
exit_fail
}
+# As reading the trace can last forever, simply look for 3 different
+# events, then stop reading the file. If there are not 3 different
+# events, the test has failed.
+check_unique() {
+ cat trace | grep -v '^#' | awk '
+ BEGIN { cnt = 0; }
+ {
+ for (i = 0; i < cnt; i++) {
+ if (event[i] == $5) {
+ break;
+ }
+ }
+ if (i == cnt) {
+ event[cnt++] = $5;
+ if (cnt > 2) {
+ exit;
+ }
+ }
+ }
+ END {
+ printf "%d", cnt;
+ }'
+}
+
echo 'sched:*' > set_event
yield
-count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -29,7 +53,7 @@ echo 1 > events/sched/enable
yield
-count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
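check_unique() stops as soon as it has seen three distinct event names, so it never reads the endless trace file to the end. The same logic, sketched in Python for illustration:

def count_unique_events(trace_lines, limit=3):
    # Field 5 of a trace line holds the event name, matching $5 in the
    # awk script above.
    seen = set()
    for line in trace_lines:
        if line.startswith("#"):
            continue
        fields = line.split()
        if len(fields) >= 5:
            seen.add(fields[4])
        if len(seen) >= limit:
            break  # enough distinct events; stop reading the endless trace
    return len(seen)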
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index 4b994b6df5ac..ed81eaf2afd6 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -29,7 +29,7 @@ ftrace_filter_check 'schedule*' '^schedule.*$'
ftrace_filter_check '*pin*lock' '.*pin.*lock$'
# filter by start*mid*
-ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+ftrace_filter_check 'mutex*unl*' '^mutex.*unl.*'
# Advanced full-glob matching feature is recently supported.
# Skip the tests if we are sure the kernel does not support it.
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
index 24a92dc94eb8..aea001ac4946 100644
--- a/tools/testing/selftests/futex/functional/futex_priv_hash.c
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -26,14 +26,12 @@ static int counter;
#ifndef PR_FUTEX_HASH
#define PR_FUTEX_HASH 78
# define PR_FUTEX_HASH_SET_SLOTS 1
-# define FH_FLAG_IMMUTABLE (1ULL << 0)
# define PR_FUTEX_HASH_GET_SLOTS 2
-# define PR_FUTEX_HASH_GET_IMMUTABLE 3
#endif
-static int futex_hash_slots_set(unsigned int slots, int flags)
+static int futex_hash_slots_set(unsigned int slots)
{
- return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags);
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, 0);
}
static int futex_hash_slots_get(void)
@@ -41,16 +39,11 @@ static int futex_hash_slots_get(void)
return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
}
-static int futex_hash_immutable_get(void)
-{
- return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
-}
-
static void futex_hash_slots_set_verify(int slots)
{
int ret;
- ret = futex_hash_slots_set(slots, 0);
+ ret = futex_hash_slots_set(slots);
if (ret != 0) {
ksft_test_result_fail("Failed to set slots to %d: %m\n", slots);
ksft_finished();
@@ -64,13 +57,13 @@ static void futex_hash_slots_set_verify(int slots)
ksft_test_result_pass("SET and GET slots %d passed\n", slots);
}
-static void futex_hash_slots_set_must_fail(int slots, int flags)
+static void futex_hash_slots_set_must_fail(int slots)
{
int ret;
- ret = futex_hash_slots_set(slots, flags);
- ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n",
- slots, flags);
+ ret = futex_hash_slots_set(slots);
+ ksft_test_result(ret < 0, "futex_hash_slots_set(%d)\n",
+ slots);
}
static void *thread_return_fn(void *arg)
@@ -111,6 +104,30 @@ static void join_max_threads(void)
}
}
+#define SEC_IN_NSEC 1000000000
+#define MSEC_IN_NSEC 1000000
+
+static void futex_dummy_op(void)
+{
+ pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+ struct timespec timeout;
+ int ret;
+
+ pthread_mutex_lock(&lock);
+ clock_gettime(CLOCK_REALTIME, &timeout);
+ timeout.tv_nsec += 100 * MSEC_IN_NSEC;
+ if (timeout.tv_nsec >= SEC_IN_NSEC) {
+ timeout.tv_nsec -= SEC_IN_NSEC;
+ timeout.tv_sec++;
+ }
+ ret = pthread_mutex_timedlock(&lock, &timeout);
+ if (ret == 0)
+ ksft_exit_fail_msg("Successfully locked an already locked mutex.\n");
+
+ if (ret != ETIMEDOUT)
+ ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret);
+}
+
static void usage(char *prog)
{
printf("Usage: %s\n", prog);
@@ -128,18 +145,14 @@ int main(int argc, char *argv[])
{
int futex_slots1, futex_slotsn, online_cpus;
pthread_mutexattr_t mutex_attr_pi;
- int use_global_hash = 0;
- int ret;
+ int ret, retry = 20;
int c;
- while ((c = getopt(argc, argv, "cghv:")) != -1) {
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
switch (c) {
case 'c':
log_color(1);
break;
- case 'g':
- use_global_hash = 1;
- break;
case 'h':
usage(basename(argv[0]));
exit(0);
@@ -154,7 +167,7 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(22);
+ ksft_set_plan(21);
ret = pthread_mutexattr_init(&mutex_attr_pi);
ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
@@ -167,10 +180,6 @@ int main(int argc, char *argv[])
if (ret != 0)
ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret);
- ret = futex_hash_immutable_get();
- if (ret != 0)
- ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret);
-
ksft_test_result_pass("Basic get slots and immutable status.\n");
ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL);
if (ret != 0)
@@ -208,8 +217,24 @@ int main(int argc, char *argv[])
*/
ksft_print_msg("Online CPUs: %d\n", online_cpus);
if (online_cpus > 16) {
+retry_getslots:
futex_slotsn = futex_hash_slots_get();
if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) {
+ retry--;
+ /*
+ * Auto scaling on thread creation can be slightly delayed
+			 * because it waits for an RCU grace period twice. The new
+			 * private hash is assigned upon the first futex operation
+			 * after the grace period.
+			 * To cover all this for testing purposes, the function
+			 * below acquires a lock and then acquires it again with a
+			 * 100ms timeout, which must time out. This ensures we
+			 * sleep for 100ms and issue a futex operation.
+ */
+ if (retry > 0) {
+ futex_dummy_op();
+ goto retry_getslots;
+ }
ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
futex_slots1, futex_slotsn);
ksft_exit_fail_msg(test_msg_auto_inc);
@@ -227,7 +252,7 @@ int main(int argc, char *argv[])
futex_hash_slots_set_verify(32);
futex_hash_slots_set_verify(16);
- ret = futex_hash_slots_set(15, 0);
+ ret = futex_hash_slots_set(15);
ksft_test_result(ret < 0, "Use 15 slots\n");
futex_hash_slots_set_verify(2);
@@ -245,28 +270,23 @@ int main(int argc, char *argv[])
	ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n",
ret);
- futex_hash_slots_set_must_fail(1 << 29, 0);
+ futex_hash_slots_set_must_fail(1 << 29);
+ futex_hash_slots_set_verify(4);
/*
- * Once the private hash has been made immutable or global hash has been requested,
- * then this requested can not be undone.
+	 * Once the global hash has been requested, the request cannot
+	 * be undone.
*/
- if (use_global_hash) {
- ret = futex_hash_slots_set(0, 0);
- ksft_test_result(ret == 0, "Global hash request\n");
- } else {
- ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE);
- ksft_test_result(ret == 0, "Immutable resize to 4\n");
- }
+ ret = futex_hash_slots_set(0);
+ ksft_test_result(ret == 0, "Global hash request\n");
if (ret != 0)
goto out;
- futex_hash_slots_set_must_fail(4, 0);
- futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE);
- futex_hash_slots_set_must_fail(8, 0);
- futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE);
- futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE);
- futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(4);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(0);
+ futex_hash_slots_set_must_fail(6);
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
if (ret != 0) {
@@ -277,14 +297,7 @@ int main(int argc, char *argv[])
join_max_threads();
ret = futex_hash_slots_get();
- if (use_global_hash) {
- ksft_test_result(ret == 0, "Continue to use global hash\n");
- } else {
- ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n");
- }
-
- ret = futex_hash_immutable_get();
- ksft_test_result(ret == 1, "Hash reports to be immutable\n");
+ ksft_test_result(ret == 0, "Continue to use global hash\n");
out:
ksft_finished();
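
As a side note, the futex_dummy_op() helper added above boils down to a simple pattern: hold a mutex, then try to take it again with an absolute timeout 100ms in the future, so the attempt both sleeps and issues a futex syscall. A minimal standalone sketch of that pattern (illustrative, not the selftest itself; build with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	struct timespec abs_timeout;
	int ret;

	pthread_mutex_lock(&lock);	/* hold the lock so the next attempt blocks */

	clock_gettime(CLOCK_REALTIME, &abs_timeout);
	abs_timeout.tv_nsec += 100 * 1000000L;		/* +100ms */
	if (abs_timeout.tv_nsec >= 1000000000L) {	/* normalize into [0, 1s) */
		abs_timeout.tv_nsec -= 1000000000L;
		abs_timeout.tv_sec++;
	}

	/* Must time out: this sleeps ~100ms and performs a futex operation. */
	ret = pthread_mutex_timedlock(&lock, &abs_timeout);
	printf("pthread_mutex_timedlock() returned %d (%s)\n", ret,
	       ret == ETIMEDOUT ? "timed out as expected" : "unexpected");

	pthread_mutex_unlock(&lock);
	return ret == ETIMEDOUT ? 0 : 1;
}
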
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
index ea79662405bc..1f625b39948a 100644
--- a/tools/testing/selftests/futex/include/futex2test.h
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -4,6 +4,7 @@
*
* Copyright 2021 Collabora Ltd.
*/
+#include <linux/time_types.h>
#include <stdint.h>
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
@@ -65,7 +66,12 @@ struct futex32_numa {
static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
unsigned long flags, struct timespec *timo, clockid_t clockid)
{
- return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
+ struct __kernel_timespec ts = {
+ .tv_sec = timo->tv_sec,
+ .tv_nsec = timo->tv_nsec,
+ };
+
+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid);
}
/*
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
index ddbcfc9b7bac..7a5fd1d5355e 100644
--- a/tools/testing/selftests/futex/include/futextest.h
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -47,6 +47,17 @@ typedef volatile u_int32_t futex_t;
FUTEX_PRIVATE_FLAG)
#endif
+/*
+ * SYS_futex is expected to be provided by the system C library. In glibc,
+ * some 32-bit architectures (e.g. RV32) use a 64-bit time_t and therefore
+ * only define SYS_futex_time64, not SYS_futex. Define SYS_futex as
+ * SYS_futex_time64 in that case to keep the code building and compatible.
+ */
+#if !defined(SYS_futex) && defined(SYS_futex_time64)
+#define SYS_futex SYS_futex_time64
+#endif
+
/**
* futex() - SYS_futex syscall wrapper
* @uaddr: address of first futex
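
For context, the fallback added above can be exercised with a few lines of standalone C; the wrapper below is illustrative and is not the selftest's futex() helper. It simply issues a FUTEX_WAKE through whichever syscall number the C library exposes:

#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Same fallback as above: RV32 glibc only defines SYS_futex_time64. */
#if !defined(SYS_futex) && defined(SYS_futex_time64)
#define SYS_futex SYS_futex_time64
#endif

static uint32_t futex_word;

int main(void)
{
	/* Wake at most one waiter on futex_word; nobody is waiting, so 0 is returned. */
	long ret = syscall(SYS_futex, &futex_word, FUTEX_WAKE, 1, NULL, NULL, 0);

	printf("FUTEX_WAKE returned %ld\n", ret);
	return ret < 0 ? 1 : 0;
}
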
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index b1f40857307d..38c51158adf8 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -135,6 +135,7 @@ CONFIG_NET_EMATCH=y
CONFIG_NETFILTER_NETLINK_LOG=y
CONFIG_NETFILTER_NETLINK_QUEUE=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_MATCH_COMMENT=y
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 1926ef6b40ab..3eebf5e3b974 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -766,19 +766,34 @@ TEST_F(iommufd_ioas, get_hw_info)
uint8_t max_pasid = 0;
/* Provide a zero-size user_buffer */
- test_cmd_get_hw_info(self->device_id, NULL, 0);
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, 0);
/* Provide a user_buffer with exact size */
- test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+ sizeof(buffer_exact));
+
+ /* Request for a wrong data_type, and a correct one */
+ test_err_get_hw_info(EOPNOTSUPP, self->device_id,
+ IOMMU_HW_INFO_TYPE_SELFTEST + 1,
+ &buffer_exact, sizeof(buffer_exact));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_SELFTEST, &buffer_exact,
+ sizeof(buffer_exact));
/*
* Provide a user_buffer with size larger than the exact size to check if
	 * the kernel zeroes the trailing bytes.
*/
- test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger,
+ sizeof(buffer_larger));
/*
* Provide a user_buffer with size smaller than the exact size to check if
	 * the fields within the size range still get updated.
*/
- test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT,
+ &buffer_smaller, sizeof(buffer_smaller));
test_cmd_get_hw_info_pasid(self->device_id, &max_pasid);
ASSERT_EQ(0, max_pasid);
if (variant->pasid_capable) {
@@ -788,9 +803,11 @@ TEST_F(iommufd_ioas, get_hw_info)
}
} else {
test_err_get_hw_info(ENOENT, self->device_id,
- &buffer_exact, sizeof(buffer_exact));
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+ sizeof(buffer_exact));
test_err_get_hw_info(ENOENT, self->device_id,
- &buffer_larger, sizeof(buffer_larger));
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger,
+ sizeof(buffer_larger));
}
}
@@ -953,6 +970,33 @@ TEST_F(iommufd_ioas, area_auto_iova)
test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
}
+/* https://lore.kernel.org/r/685af644.a00a0220.2e5631.0094.GAE@google.com */
+TEST_F(iommufd_ioas, reserved_overflow)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved.start = 6,
+ };
+ unsigned int map_len;
+ __u64 iova;
+
+ if (PAGE_SIZE == 4096) {
+ test_cmd.add_reserved.length = 0xffffffffffff8001;
+ map_len = 0x5000;
+ } else {
+ test_cmd.add_reserved.length =
+ 0xffffffffffffffff - MOCK_PAGE_SIZE * 16;
+ map_len = MOCK_PAGE_SIZE * 10;
+ }
+
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ test_err_ioctl_ioas_map(ENOSPC, buffer, map_len, &iova);
+}
+
TEST_F(iommufd_ioas, area_allowed)
{
struct iommu_test_cmd test_cmd = {
@@ -2193,8 +2237,7 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability)
test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id);
test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
- test_cmd_get_hw_capabilities(self->idev_id, caps,
- IOMMU_HW_CAP_DIRTY_TRACKING);
+ test_cmd_get_hw_capabilities(self->idev_id, caps);
ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING,
caps & IOMMU_HW_CAP_DIRTY_TRACKING);
@@ -2706,7 +2749,7 @@ FIXTURE_SETUP(iommufd_viommu)
/* Allocate a vIOMMU taking refcount of the parent hwpt */
test_cmd_viommu_alloc(self->device_id, self->hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
&self->viommu_id);
/* Allocate a regular nested hwpt */
@@ -2745,24 +2788,27 @@ TEST_F(iommufd_viommu, viommu_negative_tests)
if (self->device_id) {
/* Negative test -- invalid hwpt (hwpt_id=0) */
test_err_viommu_alloc(ENOENT, device_id, 0,
- IOMMU_VIOMMU_TYPE_SELFTEST, NULL);
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
/* Negative test -- not a nesting parent hwpt */
test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id);
test_err_viommu_alloc(EINVAL, device_id, hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST, NULL);
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
test_ioctl_destroy(hwpt_id);
/* Negative test -- unsupported viommu type */
test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id,
- 0xdead, NULL);
+ 0xdead, NULL, 0, NULL);
EXPECT_ERRNO(EBUSY,
_test_ioctl_destroy(self->fd, self->hwpt_id));
EXPECT_ERRNO(EBUSY,
_test_ioctl_destroy(self->fd, self->viommu_id));
} else {
test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST, NULL);
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
}
}
@@ -2778,35 +2824,66 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf)
uint32_t fault_fd;
uint32_t vdev_id;
- if (self->device_id) {
- test_ioctl_fault_alloc(&fault_id, &fault_fd);
- test_err_hwpt_alloc_iopf(
- ENOENT, dev_id, viommu_id, UINT32_MAX,
- IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
- IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
- test_err_hwpt_alloc_iopf(
- EOPNOTSUPP, dev_id, viommu_id, fault_id,
- IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id,
- IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
- test_cmd_hwpt_alloc_iopf(
- dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID,
- &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data,
- sizeof(data));
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
- /* Must allocate vdevice before attaching to a nested hwpt */
- test_err_mock_domain_replace(ENOENT, self->stdev_id,
- iopf_hwpt_id);
- test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
- test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
- EXPECT_ERRNO(EBUSY,
- _test_ioctl_destroy(self->fd, iopf_hwpt_id));
- test_cmd_trigger_iopf(dev_id, fault_fd);
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
+ test_err_hwpt_alloc_iopf(ENOENT, dev_id, viommu_id, UINT32_MAX,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+ test_err_hwpt_alloc_iopf(EOPNOTSUPP, dev_id, viommu_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID | (1 << 31),
+ &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_alloc_iopf(dev_id, viommu_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
- test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
- test_ioctl_destroy(iopf_hwpt_id);
- close(fault_fd);
- test_ioctl_destroy(fault_id);
- }
+ /* Must allocate vdevice before attaching to a nested hwpt */
+ test_err_mock_domain_replace(ENOENT, self->stdev_id, iopf_hwpt_id);
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id));
+ test_cmd_trigger_iopf(dev_id, fault_fd);
+
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(iopf_hwpt_id);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
+}
+
+TEST_F(iommufd_viommu, viommu_alloc_with_data)
+{
+ struct iommu_viommu_selftest data = {
+ .in_data = 0xbeef,
+ };
+ uint32_t *test;
+
+ if (!self->device_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_cmd_viommu_alloc(self->device_id, self->hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, &data, sizeof(data),
+ &self->viommu_id);
+ ASSERT_EQ(data.out_data, data.in_data);
+
+ /* Negative mmap tests -- offset and length cannot be changed */
+ test_err_mmap(ENXIO, data.out_mmap_length,
+ data.out_mmap_offset + PAGE_SIZE);
+ test_err_mmap(ENXIO, data.out_mmap_length,
+ data.out_mmap_offset + PAGE_SIZE * 2);
+ test_err_mmap(ENXIO, data.out_mmap_length / 2, data.out_mmap_offset);
+ test_err_mmap(ENXIO, data.out_mmap_length * 2, data.out_mmap_offset);
+
+ /* Now do a correct mmap for a loopback test */
+ test = mmap(NULL, data.out_mmap_length, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->fd, data.out_mmap_offset);
+ ASSERT_NE(MAP_FAILED, test);
+ ASSERT_EQ(data.in_data, *test);
+
+ /* The owner of the mmap region should be blocked */
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id));
+ munmap(test, data.out_mmap_length);
}
TEST_F(iommufd_viommu, vdevice_alloc)
@@ -2867,169 +2944,234 @@ TEST_F(iommufd_viommu, vdevice_cache)
uint32_t vdev_id = 0;
uint32_t num_inv;
- if (dev_id) {
- test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
-
- test_cmd_dev_check_cache_all(dev_id,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
-
- /* Check data_type by passing zero-length array */
- num_inv = 0;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
-
- /* Negative test: Invalid data_type */
- num_inv = 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+
+ test_cmd_dev_check_cache_all(dev_id, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Check data_type by passing zero-length array */
+ num_inv = 0;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: Invalid data_type */
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: structure size sanity */
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs) + 1, &num_inv);
+ assert(!num_inv);
+
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 1,
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid flag is passed */
+ num_inv = 1;
+ inv_reqs[0].flags = 0xffffffff;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid data_uptr when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EINVAL, viommu_id, NULL,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid entry_len when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 0,
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid cache_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid vdev_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x9;
+ inv_reqs[0].cache_id = 0;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
- /* Negative test: structure size sanity */
- num_inv = 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs) + 1, &num_inv);
- assert(!num_inv);
-
- num_inv = 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- 1, &num_inv);
- assert(!num_inv);
-
- /* Negative test: invalid flag is passed */
- num_inv = 1;
- inv_reqs[0].flags = 0xffffffff;
- inv_reqs[0].vdev_id = 0x99;
- test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
-
- /* Negative test: invalid data_uptr when array is not empty */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- test_err_viommu_invalidate(EINVAL, viommu_id, NULL,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
-
- /* Negative test: invalid entry_len when array is not empty */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- 0, &num_inv);
- assert(!num_inv);
-
- /* Negative test: invalid cache_id */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+ /*
+ * Invalidate the 1st cache entry but fail the 2nd request
+ * due to invalid flags configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 0;
+ inv_reqs[1].flags = 0xffffffff;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = 1;
+ test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
- /* Negative test: invalid vdev_id */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x9;
- inv_reqs[0].cache_id = 0;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+ /*
+ * Invalidate the 1st cache entry but fail the 2nd request
+ * due to invalid cache_id configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 0;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Invalidate the 2nd cache entry and verify */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 1;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, 0);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Invalidate the 3rd and 4th cache entries and verify */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 2;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = 3;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 2);
+ test_cmd_dev_check_cache_all(dev_id, 0);
+
+ /* Invalidate all cache entries for nested_dev_id[1] and verify */
+ num_inv = 1;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache_all(dev_id, 0);
+ test_ioctl_destroy(vdev_id);
+}
+
+TEST_F(iommufd_viommu, hw_queue)
+{
+ __u64 iova = MOCK_APERTURE_START, iova2;
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t hw_queue_id[2];
+
+ if (!viommu_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ /* Fail IOMMU_HW_QUEUE_TYPE_DEFAULT */
+ test_err_hw_queue_alloc(EOPNOTSUPP, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_DEFAULT, 0, iova, PAGE_SIZE,
+ &hw_queue_id[0]);
+ /* Fail queue addr and length */
+ test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, 0, &hw_queue_id[0]);
+ test_err_hw_queue_alloc(EOVERFLOW, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, ~(uint64_t)0,
+ PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail missing iova */
+ test_err_hw_queue_alloc(ENOENT, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, PAGE_SIZE, &hw_queue_id[0]);
+
+ /* Map iova */
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova);
+ test_ioctl_ioas_map(buffer + PAGE_SIZE, PAGE_SIZE, &iova2);
+
+ /* Fail index=1 and =MAX; must start from index=0 */
+ test_err_hw_queue_alloc(EIO, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1,
+ iova, PAGE_SIZE, &hw_queue_id[0]);
+ test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ IOMMU_TEST_HW_QUEUE_MAX, iova, PAGE_SIZE,
+ &hw_queue_id[0]);
+
+ /* Allocate index=0, declare ownership of the iova */
+ test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 0,
+ iova, PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail duplicated index */
+ test_err_hw_queue_alloc(EEXIST, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail unmap, due to iova ownership */
+ test_err_ioctl_ioas_unmap(EBUSY, iova, PAGE_SIZE);
+	/* The 2nd page is not pinned, so it can be unmapped */
+ test_ioctl_ioas_unmap(iova2, PAGE_SIZE);
+
+ /* Allocate index=1, with an unaligned case */
+ test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1,
+ iova + PAGE_SIZE / 2, PAGE_SIZE / 2,
+ &hw_queue_id[1]);
+ /* Fail to destroy, due to dependency */
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hw_queue_id[0]));
+
+ /* Destroy in descending order */
+ test_ioctl_destroy(hw_queue_id[1]);
+ test_ioctl_destroy(hw_queue_id[0]);
+ /* Now it can unmap the first page */
+ test_ioctl_ioas_unmap(iova, PAGE_SIZE);
+}
- /*
- * Invalidate the 1st cache entry but fail the 2nd request
- * due to invalid flags configuration in the 2nd request.
- */
- num_inv = 2;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 0;
- inv_reqs[1].flags = 0xffffffff;
- inv_reqs[1].vdev_id = 0x99;
- inv_reqs[1].cache_id = 1;
- test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache(dev_id, 0, 0);
- test_cmd_dev_check_cache(dev_id, 1,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 2,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 3,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
+TEST_F(iommufd_viommu, vdevice_tombstone)
+{
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t dev_id = self->device_id;
+ uint32_t vdev_id = 0;
- /*
- * Invalidate the 1st cache entry but fail the 2nd request
- * due to invalid cache_id configuration in the 2nd request.
- */
- num_inv = 2;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 0;
- inv_reqs[1].flags = 0;
- inv_reqs[1].vdev_id = 0x99;
- inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache(dev_id, 0, 0);
- test_cmd_dev_check_cache(dev_id, 1,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 2,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 3,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
-
- /* Invalidate the 2nd cache entry and verify */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 1;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache(dev_id, 0, 0);
- test_cmd_dev_check_cache(dev_id, 1, 0);
- test_cmd_dev_check_cache(dev_id, 2,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 3,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
-
- /* Invalidate the 3rd and 4th cache entries and verify */
- num_inv = 2;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 2;
- inv_reqs[1].flags = 0;
- inv_reqs[1].vdev_id = 0x99;
- inv_reqs[1].cache_id = 3;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 2);
- test_cmd_dev_check_cache_all(dev_id, 0);
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
- /* Invalidate all cache entries for nested_dev_id[1] and verify */
- num_inv = 1;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache_all(dev_id, 0);
- test_ioctl_destroy(vdev_id);
- }
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_ioctl_destroy(self->stdev_id);
+ EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, vdev_id));
}
FIXTURE(iommufd_device_pasid)
@@ -3123,8 +3265,7 @@ TEST_F(iommufd_device_pasid, pasid_attach)
/* Allocate a regular nested hwpt based on viommu */
test_cmd_viommu_alloc(self->device_id, parent_hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST,
- &viommu_id);
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &viommu_id);
test_cmd_hwpt_alloc_nested(self->device_id, viommu_id,
IOMMU_HWPT_ALLOC_PASID,
&nested_hwpt_id[2],
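
Background for the reserved_overflow test added above: it feeds a reserved-range length close to U64_MAX so that naive start/length arithmetic wraps around. A generic sketch of wrap-safe range validation follows; this is not iommufd's actual check, only the arithmetic the test is probing.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool range_is_valid(uint64_t start, uint64_t length)
{
	uint64_t last;

	if (!length)
		return false;
	last = start + length - 1;	/* inclusive last address */
	return last >= start;		/* false if the addition wrapped */
}

int main(void)
{
	/* A huge length starting near the top of the address space wraps. */
	printf("%d\n", range_is_valid(0xfffffffffffff000ULL, 0x2000));	/* 0 */
	/* A modest range does not. */
	printf("%d\n", range_is_valid(0x1000, 0x5000));			/* 1 */
	return 0;
}
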
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index e11ec4b121fc..651fc9f13c08 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -634,6 +634,7 @@ TEST_FAIL_NTH(basic_fail_nth, device)
uint32_t idev_id;
uint32_t hwpt_id;
uint32_t viommu_id;
+ uint32_t hw_queue_id;
uint32_t vdev_id;
__u64 iova;
@@ -666,8 +667,8 @@ TEST_FAIL_NTH(basic_fail_nth, device)
&self->stdev_id, NULL, &idev_id))
return -1;
- if (_test_cmd_get_hw_info(self->fd, idev_id, &info,
- sizeof(info), NULL, NULL))
+ if (_test_cmd_get_hw_info(self->fd, idev_id, IOMMU_HW_INFO_TYPE_DEFAULT,
+ &info, sizeof(info), NULL, NULL))
return -1;
if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
@@ -688,13 +689,19 @@ TEST_FAIL_NTH(basic_fail_nth, device)
IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
- if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id))
+ if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, 0,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ &viommu_id))
return -1;
if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id))
return -1;
+ if (_test_cmd_hw_queue_alloc(self->fd, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, iova,
+ PAGE_SIZE, &hw_queue_id))
+ return -1;
+
if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd))
return -1;
close(fault_fd);
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 6e967b58acfd..3c3e08b8c90e 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -56,6 +56,10 @@ static unsigned long PAGE_SIZE;
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+#define test_err_mmap(_errno, length, offset) \
+ EXPECT_ERRNO(_errno, (long)mmap(NULL, length, PROT_READ | PROT_WRITE, \
+ MAP_SHARED, self->fd, offset))
+
static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p)
{
int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0;
@@ -762,20 +766,24 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
#endif
/* @data can be NULL */
-static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
- size_t data_len, uint32_t *capabilities,
- uint8_t *max_pasid)
+static int _test_cmd_get_hw_info(int fd, __u32 device_id, __u32 data_type,
+ void *data, size_t data_len,
+ uint32_t *capabilities, uint8_t *max_pasid)
{
struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data;
struct iommu_hw_info cmd = {
.size = sizeof(cmd),
.dev_id = device_id,
.data_len = data_len,
+ .in_data_type = data_type,
.data_uptr = (uint64_t)data,
.out_capabilities = 0,
};
int ret;
+ if (data_type != IOMMU_HW_INFO_TYPE_DEFAULT)
+ cmd.flags |= IOMMU_HW_INFO_FLAG_INPUT_TYPE;
+
ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd);
if (ret)
return ret;
@@ -818,20 +826,23 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
return 0;
}
-#define test_cmd_get_hw_info(device_id, data, data_len) \
- ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \
- data_len, NULL, NULL))
+#define test_cmd_get_hw_info(device_id, data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data_type, \
+ data, data_len, NULL, NULL))
-#define test_err_get_hw_info(_errno, device_id, data, data_len) \
- EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \
- data_len, NULL, NULL))
+#define test_err_get_hw_info(_errno, device_id, data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_get_hw_info(self->fd, device_id, data_type, \
+ data, data_len, NULL, NULL))
-#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
- ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \
+#define test_cmd_get_hw_capabilities(device_id, caps) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \
0, &caps, NULL))
-#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \
- ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \
+#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \
0, NULL, max_pasid))
static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
@@ -902,7 +913,8 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid,
pasid, fault_fd))
static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
- __u32 type, __u32 flags, __u32 *viommu_id)
+ __u32 flags, __u32 type, void *data,
+ __u32 data_len, __u32 *viommu_id)
{
struct iommu_viommu_alloc cmd = {
.size = sizeof(cmd),
@@ -910,6 +922,8 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
.type = type,
.dev_id = device_id,
.hwpt_id = hwpt_id,
+ .data_uptr = (uint64_t)data,
+ .data_len = data_len,
};
int ret;
@@ -921,13 +935,15 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
return 0;
}
-#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \
- ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \
- type, 0, viommu_id))
-#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \
- EXPECT_ERRNO(_errno, \
- _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \
- type, 0, viommu_id))
+#define test_cmd_viommu_alloc(device_id, hwpt_id, type, data, data_len, \
+ viommu_id) \
+ ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \
+ type, data, data_len, viommu_id))
+#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, data, \
+ data_len, viommu_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \
+ type, data, data_len, viommu_id))
static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id,
__u64 virt_id, __u32 *vdev_id)
@@ -956,6 +972,37 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id,
_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \
virt_id, vdev_id))
+static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type,
+ __u32 idx, __u64 base_addr, __u64 length,
+ __u32 *hw_queue_id)
+{
+ struct iommu_hw_queue_alloc cmd = {
+ .size = sizeof(cmd),
+ .viommu_id = viommu_id,
+ .type = type,
+ .index = idx,
+ .nesting_parent_iova = base_addr,
+ .length = length,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HW_QUEUE_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (hw_queue_id)
+ *hw_queue_id = cmd.out_hw_queue_id;
+ return 0;
+}
+
+#define test_cmd_hw_queue_alloc(viommu_id, type, idx, base_addr, len, out_qid) \
+ ASSERT_EQ(0, _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \
+ base_addr, len, out_qid))
+#define test_err_hw_queue_alloc(_errno, viommu_id, type, idx, base_addr, len, \
+ out_qid) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \
+ base_addr, len, out_qid))
+
static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
__u32 *veventq_id, __u32 *veventq_fd)
{
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index e9dbb84c100a..5e36aeeb9901 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -39,26 +39,26 @@ int restore_queue(struct msgque_data *msgque)
fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
if (fd == -1) {
- printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to open /proc/sys/kernel/msg_next_id\n");
return -errno;
}
sprintf(buf, "%d", msgque->msq_id);
ret = write(fd, buf, strlen(buf));
if (ret != strlen(buf)) {
- printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to write to /proc/sys/kernel/msg_next_id\n");
return -errno;
}
id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
if (id == -1) {
- printf("Failed to create queue\n");
+ ksft_test_result_fail("Failed to create queue\n");
return -errno;
}
if (id != msgque->msq_id) {
- printf("Restored queue has wrong id (%d instead of %d)\n",
- id, msgque->msq_id);
+ ksft_test_result_fail("Restored queue has wrong id (%d instead of %d)\n"
+ , id, msgque->msq_id);
ret = -EFAULT;
goto destroy;
}
@@ -66,7 +66,7 @@ int restore_queue(struct msgque_data *msgque)
for (i = 0; i < msgque->qnum; i++) {
if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
msgque->messages[i].msize, IPC_NOWAIT) != 0) {
- printf("msgsnd failed (%m)\n");
+ ksft_test_result_fail("msgsnd failed (%m)\n");
ret = -errno;
goto destroy;
}
@@ -90,23 +90,22 @@ int check_and_destroy_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == ENOMSG)
break;
- printf("Failed to read IPC message: %m\n");
+ ksft_test_result_fail("Failed to read IPC message: %m\n");
ret = -errno;
goto err;
}
if (ret != msgque->messages[cnt].msize) {
- printf("Wrong message size: %d (expected %d)\n", ret,
- msgque->messages[cnt].msize);
+ ksft_test_result_fail("Wrong message size: %d (expected %d)\n", ret, msgque->messages[cnt].msize);
ret = -EINVAL;
goto err;
}
if (message.mtype != msgque->messages[cnt].mtype) {
- printf("Wrong message type\n");
+ ksft_test_result_fail("Wrong message type\n");
ret = -EINVAL;
goto err;
}
if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
- printf("Wrong message content\n");
+ ksft_test_result_fail("Wrong message content\n");
ret = -EINVAL;
goto err;
}
@@ -114,7 +113,7 @@ int check_and_destroy_queue(struct msgque_data *msgque)
}
if (cnt != msgque->qnum) {
- printf("Wrong message number\n");
+ ksft_test_result_fail("Wrong message number\n");
ret = -EINVAL;
goto err;
}
@@ -139,7 +138,7 @@ int dump_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == EINVAL)
continue;
- printf("Failed to get stats for IPC queue with id %d\n",
+ ksft_test_result_fail("Failed to get stats for IPC queue with id %d\n",
kern_id);
return -errno;
}
@@ -150,7 +149,7 @@ int dump_queue(struct msgque_data *msgque)
msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
if (msgque->messages == NULL) {
- printf("Failed to get stats for IPC queue\n");
+ ksft_test_result_fail("Failed to get stats for IPC queue\n");
return -ENOMEM;
}
@@ -162,7 +161,7 @@ int dump_queue(struct msgque_data *msgque)
ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
if (ret < 0) {
- printf("Failed to copy IPC message: %m (%d)\n", errno);
+ ksft_test_result_fail("Failed to copy IPC message: %m (%d)\n", errno);
return -errno;
}
msgque->messages[i].msize = ret;
@@ -178,7 +177,7 @@ int fill_msgque(struct msgque_data *msgque)
memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("First message send failed (%m)\n");
+ ksft_test_result_fail("First message send failed (%m)\n");
return -errno;
}
@@ -186,7 +185,7 @@ int fill_msgque(struct msgque_data *msgque)
memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("Second message send failed (%m)\n");
+ ksft_test_result_fail("Second message send failed (%m)\n");
return -errno;
}
return 0;
@@ -202,44 +201,44 @@ int main(int argc, char **argv)
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
- printf("Can't make key: %d\n", -errno);
+ ksft_test_result_fail("Can't make key: %d\n", -errno);
ksft_exit_fail();
}
msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
if (msgque.msq_id == -1) {
err = -errno;
- printf("Can't create queue: %d\n", err);
+ ksft_test_result_fail("Can't create queue: %d\n", err);
goto err_out;
}
err = fill_msgque(&msgque);
if (err) {
- printf("Failed to fill queue: %d\n", err);
+ ksft_test_result_fail("Failed to fill queue: %d\n", err);
goto err_destroy;
}
err = dump_queue(&msgque);
if (err) {
- printf("Failed to dump queue: %d\n", err);
+ ksft_test_result_fail("Failed to dump queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to check and destroy queue: %d\n", err);
+ ksft_test_result_fail("Failed to check and destroy queue: %d\n", err);
goto err_out;
}
err = restore_queue(&msgque);
if (err) {
- printf("Failed to restore queue: %d\n", err);
+ ksft_test_result_fail("Failed to restore queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to test queue: %d\n", err);
+ ksft_test_result_fail("Failed to test queue: %d\n", err);
goto err_out;
}
ksft_exit_pass();
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index e3000ccb9a5d..874cfdd3b75b 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -12,7 +12,7 @@ include ../../../scripts/Makefile.arch
ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86)
TEST_PROGS += test_kexec_jump.sh
-test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump
+TEST_GEN_PROGS := test_kexec_jump
endif
include ../lib.mk
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 38b95998e1e6..f6fe7a07a0a2 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -59,6 +59,7 @@ TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
TEST_GEN_PROGS_COMMON = demand_paging_test
TEST_GEN_PROGS_COMMON += dirty_log_test
TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += irqfd_test
TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
TEST_GEN_PROGS_COMMON += kvm_page_table_test
@@ -134,6 +135,7 @@ TEST_GEN_PROGS_x86 += x86/amx_test
TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += x86/aperfmperf_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
@@ -156,7 +158,7 @@ TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
-TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index da7196fd1b23..c9de66537ec3 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -596,11 +596,8 @@ int main(int argc, char *argv[])
if (ret)
return ret;
} else {
- page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
- __TEST_REQUIRE(page_idle_fd >= 0,
- "Couldn't open /sys/kernel/mm/page_idle/bitmap. "
- "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
-
+ page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR,
+ "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
close(page_idle_fd);
puts("Using page_idle for aging");
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
index acb2cb596332..cf8fb67104f1 100644
--- a/tools/testing/selftests/kvm/arch_timer.c
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void)
static int test_migrate_vcpu(unsigned int vcpu_idx)
{
int ret;
- cpu_set_t cpuset;
uint32_t new_pcpu = test_get_pcpu();
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
- ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
- sizeof(cpuset), &cpuset);
+ ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu);
/* Allow the error where the vCPU thread is already finished */
TEST_ASSERT(ret == 0 || ret == ESRCH,
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index 4e71740a098b..ce74d069cb7b 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -862,25 +862,6 @@ static uint32_t next_pcpu(void)
return next;
}
-static void migrate_self(uint32_t new_pcpu)
-{
- int ret;
- cpu_set_t cpuset;
- pthread_t thread;
-
- thread = pthread_self();
-
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
- pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
- ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
- TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
- new_pcpu, ret);
-}
-
static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
enum arch_timer timer)
{
@@ -907,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
sched_yield();
break;
case USERSPACE_MIGRATE_SELF:
- migrate_self(next_pcpu());
+ pin_self_to_cpu(next_pcpu());
break;
default:
break;
@@ -919,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
struct ucall uc;
/* Start on CPU 0 */
- migrate_self(0);
+ pin_self_to_cpu(0);
while (true) {
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index c7fb55c9135b..e34963956fbc 100644
--- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -140,7 +140,7 @@ static void enable_os_lock(void)
static void enable_monitor_debug_exceptions(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
@@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
static void install_ss(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
new file mode 100644
index 000000000000..062bf84cced1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * external_abort - Tests for userspace external abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed)
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+ handler_fn dabt_handler)
+{
+ struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ return vm;
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static bool vcpu_has_ras(struct kvm_vcpu *vcpu)
+{
+ u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0);
+}
+
+static bool guest_has_ras(void)
+{
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1));
+}
+
+static void vcpu_inject_serror(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.serror_pending = true;
+ if (vcpu_has_ras(vcpu)) {
+ events.exception.serror_has_esr = true;
+ events.exception.serror_esr = EXPECTED_SERROR_ISS;
+ }
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ if (uc.cmd == cmd)
+ return;
+
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_DONE);
+}
+
+static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_SYNC);
+}
+
+extern char test_mmio_abort_insn;
+
+static noinline void test_mmio_abort_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+ asm volatile("test_mmio_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+ asm volatile("test_mmio_nisv_insn:\n\t"
+ "ldr x0, [%0], #8\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ unexpected_dabt_handler);
+
+ TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+ TEST_ASSERT_EQ(errno, ENOSYS);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+ TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void unexpected_serror_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Took unexpected SError exception");
+}
+
+static void test_serror_masked_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ isb();
+
+ GUEST_DONE();
+}
+
+static void test_serror_masked(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_serror_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR);
+ if (guest_has_ras())
+ GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS);
+
+ GUEST_DONE();
+}
+
+static void test_serror_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_emulated_guest(void)
+{
+ GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
+
+ local_serror_enable();
+ GUEST_SYNC(0);
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken unmasked SError exception");
+}
+
+static void test_serror_emulated(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_mmio_ease_guest(void)
+{
+ sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE);
+ isb();
+
+ test_mmio_abort_guest();
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_ease(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest,
+ unexpected_dabt_handler);
+ struct kvm_run *run = vcpu->run;
+ u64 pfr1;
+
+ pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) {
+ pr_debug("Skipping %s\n", __func__);
+ return;
+ }
+
+ /*
+ * SCTLR2_ELx.EASE changes the exception vector to the SError vector but
+ * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx).
+ */
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_mmio_abort();
+ test_mmio_nisv();
+ test_mmio_nisv_abort();
+ test_serror();
+ test_serror_masked();
+ test_serror_emulated();
+ test_mmio_ease();
+}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d01798b6b3b4..011fad95dd02 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -15,6 +15,12 @@
#include "test_util.h"
#include "processor.h"
+#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \
+ sys_reg_Op1(SYS_ ## r), \
+ sys_reg_CRn(SYS_ ## r), \
+ sys_reg_CRm(SYS_ ## r), \
+ sys_reg_Op2(SYS_ ## r))
+
struct feature_id_reg {
__u64 reg;
__u64 id_reg;
@@ -22,37 +28,43 @@ struct feature_id_reg {
__u64 feat_min;
};
-static struct feature_id_reg feat_id_regs[] = {
- {
- ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 0,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
- },
- {
- ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
+#define FEAT(id, f, v) \
+ .id_reg = SYS_REG(id), \
+ .feat_shift = id ## _ ## f ## _SHIFT, \
+ .feat_min = id ## _ ## f ## _ ## v
+
+#define REG_FEAT(r, id, f, v) \
+ { \
+ .reg = SYS_REG(r), \
+ FEAT(id, f, v) \
}
+
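+/*
+ * For example, REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP) expands to:
+ *
+ *	{
+ *		.reg		= SYS_REG(TCR2_EL1),
+ *		.id_reg		= SYS_REG(ID_AA64MMFR3_EL1),
+ *		.feat_shift	= ID_AA64MMFR3_EL1_TCRX_SHIFT,
+ *		.feat_min	= ID_AA64MMFR3_EL1_TCRX_IMP
+ *	}
+ */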
+static struct feature_id_reg feat_id_regs[] = {
+ REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP),
+ REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
+ REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
};
bool filter_reg(__u64 reg)
@@ -469,6 +481,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1),
ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
@@ -686,6 +699,62 @@ static __u64 pauth_generic_regs[] = {
ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
};
+static __u64 el2_regs[] = {
+ SYS_REG(VPIDR_EL2),
+ SYS_REG(VMPIDR_EL2),
+ SYS_REG(SCTLR_EL2),
+ SYS_REG(ACTLR_EL2),
+ SYS_REG(HCR_EL2),
+ SYS_REG(MDCR_EL2),
+ SYS_REG(CPTR_EL2),
+ SYS_REG(HSTR_EL2),
+ SYS_REG(HFGRTR_EL2),
+ SYS_REG(HFGWTR_EL2),
+ SYS_REG(HFGITR_EL2),
+ SYS_REG(HACR_EL2),
+ SYS_REG(ZCR_EL2),
+ SYS_REG(HCRX_EL2),
+ SYS_REG(TTBR0_EL2),
+ SYS_REG(TTBR1_EL2),
+ SYS_REG(TCR_EL2),
+ SYS_REG(TCR2_EL2),
+ SYS_REG(VTTBR_EL2),
+ SYS_REG(VTCR_EL2),
+ SYS_REG(VNCR_EL2),
+ SYS_REG(HDFGRTR2_EL2),
+ SYS_REG(HDFGWTR2_EL2),
+ SYS_REG(HFGRTR2_EL2),
+ SYS_REG(HFGWTR2_EL2),
+ SYS_REG(HDFGRTR_EL2),
+ SYS_REG(HDFGWTR_EL2),
+ SYS_REG(HAFGRTR_EL2),
+ SYS_REG(HFGITR2_EL2),
+ SYS_REG(SPSR_EL2),
+ SYS_REG(ELR_EL2),
+ SYS_REG(AFSR0_EL2),
+ SYS_REG(AFSR1_EL2),
+ SYS_REG(ESR_EL2),
+ SYS_REG(FAR_EL2),
+ SYS_REG(HPFAR_EL2),
+ SYS_REG(MAIR_EL2),
+ SYS_REG(PIRE0_EL2),
+ SYS_REG(PIR_EL2),
+ SYS_REG(POR_EL2),
+ SYS_REG(AMAIR_EL2),
+ SYS_REG(VBAR_EL2),
+ SYS_REG(CONTEXTIDR_EL2),
+ SYS_REG(TPIDR_EL2),
+ SYS_REG(CNTVOFF_EL2),
+ SYS_REG(CNTHCTL_EL2),
+ SYS_REG(CNTHP_CTL_EL2),
+ SYS_REG(CNTHP_CVAL_EL2),
+ SYS_REG(CNTHV_CTL_EL2),
+ SYS_REG(CNTHV_CVAL_EL2),
+ SYS_REG(SP_EL2),
+ SYS_REG(VDISR_EL2),
+ SYS_REG(VSESR_EL2),
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -712,6 +781,14 @@ static __u64 pauth_generic_regs[] = {
.regs = pauth_generic_regs, \
.regs_n = ARRAY_SIZE(pauth_generic_regs), \
}
+#define EL2_SUBLIST \
+ { \
+ .name = "EL2", \
+ .capability = KVM_CAP_ARM_EL2, \
+ .feature = KVM_ARM_VCPU_HAS_EL2, \
+ .regs = el2_regs, \
+ .regs_n = ARRAY_SIZE(el2_regs), \
+ }
static struct vcpu_reg_list vregs_config = {
.sublists = {
@@ -761,6 +838,65 @@ static struct vcpu_reg_list pauth_pmu_config = {
},
};
+static struct vcpu_reg_list el2_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
struct vcpu_reg_list *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
@@ -768,5 +904,12 @@ struct vcpu_reg_list *vcpu_configs[] = {
&sve_pmu_config,
&pauth_config,
&pauth_pmu_config,
+
+ &el2_vregs_config,
+ &el2_vregs_pmu_config,
+ &el2_sve_config,
+ &el2_sve_pmu_config,
+ &el2_pauth_config,
+ &el2_pauth_pmu_config,
};
int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
deleted file mode 100644
index 8b7a80a51b1c..000000000000
--- a/tools/testing/selftests/kvm/arm64/mmio_abort.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR 0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
- u64 esr = read_sysreg(esr_el1);
-
- GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
- GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
- GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
- GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
- handler_fn dabt_handler)
-{
- struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
- virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
- return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events = {};
-
- events.exception.ext_dabt_pending = true;
- vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
- asm volatile("test_mmio_abort_insn:\n\t"
- "ldr x0, [%0]\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
- TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
- TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
- TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
- asm volatile("test_mmio_nisv_insn:\n\t"
- "ldr x0, [%0], #8\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- unexpected_dabt_handler);
-
- TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
- TEST_ASSERT_EQ(errno, ENOSYS);
-
- kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
- TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- test_mmio_abort();
- test_mmio_nisv();
- test_mmio_nisv_abort();
-}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 8f422bfdfcb9..d3bf9204409c 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -139,6 +139,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
};
static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
@@ -187,6 +188,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
@@ -217,6 +226,7 @@ static struct test_feature_reg test_regs[] = {
TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1),
TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
};
@@ -774,8 +784,8 @@ int main(void)
ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
- MPAM_IDREG_TEST + MTE_IDREG_TEST;
+ ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
+ ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
ksft_set_plan(test_cnt);
diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
index b3b5fb0ff0a9..a8e0f46bc0ab 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_init.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_init.c
@@ -9,17 +9,18 @@
#include <asm/kvm.h>
#include <asm/kvm_para.h>
+#include <arm64/gic_v3.h>
+
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vgic.h"
+#include "gic_v3.h"
#define NR_VCPUS 4
#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
-#define GICR_TYPER 0x8
-
#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
@@ -675,6 +676,44 @@ static void test_v3_its_region(void)
vm_gic_destroy(&v);
}
+static void test_v3_nassgicap(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ bool has_nassgicap;
+ struct vm_gic vm;
+ u32 typer2;
+ int ret;
+
+ vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap;
+
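+	/*
+	 * Attempting to set nASSGIcap before the VGIC is initialized must
+	 * succeed iff KVM advertises the capability in GICD_TYPER2.
+	 */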
+ typer2 |= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ if (has_nassgicap)
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret));
+ else
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Enabled nASSGIcap even though it's unavailable");
+
+ typer2 &= ~GICD_TYPER2_nASSGIcap;
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ typer2 ^= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ TEST_ASSERT(ret && errno == EBUSY,
+ "Changed nASSGIcap after initializing the VGIC");
+
+ vm_gic_destroy(&vm);
+}
+
/*
* Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
*/
@@ -715,6 +754,220 @@ int test_kvm_device(uint32_t gic_dev_type)
return 0;
}
+struct sr_def {
+ const char *name;
+ u32 encoding;
+};
+
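+/*
+ * Pack a sysreg encoding into the attribute format expected by
+ * KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS.
+ */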
+#define PACK_SR(r) \
+ ((sys_reg_Op0(r) << 14) | \
+ (sys_reg_Op1(r) << 11) | \
+ (sys_reg_CRn(r) << 7) | \
+ (sys_reg_CRm(r) << 3) | \
+ (sys_reg_Op2(r)))
+
+#define SR(r) \
+ { \
+ .name = #r, \
+ .encoding = r, \
+ }
+
+static const struct sr_def sysregs_el1[] = {
+ SR(SYS_ICC_PMR_EL1),
+ SR(SYS_ICC_BPR0_EL1),
+ SR(SYS_ICC_AP0R0_EL1),
+ SR(SYS_ICC_AP0R1_EL1),
+ SR(SYS_ICC_AP0R2_EL1),
+ SR(SYS_ICC_AP0R3_EL1),
+ SR(SYS_ICC_AP1R0_EL1),
+ SR(SYS_ICC_AP1R1_EL1),
+ SR(SYS_ICC_AP1R2_EL1),
+ SR(SYS_ICC_AP1R3_EL1),
+ SR(SYS_ICC_BPR1_EL1),
+ SR(SYS_ICC_CTLR_EL1),
+ SR(SYS_ICC_SRE_EL1),
+ SR(SYS_ICC_IGRPEN0_EL1),
+ SR(SYS_ICC_IGRPEN1_EL1),
+};
+
+static const struct sr_def sysregs_el2[] = {
+ SR(SYS_ICH_AP0R0_EL2),
+ SR(SYS_ICH_AP0R1_EL2),
+ SR(SYS_ICH_AP0R2_EL2),
+ SR(SYS_ICH_AP0R3_EL2),
+ SR(SYS_ICH_AP1R0_EL2),
+ SR(SYS_ICH_AP1R1_EL2),
+ SR(SYS_ICH_AP1R2_EL2),
+ SR(SYS_ICH_AP1R3_EL2),
+ SR(SYS_ICH_HCR_EL2),
+ SR(SYS_ICC_SRE_EL2),
+ SR(SYS_ICH_VTR_EL2),
+ SR(SYS_ICH_VMCR_EL2),
+ SR(SYS_ICH_LR0_EL2),
+ SR(SYS_ICH_LR1_EL2),
+ SR(SYS_ICH_LR2_EL2),
+ SR(SYS_ICH_LR3_EL2),
+ SR(SYS_ICH_LR4_EL2),
+ SR(SYS_ICH_LR5_EL2),
+ SR(SYS_ICH_LR6_EL2),
+ SR(SYS_ICH_LR7_EL2),
+ SR(SYS_ICH_LR8_EL2),
+ SR(SYS_ICH_LR9_EL2),
+ SR(SYS_ICH_LR10_EL2),
+ SR(SYS_ICH_LR11_EL2),
+ SR(SYS_ICH_LR12_EL2),
+ SR(SYS_ICH_LR13_EL2),
+ SR(SYS_ICH_LR14_EL2),
+ SR(SYS_ICH_LR15_EL2),
+};
+
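+/*
+ * check() decides what to do when accessing an advertised register fails:
+ * returning 0 downgrades the failure to a SKIP, any other value turns it
+ * into a test failure.
+ */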
+static void test_sysreg_array(int gic, const struct sr_def *sr, int nr,
+ int (*check)(int, const struct sr_def *, const char *))
+{
+ for (int i = 0; i < nr; i++) {
+ u64 val;
+ u64 attr;
+ int ret;
+
+ /* Assume MPIDR_EL1.Aff*=0 */
+ attr = PACK_SR(sr[i].encoding);
+
+ /*
+ * The API is braindead. A register can be advertised as
+ * available, and yet not be readable or writable.
+ * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and
+ * ICH_APnR{1,2,3}_EL2 do follow suit for consistency.
+ *
+ * On the bright side, no known HW is implementing more than
+ * 5 bits of priority, so we're safe. Sort of...
+ */
+ ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr);
+ TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name);
+
+ /* Check that we can write back what we read */
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name);
+ ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name);
+ }
+}
+
+static u8 get_ctlr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICC_CTLR_EL1), &val);
+ TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable");
+
+ pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICC_AP0R1_EL1:
+ case SYS_ICC_AP1R1_EL1:
+ if (get_ctlr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICC_AP0R2_EL1:
+ case SYS_ICC_AP0R3_EL1:
+ case SYS_ICC_AP1R2_EL1:
+ case SYS_ICC_AP1R3_EL1:
+ if (get_ctlr_pribits(gic) == 7)
+ return 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static u8 get_vtr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICH_VTR_EL2), &val);
+ TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable");
+
+ pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICH_AP0R1_EL2:
+ case SYS_ICH_AP1R1_EL2:
+ if (get_vtr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICH_AP0R2_EL2:
+ case SYS_ICH_AP0R3_EL2:
+ case SYS_ICH_AP1R2_EL2:
+ case SYS_ICH_AP1R3_EL2:
+ if (get_vtr_pribits(gic) == 7)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static void test_v3_sysregs(void)
+{
+ struct kvm_vcpu_init init = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ u32 feat = 0;
+ int gic;
+
+ if (kvm_check_cap(KVM_CAP_ARM_EL2))
+ feat |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ vm = vm_create(1);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= feat;
+
+ vcpu = aarch64_vcpu_add(vm, 0, &init, NULL);
+ TEST_ASSERT(vcpu, "Can't create a vcpu?");
+
+ gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ TEST_ASSERT(gic >= 0, "No GIC???");
+
+ kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs);
+ if (feat)
+ test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs);
+ else
+ pr_info("SKIP EL2 registers, not available\n");
+
+ close(gic);
+ kvm_vm_free(vm);
+}
+
void run_tests(uint32_t gic_dev_type)
{
test_vcpus_then_vgic(gic_dev_type);
@@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type)
test_v3_last_bit_single_rdist();
test_v3_redist_ipa_range_check_at_vcpu_run();
test_v3_its_region();
+ test_v3_sysregs();
+ test_v3_nassgicap();
}
}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index f4ac28d53747..a09dd423c2d7 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
@@ -620,18 +620,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
* that no actual interrupt was injected for those cases.
*/
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- fd[f] = eventfd(0, 0);
- TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
- }
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ fd[f] = kvm_new_eventfd();
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- struct kvm_irqfd irqfd = {
- .fd = fd[f],
- .gsi = i - MIN_SPI,
- };
assert(i <= (uint64_t)UINT_MAX);
- vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]);
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index 8835fed09e9f..96d874b239eb 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -1,5 +1,6 @@
CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
+CONFIG_EVENTFD=y
CONFIG_USERFAULTFD=y
CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index b0fc0f945766..255fed769a8a 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -254,6 +254,16 @@ static inline void local_irq_disable(void)
asm volatile("msr daifset, #3" : : : "memory");
}
+static inline void local_serror_enable(void)
+{
+ asm volatile("msr daifclr, #4" : : : "memory");
+}
+
+static inline void local_serror_disable(void)
+{
+ asm volatile("msr daifset, #4" : : : "memory");
+}
+
/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index bee65ca08721..23a506d7eca3 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -18,8 +18,11 @@
#include <asm/atomic.h>
#include <asm/kvm.h>
+#include <sys/eventfd.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+
#include "kvm_util_arch.h"
#include "kvm_util_types.h"
#include "sparsebit.h"
@@ -253,6 +256,7 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help);
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
@@ -502,6 +506,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm)
return fd;
}
+static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ struct kvm_irqfd irqfd = {
+ .fd = eventfd,
+ .gsi = gsi,
+ .flags = flags,
+ .resamplefd = -1,
+ };
+
+ return __vm_ioctl(vm, KVM_IRQFD, &irqfd);
+}
+
+static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ int ret = __kvm_irqfd(vm, gsi, eventfd, flags);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm);
+}
+
+static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, 0);
+}
+
+static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+static inline int kvm_new_eventfd(void)
+{
+ int fd = eventfd(0, 0);
+
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd));
+ return fd;
+}
+
static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
{
ssize_t ret;
@@ -1013,7 +1056,34 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
void kvm_set_files_rlimit(uint32_t nr_vcpus);
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+int __pin_task_to_cpu(pthread_t task, int cpu);
+
+static inline void pin_task_to_cpu(pthread_t task, int cpu)
+{
+ int r;
+
+ r = __pin_task_to_cpu(task, cpu);
+ TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu);
+}
+
+static inline int pin_task_to_any_cpu(pthread_t task)
+{
+ int cpu = sched_getcpu();
+
+ pin_task_to_cpu(task, cpu);
+ return cpu;
+}
+
+static inline void pin_self_to_cpu(int cpu)
+{
+ pin_task_to_cpu(pthread_self(), cpu);
+}
+
+static inline int pin_self_to_any_cpu(void)
+{
+ return pin_task_to_any_cpu(pthread_self());
+}
+
void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
int nr_vcpus);
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index b11b5a53ebd5..2efb05c2f2fb 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1150,7 +1150,6 @@ do { \
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
void kvm_init_vm_address_properties(struct kvm_vm *vm);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
struct ex_regs {
uint64_t rax, rcx, rdx, rbx;
@@ -1325,6 +1324,11 @@ static inline bool kvm_is_forced_emulation_enabled(void)
return !!get_kvm_param_integer("force_emulation_prefix");
}
+static inline bool kvm_is_unrestricted_guest_enabled(void)
+{
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
int *level);
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
new file mode 100644
index 000000000000..7c301b4c7005
--- /dev/null
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+
+static struct kvm_vm *vm1;
+static struct kvm_vm *vm2;
+static int __eventfd;
+static bool done;
+
+/*
+ * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when
+ * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task
+ * GSI base to avoid false failures due to cross-task de-assign, i.e. so that
+ * the secondary doesn't de-assign the primary's eventfd and cause assign to
+ * unexpectedly succeed on the primary.
+ */
+#define GSI_BASE_PRIMARY 0x20
+#define GSI_BASE_SECONDARY 0x30
+
+static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd)
+{
+ int r, i;
+
+ /*
+ * The secondary task can encounter EBADF since the primary can close
+ * the eventfd at any time. And because the primary can recreate the
+	 * eventfd, at the same fd in the file table, the secondary can also
+ * encounter "unexpected" success, e.g. if the close+recreate happens
+ * between the first and second assignments. The secondary's role is
+ * mostly to antagonize KVM, not to detect bugs.
+ */
+ for (i = 0; i < 2; i++) {
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0);
+ TEST_ASSERT(!r || errno == EBUSY || errno == EBADF,
+ "Wanted success, EBUSY, or EBADF, r = %d, errno = %d",
+ r, errno);
+
+ /* De-assign should succeed unless the eventfd was closed. */
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ TEST_ASSERT(!r || errno == EBADF,
+ "De-assign should succeed unless the fd was closed");
+ }
+}
+
+static void *secondary_irqfd_juggler(void *ign)
+{
+ while (!READ_ONCE(done)) {
+ juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd));
+ juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd));
+ }
+
+ return NULL;
+}
+
+static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
+{
+ int r1, r2;
+
+ /*
+ * At least one of the assigns should fail. KVM disallows assigning a
+ * single eventfd to multiple GSIs (or VMs), so it's possible that both
+ * assignments can fail, too.
+ */
+ r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0);
+ TEST_ASSERT(!r1 || errno == EBUSY,
+ "Wanted success or EBUSY, r = %d, errno = %d", r1, errno);
+
+ r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0);
+ TEST_ASSERT(r1 || (r2 && errno == EBUSY),
+ "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d",
+ r1, r2, errno);
+
+ /*
+ * De-assign should always succeed, even if the corresponding assign
+ * failed.
+ */
+ kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t racing_thread;
+ int r, i;
+
+ /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */
+ vm1 = vm_create(1);
+ vm2 = vm_create(1);
+
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ kvm_irqfd(vm1, 10, __eventfd, 0);
+
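+	/*
+	 * The eventfd is now bound to vm1's GSI 10; assigning it to any other
+	 * GSI or VM must fail with EBUSY.
+	 */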
+ r = __kvm_irqfd(vm1, 11, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ r = __kvm_irqfd(vm2, 12, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ /*
+	 * De-assign the eventfd, both from the GSI it is assigned to and from
+	 * several GSIs it was never assigned to.  KVM's ABI is that de-assign
+	 * is allowed so long as the eventfd itself is valid.
+ */
+ kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+
+ close(__eventfd);
+
+ pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2);
+
+ for (i = 0; i < 10000; i++) {
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ juggle_eventfd_primary(vm1, __eventfd);
+ juggle_eventfd_primary(vm2, __eventfd);
+ close(__eventfd);
+ }
+
+ WRITE_ONCE(done, true);
+ pthread_join(racing_thread, NULL);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a055343a7bf7..c3f5142b0a54 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -26,15 +26,27 @@ static uint32_t last_guest_seed;
static int vcpu_mmap_sz(void);
-int open_path_or_exit(const char *path, int flags)
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
{
int fd;
fd = open(path, flags);
- __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
- TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
+ if (fd < 0)
+ goto error;
return fd;
+
+error:
+ if (errno == EACCES || errno == ENOENT)
+ ksft_exit_skip("- Cannot open '%s': %s. %s\n",
+ path, strerror(errno),
+ errno == EACCES ? "Root required?" : enoent_help);
+ TEST_FAIL("Failed to open '%s'", path);
+}
+
+int open_path_or_exit(const char *path, int flags)
+{
+ return __open_path_or_exit(path, flags, "");
}
/*
@@ -48,7 +60,7 @@ int open_path_or_exit(const char *path, int flags)
*/
static int _open_kvm_dev_path_or_exit(int flags)
{
- return open_path_or_exit(KVM_DEV_PATH, flags);
+ return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?");
}
int open_kvm_dev_path_or_exit(void)
@@ -64,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param,
ssize_t bytes_read;
int fd, r;
+ /* Verify KVM is loaded, to provide a more helpful SKIP message. */
+ close(open_kvm_dev_path_or_exit());
+
r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
module_name, param);
TEST_ASSERT(r < path_size,
@@ -605,15 +620,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
return vm_vcpu_recreate(vm, 0);
}
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+int __pin_task_to_cpu(pthread_t task, int cpu)
{
- cpu_set_t mask;
- int r;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
- CPU_ZERO(&mask);
- CPU_SET(pcpu, &mask);
- r = sched_setaffinity(0, sizeof(mask), &mask);
- TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
+ return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}
static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
@@ -667,7 +681,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
/* 2. Check if the main worker needs to be pinned. */
if (cpu) {
- kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+ pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
cpu = strtok(NULL, delim);
}
@@ -1716,7 +1730,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
- vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ int r;
+
+ /*
+ * Allocate a fully in-kernel IRQ chip by default, but fall back to a
+ * split model (x86 only) if that fails (KVM x86 allows compiling out
+ * support for KVM_CREATE_IRQCHIP).
+ */
+ r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+ vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+ else
+ TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
vm->has_irqchip = true;
}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 313277486a1d..557c0a0a5658 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data)
int vcpu_idx = vcpu->vcpu_idx;
if (memstress_args.pin_vcpus)
- kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+ pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
WRITE_ONCE(vcpu->running, true);
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index cfed9d26cc71..a99188f87a38 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -116,7 +116,7 @@
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*
@@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*/
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index a92dc1dad085..d4c19ac885a9 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -1264,16 +1264,6 @@ done:
return min(max_gfn, ht_gfn - 1);
}
-/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- /* Ensure that a KVM vendor-specific module is loaded. */
- if (vm == NULL)
- close(open_kvm_dev_path_or_exit());
-
- return get_kvm_intel_param_bool("unrestricted_guest");
-}
-
void kvm_selftest_arch_init(void)
{
host_cpu_is_intel = this_cpu_is_intel();
diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
new file mode 100644
index 000000000000..8b15a13df939
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
+ *
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
+ * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
+ * return the host's values.
+ *
+ * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <asm/msr-index.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define NUM_ITERATIONS 10000
+
+static int open_dev_msr(int cpu)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
+ return open_path_or_exit(path, O_RDONLY);
+}
+
+static uint64_t read_dev_msr(int msr_fd, uint32_t msr)
+{
+ uint64_t data;
+ ssize_t rc;
+
+ rc = pread(msr_fd, &data, sizeof(data), msr);
+ TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);
+
+ return data;
+}
+
+static void guest_read_aperf_mperf(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_ITERATIONS; i++)
+ GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_read_aperf_mperf();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set
+	 * in the VMCS by prepare_vmcs()), as MSR exiting is mandatory on Intel.
+ */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *nested_test_data)
+{
+ guest_read_aperf_mperf();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(nested_test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(nested_test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+static void guest_no_aperfmperf(void)
+{
+ uint64_t msr_val;
+ uint8_t vector;
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ uint64_t host_aperf_before, host_mperf_before;
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int msr_fd, cpu, i;
+
+ /* Sanity check that APERF/MPERF are unsupported by default. */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ kvm_vm_free(vm);
+
+ cpu = pin_self_to_any_cpu();
+
+ msr_fd = open_dev_msr(cpu);
+
+ /*
+ * This test requires a non-standard VM initialization, because
+ * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+ * a VCPU has been created.
+ */
+ vm = vm_create(1);
+
+ TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (!has_nested)
+ nested_test_data_gva = NONCANONICAL;
+ else if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
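+	/*
+	 * The guest SYNCs NUM_ITERATIONS times from L1 and, if nested virt is
+	 * available, another NUM_ITERATIONS times from L2 before signaling
+	 * DONE, hence the inclusive upper bound.
+	 */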
+ for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) {
+ uint64_t host_aperf_after, host_mperf_after;
+ uint64_t guest_aperf, guest_mperf;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ guest_aperf = uc.args[0];
+ guest_mperf = uc.args[1];
+
+ host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
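+			/*
+			 * APERF and MPERF only count up, so a guest-observed
+			 * value that falls between two host reads must have
+			 * come from the host's physical MSR.
+			 */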
+ TEST_ASSERT(host_aperf_before < guest_aperf,
+ "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_aperf_before, guest_aperf);
+ TEST_ASSERT(guest_aperf < host_aperf_after,
+ "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_aperf, host_aperf_after);
+ TEST_ASSERT(host_mperf_before < guest_mperf,
+ "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_mperf_before, guest_mperf);
+ TEST_ASSERT(guest_mperf < host_mperf_after,
+ "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_mperf, host_mperf_after);
+
+ host_aperf_before = host_aperf_after;
+ host_mperf_before = host_mperf_after;
+
+ break;
+ }
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE\n");
+done:
+ kvm_vm_free(vm);
+ close(msr_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 32b2794b78fe..8463a9956410 100644
--- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
@@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void)
data = test_rdmsr(MSR_GS_BASE);
GUEST_ASSERT(data == MSR_GS_BASE);
+	/* Access the MSRs again to ensure KVM has disabled interception. */
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
GUEST_DONE();
}
@@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
"Expected ucall state to be UCALL_SYNC.");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
run_guest_then_process_ucall_done(vcpu);
}
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
index 3fd6eceab46f..2cae86d9d5e2 100644
--- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
@@ -110,7 +110,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
TEST_REQUIRE(host_cpu_is_intel);
- TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+ TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled());
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
get_set_sigalrm_vcpu(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index 287829f850f7..23909b501ac2 100644
--- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -547,15 +547,9 @@ int main(int argc, char *argv[])
int irq_fd[2] = { -1, -1 };
if (do_eventfd_tests) {
- irq_fd[0] = eventfd(0, 0);
- irq_fd[1] = eventfd(0, 0);
+ irq_fd[0] = kvm_new_eventfd();
+ irq_fd[1] = kvm_new_eventfd();
- /* Unexpected, but not a KVM failure */
- if (irq_fd[0] == -1 || irq_fd[1] == -1)
- do_evtchn_tests = do_eventfd_tests = false;
- }
-
- if (do_eventfd_tests) {
irq_routes.info.nr = 2;
irq_routes.entries[0].gsi = 32;
@@ -572,15 +566,8 @@ int main(int argc, char *argv[])
vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
- struct kvm_irqfd ifd = { };
-
- ifd.fd = irq_fd[0];
- ifd.gsi = 32;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
-
- ifd.fd = irq_fd[1];
- ifd.gsi = 33;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
+ kvm_assign_irqfd(vm, 32, irq_fd[0]);
+ kvm_assign_irqfd(vm, 33, irq_fd[1]);
struct sigaction sa = { };
sa.sa_handler = handle_alrm;
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index 18a6014920b5..b16986aa6442 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -403,11 +403,12 @@ static int audit_init_filter_exe(struct audit_filter *filter, const char *path)
	/* It is assumed that there are no filtering rules already. */
filter->record_type = AUDIT_EXE;
if (!path) {
- filter->exe_len = readlink("/proc/self/exe", filter->exe,
- sizeof(filter->exe) - 1);
- if (filter->exe_len < 0)
+ int ret = readlink("/proc/self/exe", filter->exe,
+ sizeof(filter->exe) - 1);
+ if (ret < 0)
return -errno;
+ filter->exe_len = ret;
return 0;
}
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index cfc571afd0eb..46d02d49835a 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -7,6 +7,7 @@
#define _GNU_SOURCE
#include <errno.h>
+#include <fcntl.h>
#include <limits.h>
#include <linux/landlock.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 73729382d40f..fa0f18ec62c4 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -1832,6 +1832,46 @@ TEST_F_FORK(layout1, release_inodes)
ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY));
}
+/*
+ * This test checks that a rule on a directory used as a mount point does not
+ * grant access to the mount covering it. It is a generalization of the bind
+ * mount case in layout3_fs.hostfs.release_inodes that tests hidden mount points.
+ */
+TEST_F_FORK(layout1, covered_rule)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s3d2,
+ .access = LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ /* Unmount to simplify FIXTURE_TEARDOWN. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Creates a ruleset with the future hidden directory. */
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Covers with a new mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that access to the new mount point is denied. */
+ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
+}
+
enum relative_access {
REL_OPEN,
REL_CHDIR,
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 7afe05e8c4d7..bd09fdaf53e0 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -2,7 +2,7 @@ CONFIG_LKDTM=y
CONFIG_DEBUG_LIST=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_FORTIFY_SOURCE=y
-CONFIG_GCC_PLUGIN_STACKLEAK=y
+CONFIG_KSTACK_ERASE=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
CONFIG_INIT_ON_FREE_DEFAULT_ON=y
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index aa7400ed0e99..f0d9c035641d 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -31,6 +31,7 @@ uint64_t pmd_pagesize;
#define INPUT_MAX 80
#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
#define PATH_FMT "%s,0x%lx,0x%lx,%d"
#define PFN_MASK ((1UL<<55)-1)
@@ -483,7 +484,7 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc,
write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
(uint64_t)addr + fd_size, order);
else
- write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
+ write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr,
(uint64_t)addr + fd_size, order, offset);
for (i = 0; i < fd_size; i++)
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c6dd2a335cf4..47c293c2962f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -34,6 +34,7 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
+scm_inq
scm_pidfd
scm_rights
sk_bind_sendto_listen
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 332f387615d7..b31a71f2b372 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh
TEST_PROGS += link_netns.py
TEST_PROGS += nl_netdev.py
TEST_PROGS += rtnetlink.py
+TEST_PROGS += rtnetlink_notification.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -62,6 +63,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
+TEST_PROGS += netdev-l2addr.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -99,6 +101,7 @@ TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
+TEST_PROGS += test_neigh.sh
TEST_PROGS += fdb_flush.sh fdb_notify.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
@@ -112,6 +115,8 @@ TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
TEST_GEN_FILES += tfo
TEST_PROGS += tfo_passive.sh
+TEST_PROGS += broadcast_pmtu.sh
+TEST_PROGS += ipv6_force_forwarding.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 50584479540b..a4b61c6d0290 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
+TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
new file mode 100644
index 000000000000..9d22561e7b8f
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "../../kselftest_harness.h"
+
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+
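+/*
+ * A single SCM_INQ control message: the cmsg header immediately followed by
+ * its int payload, so the same buffer serves as msg_control storage and can
+ * be inspected directly after recvmsg().
+ */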
+struct scm_inq {
+ struct cmsghdr cmsghdr;
+ int inq;
+};
+
+FIXTURE(scm_inq)
+{
+ int fd[2];
+};
+
+FIXTURE_VARIANT(scm_inq)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(scm_inq)
+{
+ int err;
+
+ err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(scm_inq)
+{
+ close(self->fd[0]);
+ close(self->fd[1]);
+}
+
+static void send_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ char buf[MSG_LEN] = {};
+ int i, ret;
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ ret = send(self->fd[0], buf, sizeof(buf), 0);
+ ASSERT_EQ(sizeof(buf), ret);
+ }
+}
+
+static void recv_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ struct msghdr msg = {};
+ struct iovec iov = {};
+ struct scm_inq cmsg;
+ char buf[MSG_LEN];
+ int i, ret;
+ int inq;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &cmsg;
+ msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq));
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ memset(buf, 0, sizeof(buf));
+ memset(&cmsg, 0, sizeof(cmsg));
+
+ ret = recvmsg(self->fd[1], &msg, 0);
+ ASSERT_EQ(MSG_LEN, ret);
+ ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg));
+ ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type);
+
+ ret = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(cmsg.inq, inq);
+ }
+}
+
+TEST_F(scm_inq, basic)
+{
+ int err, inq;
+
+ err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+ if (variant->type != SOCK_STREAM) {
+ ASSERT_EQ(-ENOPROTOOPT, -errno);
+ return;
+ }
+
+ ASSERT_EQ(0, err);
+
+ err = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, err);
+ ASSERT_EQ(0, inq);
+
+ send_chunks(_metadata, self);
+ recv_chunks(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
index 7e534594167e..37e034874034 100644
--- a/tools/testing/selftests/net/af_unix/scm_pidfd.c
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -15,6 +15,7 @@
#include <sys/types.h>
#include <sys/wait.h>
+#include "../../pidfd/pidfd.h"
#include "../../kselftest_harness.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
@@ -26,6 +27,8 @@
#define SCM_PIDFD 0x04
#endif
+#define CHILD_EXIT_CODE_OK 123
+
static void child_die()
{
exit(1);
@@ -126,16 +129,65 @@ out:
return result;
}
+struct cmsg_data {
+ struct ucred *ucred;
+ int *pidfd;
+};
+
+static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res)
+{
+ struct cmsghdr *cmsg;
+ int data = 0;
+
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_PIDFD) {
+ if (cmsg->cmsg_len < sizeof(*res->pidfd)) {
+ log_err("CMSG parse: SCM_PIDFD wrong len");
+ return 1;
+ }
+
+ res->pidfd = (void *)CMSG_DATA(cmsg);
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS) {
+ if (cmsg->cmsg_len < sizeof(*res->ucred)) {
+ log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+ return 1;
+ }
+
+ res->ucred = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ if (!res->pidfd) {
+ log_err("CMSG parse: SCM_PIDFD not found");
+ return 1;
+ }
+
+ if (!res->ucred) {
+ log_err("CMSG parse: SCM_CREDENTIALS not found");
+ return 1;
+ }
+
+ return 0;
+}
+
static int cmsg_check(int fd)
{
struct msghdr msg = { 0 };
- struct cmsghdr *cmsg;
+	struct cmsg_data res = { 0 };
struct iovec iov;
- struct ucred *ucred = NULL;
int data = 0;
char control[CMSG_SPACE(sizeof(struct ucred)) +
CMSG_SPACE(sizeof(int))] = { 0 };
- int *pidfd = NULL;
pid_t parent_pid;
int err;
@@ -158,53 +210,99 @@ static int cmsg_check(int fd)
return 1;
}
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
- cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_PIDFD) {
- if (cmsg->cmsg_len < sizeof(*pidfd)) {
- log_err("CMSG parse: SCM_PIDFD wrong len");
- return 1;
- }
+ /* send(pfd, "x", sizeof(char), 0) */
+ if (data != 'x') {
+ log_err("recvmsg: data corruption");
+ return 1;
+ }
- pidfd = (void *)CMSG_DATA(cmsg);
- }
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
+ return 1;
+ }
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_CREDENTIALS) {
- if (cmsg->cmsg_len < sizeof(*ucred)) {
- log_err("CMSG parse: SCM_CREDENTIALS wrong len");
- return 1;
- }
+ /* pidfd from SCM_PIDFD should point to the parent process PID */
+ parent_pid =
+ get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1);
+ if (parent_pid != getppid()) {
+ log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ close(*res.pidfd);
+ return 1;
+ }
- ucred = (void *)CMSG_DATA(cmsg);
- }
+ close(*res.pidfd);
+ return 0;
+}
+
+static int cmsg_check_dead(int fd, int expected_pid)
+{
+ int err;
+ struct msghdr msg = { 0 };
+ struct cmsg_data res;
+ struct iovec iov;
+ int data = 0;
+ char control[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int))] = { 0 };
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_EXIT,
+ };
+
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ err = recvmsg(fd, &msg, 0);
+ if (err < 0) {
+ log_err("recvmsg");
+ return 1;
}
- /* send(pfd, "x", sizeof(char), 0) */
- if (data != 'x') {
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ /* send(cfd, "y", sizeof(char), 0) */
+ if (data != 'y') {
log_err("recvmsg: data corruption");
return 1;
}
- if (!pidfd) {
- log_err("CMSG parse: SCM_PIDFD not found");
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
return 1;
}
- if (!ucred) {
- log_err("CMSG parse: SCM_CREDENTIALS not found");
+ /*
+ * pidfd from SCM_PIDFD should point to the client_pid.
+ * Let's read exit information and check if it's what
+ * we expect to see.
+ */
+ if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) {
+ log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__);
+ close(*res.pidfd);
return 1;
}
- /* pidfd from SCM_PIDFD should point to the parent process PID */
- parent_pid =
- get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
- if (parent_pid != getppid()) {
- log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ if (!(info.mask & PIDFD_INFO_EXIT)) {
+ log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__);
+ close(*res.pidfd);
return 1;
}
+ err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1;
+ if (err != CHILD_EXIT_CODE_OK) {
+ log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK);
+ close(*res.pidfd);
+ return 1;
+ }
+
+ close(*res.pidfd);
return 0;
}
@@ -291,6 +389,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract)
memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
}
+static int sk_enable_cred_pass(int sk)
+{
+ int on = 1;
+
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSCRED");
+ return 1;
+ }
+
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSPIDFD");
+ return 1;
+ }
+
+ return 0;
+}
+
static void client(FIXTURE_DATA(scm_pidfd) *self,
const FIXTURE_VARIANT(scm_pidfd) *variant)
{
@@ -299,7 +415,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
struct ucred peer_cred;
int peer_pidfd;
pid_t peer_pid;
- int on = 0;
cfd = socket(AF_UNIX, variant->type, 0);
if (cfd < 0) {
@@ -322,14 +437,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
- on = 1;
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSCRED");
- child_die();
- }
-
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSPIDFD");
+ if (sk_enable_cred_pass(cfd)) {
+ log_err("sk_enable_cred_pass() failed");
child_die();
}
@@ -340,6 +449,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
+ /* send something to the parent so it can receive SCM_PIDFD too and validate it */
+ if (send(cfd, "y", sizeof(char), 0) == -1) {
+ log_err("Failed to send(cfd, \"y\", sizeof(char), 0)");
+ child_die();
+ }
+
/* skip further for SOCK_DGRAM as it's not applicable */
if (variant->type == SOCK_DGRAM)
return;
@@ -398,7 +513,13 @@ TEST_F(scm_pidfd, test)
close(self->server);
close(self->startup_pipe[0]);
client(self, variant);
- exit(0);
+
+ /*
+ * It's a bit unusual, but in case of success we return non-zero
+ * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it
+ * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead().
+ */
+ exit(CHILD_EXIT_CODE_OK);
}
close(self->startup_pipe[1]);
@@ -421,9 +542,17 @@ TEST_F(scm_pidfd, test)
ASSERT_NE(-1, err);
}
- close(pfd);
waitpid(self->client_pid, &child_status, 0);
- ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+ /* see comment before exit(CHILD_EXIT_CODE_OK) */
+ ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+
+ err = sk_enable_cred_pass(pfd);
+ ASSERT_EQ(0, err);
+
+ err = cmsg_check_dead(pfd, self->client_pid);
+ ASSERT_EQ(0, err);
+
+ close(pfd);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m += bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
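+
+# Example (illustrative): build against an explicit kernel build tree,
+# with verbose output:
+#   make KDIR=/path/to/kernel/build V=1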
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..cb6468adbda4
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/page_pool/helpers.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary numbers like: $((2#101010))
+ *
+ * # modprobe bench_page_pool run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+ bit_run_bench_baseline,
+ bit_run_bench_no_softirq01,
+ bit_run_bench_no_softirq02,
+ bit_run_bench_no_softirq03,
+};
+
+#define bit(b) (1 << (b))
+#define enabled(b) ((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
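+
+/*
+ * Example (illustrative, not part of the module code): limit the run to
+ * the ptr_ring return-path test (bit 2) and shorten it, using the module
+ * built from this directory's Makefile:
+ *
+ * # insmod bench_page_pool.ko run_flags=$((2#100)) loops=1000000
+ */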
+
+/* When timing at the nanosec level, we need to know the overhead
+ * introduced by the for loop itself
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ int i;
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ loops_cnt++;
+ barrier(); /* keep the compiler from optimizing out this loop */
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ atomic_t cnt;
+ int i;
+
+ atomic_set(&cnt, 0);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ atomic_inc(&cnt);
+ barrier(); /* keep the compiler from optimizing out this loop */
+ }
+ loops_cnt = atomic_read(&cnt);
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* The ptr_ring in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ spinlock_t lock;
+ int i;
+
+ spin_lock_init(&lock);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ spin_lock(&lock);
+ loops_cnt++;
+ barrier(); /* keep the compiler from optimizing out this loop */
+ spin_unlock(&lock);
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* Helper for filling some pages into the ptr_ring */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+ /* GFP_ATOMIC needed when run under softirq */
+ gfp_t gfp_mask = GFP_ATOMIC;
+ struct page **array;
+ int i;
+
+ array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+ if (!array)
+ return;
+
+ for (i = 0; i < elems; i++)
+ array[i] = page_pool_alloc_pages(pp, gfp_mask);
+ for (i = 0; i < elems; i++)
+ page_pool_put_page(pp, array[i], -1, false);
+
+ kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Depends on the compiler optimizing this function */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+ enum test_type type, const char *func)
+{
+ uint64_t loops_cnt = 0;
+ gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+ int i, err;
+
+ struct page_pool *pp;
+ struct page *page;
+
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = MY_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = NULL, /* Only use for DMA mapping */
+ .dma_dir = DMA_BIDIRECTIONAL,
+ };
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp)) {
+ err = PTR_ERR(pp);
+ pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+ goto out;
+ }
+ pp_fill_ptr_ring(pp, 64);
+
+ if (in_serving_softirq())
+ pr_warn("%s(): in_serving_softirq fast-path\n", func);
+ else
+ pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ /* Common fast-path alloc that depends on in_serving_softirq() */
+ page = page_pool_alloc_pages(pp, gfp_mask);
+ if (!page)
+ break;
+ loops_cnt++;
+ barrier(); /* keep the compiler from optimizing out this loop */
+
+ /* The benchmark's purpose is to test different return paths.
+ * The compiler should inline and optimize the other function calls out
+ */
+ if (type == type_fast_path) {
+ /* Fast-path recycling e.g. XDP_DROP use-case */
+ page_pool_recycle_direct(pp, page);
+
+ } else if (type == type_ptr_ring) {
+ /* Normal return path */
+ page_pool_put_page(pp, page, -1, false);
+
+ } else if (type == type_page_allocator) {
+ /* Test the case where pages are not recycled, but
+ * instead returned back to the system's page allocator
+ */
+ get_page(page); /* cause no-recycling */
+ page_pool_put_page(pp, page, -1, false);
+ put_page(page);
+ } else {
+ BUILD_BUG();
+ }
+ }
+ time_bench_stop(rec, loops_cnt);
+ page_pool_destroy(pp);
+out:
+ return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+ uint32_t nr_loops = loops;
+
+ /* Baseline tests */
+ if (enabled(bit_run_bench_baseline)) {
+ time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+ time_bench_for_loop);
+ time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+ time_bench_atomic_inc);
+ time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+ }
+
+ /* These tests cannot activate the correct code path, due to no-softirq ctx */
+ if (enabled(bit_run_bench_no_softirq01))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+ time_bench_page_pool01_fast_path);
+ if (enabled(bit_run_bench_no_softirq02))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+ time_bench_page_pool02_ptr_ring);
+ if (enabled(bit_run_bench_no_softirq03))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+ time_bench_page_pool03_slow);
+
+ return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+ if (verbose)
+ pr_info("Loaded\n");
+
+ if (loops > U32_MAX) {
+ pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+ U32_MAX);
+ return -ECHRNG;
+ }
+
+ run_benchmark_tests();
+
+ return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+ if (verbose)
+ pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: time_bench.h
+ * tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT \
+ (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+struct raw_perf_event {
+ uint64_t config; /* event */
+ uint64_t config1; /* umask */
+ struct perf_event *save;
+ char *desc;
+};
+
+/* If HT is enabled, a maximum of 4 events (5 if one is instructions
+ * retired) can be specified; if HT is disabled, a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+struct raw_perf_event perf_events[] = {
+ { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
+ { 0xc0, 0x00, NULL, "Instruction Retired" }
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* WARNING: PMU config is currently broken!
+ */
+bool time_bench_PMU_config(bool enable)
+{
+ int i;
+ struct perf_event_attr perf_conf;
+ struct perf_event *perf_event;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ pr_info("DEBUG: cpu:%d\n", cpu);
+ preempt_enable();
+
+ memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+ perf_conf.type = PERF_TYPE_RAW;
+ perf_conf.size = sizeof(struct perf_event_attr);
+ perf_conf.read_format = PERF_FORMAT;
+ perf_conf.pinned = 1;
+ perf_conf.exclude_user = 1; /* No userspace events */
+ perf_conf.exclude_kernel = 0; /* Only kernel events */
+
+ for (i = 0; i < NUM_EVTS; i++) {
+ perf_conf.disabled = enable;
+ //perf_conf.disabled = (i == 0) ? 1 : 0;
+ perf_conf.config = perf_events[i].config;
+ perf_conf.config1 = perf_events[i].config1;
+ if (verbose)
+ pr_info("%s() enable PMU counter: %s\n",
+ __func__, perf_events[i].desc);
+ perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+ NULL /* task */,
+ NULL /* overflow_handler*/,
+ NULL /* context */);
+ if (!IS_ERR_OR_NULL(perf_event)) {
+ perf_events[i].save = perf_event;
+ pr_info("%s():DEBUG perf_event success\n", __func__);
+
+ perf_event_enable(perf_event);
+ } else {
+ pr_info("%s():DEBUG perf_event is NULL\n", __func__);
+ }
+ }
+
+ return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+ uint64_t ns_per_call_tmp_rem = 0;
+ uint32_t ns_per_call_remainder = 0;
+ uint64_t pmc_ipc_tmp_rem = 0;
+ uint32_t pmc_ipc_remainder = 0;
+ uint32_t pmc_ipc_div = 0;
+ uint32_t invoked_cnt_precision = 0;
+ uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ if (rec->invoked_cnt < 1000) {
+ pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+ /* div_u64_rem() only supports a 32-bit divisor */
+ pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ invoked_cnt = (uint32_t)rec->invoked_cnt;
+ }
+
+ /* TSC (Time-Stamp Counter) records */
+ if (rec->flags & TIME_BENCH_TSC) {
+ rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+ if (rec->tsc_interval == 0) {
+ pr_err("ABORT: timing took ZERO TSC time\n");
+ return false;
+ }
+ /* Calculate stats */
+ if (rec->flags & TIME_BENCH_LOOP)
+ rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+ else
+ rec->tsc_cycles = rec->tsc_interval;
+ }
+
+ /* Wall-clock time calc */
+ if (rec->flags & TIME_BENCH_WALLCLOCK) {
+ rec->time_start = rec->ts_start.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+ rec->time_stop = rec->ts_stop.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+ rec->time_interval = rec->time_stop - rec->time_start;
+ if (rec->time_interval == 0) {
+ pr_err("ABORT: timing took ZERO wallclock time\n");
+ return false;
+ }
+ /* Calculate stats */
+ /*** Division in the kernel is tricky ***/
+ /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+ /* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+ rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+ &rec->time_sec_remainder);
+ //TODO: use existing struct timespec records instead of div?
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ /*** Division in the kernel is tricky ***/
+ /* Orig: ns = ((double)time_interval / invoked_cnt); */
+ /* First get quotient */
+ rec->ns_per_call_quotient =
+ div_u64_rem(rec->time_interval, invoked_cnt,
+ &ns_per_call_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ ns_per_call_tmp_rem = ns_per_call_remainder;
+ invoked_cnt_precision = invoked_cnt / 1000;
+ if (invoked_cnt_precision > 0) {
+ rec->ns_per_call_decimal =
+ div_u64_rem(ns_per_call_tmp_rem,
+ invoked_cnt_precision,
+ &ns_per_call_remainder);
+ }
+ }
+ }
+
+ /* Performance Monitor Unit (PMU) counters */
+ if (rec->flags & TIME_BENCH_PMU) {
+ //FIXME: Overflow handling???
+ rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+ rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+ /* Calc Instruction Per Cycle (IPC) */
+ /* First get quotient */
+ rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
+ &pmc_ipc_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ pmc_ipc_tmp_rem = pmc_ipc_remainder;
+ pmc_ipc_div = rec->pmc_clk / 1000;
+ if (pmc_ipc_div > 0) {
+ rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
+ pmc_ipc_div,
+ &pmc_ipc_remainder);
+ }
+ }
+
+ return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats. The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ struct time_bench_record rec;
+
+ /* Setup record */
+ memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+ rec.version_abi = 1;
+ rec.loops = loops;
+ rec.step = step;
+ rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+ /*** Loop function being timed ***/
+ if (!func(&rec, data)) {
+ pr_err("ABORT: function being timed failed\n");
+ return false;
+ }
+
+ if (rec.invoked_cnt < loops)
+ pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n",
+ rec.invoked_cnt, loops);
+
+ /* Calculate stats */
+ time_bench_calc_stats(&rec);
+
+ pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+ rec.ns_per_call_decimal, rec.step, rec.time_sec,
+ rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+ rec.tsc_interval);
+ if (rec.flags & TIME_BENCH_PMU)
+ pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+ txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+ rec.pmc_ipc_decimal);
+ return true;
+}
+
+/* Function getting invoked by kthread */
+static int invoke_test_on_cpu_func(void *private)
+{
+ struct time_bench_cpu *cpu = private;
+ struct time_bench_sync *sync = cpu->sync;
+ cpumask_t newmask = CPU_MASK_NONE;
+ void *data = cpu->data;
+
+ /* Restrict CPU */
+ cpumask_set_cpu(cpu->rec.cpu, &newmask);
+ set_cpus_allowed_ptr(current, &newmask);
+
+ /* Synchronize start of concurrency test */
+ atomic_inc(&sync->nr_tests_running);
+ wait_for_completion(&sync->start_event);
+
+ /* Start benchmark function */
+ if (!cpu->bench_func(&cpu->rec, data)) {
+ pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+ cpu->rec.cpu, smp_processor_id());
+ } else {
+ if (verbose)
+ pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+ smp_processor_id());
+ }
+ cpu->did_bench_run = true;
+
+ /* End test */
+ atomic_dec(&sync->nr_tests_running);
+ /* Wait for kthread_stop() telling us to stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask)
+{
+ uint64_t average = 0;
+ int cpu;
+ int step = 0;
+ struct sum {
+ uint64_t tsc_cycles;
+ int records;
+ } sum = { 0 };
+
+ /* Get stats */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+ struct time_bench_record *rec = &c->rec;
+
+ /* Calculate stats */
+ time_bench_calc_stats(rec);
+
+ pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+ rec->ns_per_call_decimal, rec->step, rec->time_sec,
+ rec->time_sec_remainder, rec->time_interval,
+ rec->invoked_cnt, rec->tsc_interval);
+
+ /* Collect average */
+ sum.records++;
+ sum.tsc_cycles += rec->tsc_cycles;
+ step = rec->step;
+ }
+
+ if (sum.records) /* avoid div-by-zero */
+ average = sum.tsc_cycles / sum.records;
+ pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+ average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking out some CPUs */
+ struct time_bench_sync *sync,
+ struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ int cpu, running = 0;
+
+ if (verbose) // DEBUG
+ pr_warn("%s() Started on CPU:%d\n", __func__,
+ smp_processor_id());
+
+ /* Reset sync conditions */
+ atomic_set(&sync->nr_tests_running, 0);
+ init_completion(&sync->start_event);
+
+ /* Spawn off jobs on all CPUs */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ running++;
+ c->sync = sync; /* Send sync variable along */
+ c->data = data; /* Send opaque along */
+
+ /* Init benchmark record */
+ memset(&c->rec, 0, sizeof(struct time_bench_record));
+ c->rec.version_abi = 1;
+ c->rec.loops = loops;
+ c->rec.step = step;
+ c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+ TIME_BENCH_WALLCLOCK);
+ c->rec.cpu = cpu;
+ c->bench_func = func;
+ c->task = kthread_run(invoke_test_on_cpu_func, c,
+ "time_bench%d", cpu);
+ if (IS_ERR(c->task)) {
+ pr_err("%s(): Failed to start test func\n", __func__);
+ return; /* Argh, what about cleanup?! */
+ }
+ }
+
+ /* Wait until all processes are running */
+ while (atomic_read(&sync->nr_tests_running) < running) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+ /* Kick off all CPU concurrently on completion event */
+ complete_all(&sync->start_event);
+
+ /* Wait for CPUs to finish */
+ while (atomic_read(&sync->nr_tests_running)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+
+ /* Stop the kthreads */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ kthread_stop(c->task);
+ }
+
+ if (verbose) // DEBUG - this often finishes on another CPU
+ pr_warn("%s() Finished on CPU:%d\n", __func__,
+ smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ * for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+struct time_bench_record {
+ uint32_t version_abi;
+ uint32_t loops; /* Requested loop invocations */
+ uint32_t step; /* option for e.g. bulk invocations */
+
+ uint32_t flags; /* Measurements types enabled */
+#define TIME_BENCH_LOOP BIT(0)
+#define TIME_BENCH_TSC BIT(1)
+#define TIME_BENCH_WALLCLOCK BIT(2)
+#define TIME_BENCH_PMU BIT(3)
+
+ uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+ /* Records */
+ uint64_t invoked_cnt; /* Returned actual invocations */
+ uint64_t tsc_start;
+ uint64_t tsc_stop;
+ struct timespec64 ts_start;
+ struct timespec64 ts_stop;
+ /* PMU counters for instruction and cycles
+ * instructions counter including pipelined instructions
+ */
+ uint64_t pmc_inst_start;
+ uint64_t pmc_inst_stop;
+ /* CPU unhalted clock counter */
+ uint64_t pmc_clk_start;
+ uint64_t pmc_clk_stop;
+
+ /* Result records */
+ uint64_t tsc_interval;
+ uint64_t time_start, time_stop, time_interval; /* in nanosec */
+ uint64_t pmc_inst, pmc_clk;
+
+ /* Derived result records */
+ uint64_t tsc_cycles; // +decimal?
+ uint64_t ns_per_call_quotient, ns_per_call_decimal;
+ uint64_t time_sec;
+ uint32_t time_sec_remainder;
+ uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+ atomic_t nr_tests_running;
+ struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+ struct time_bench_record rec;
+ struct time_bench_sync *sync; /* back ptr */
+ struct task_struct *task;
+ /* "data" opaque could have been placed in time_bench_sync,
+ * but to avoid any false sharing, place it per CPU
+ */
+ void *data;
+ /* Support masking out some CPUs; mark if it ran */
+ bool did_bench_run;
+ /* int cpu; // note CPU stored in time_bench_record */
+ int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way the TSC reading is used here (many iterations) does not
+ * require accuracy as high as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ * unsigned long flags;
+ * preempt_disable();
+ * raw_local_irq_save(flags);
+ * _your_code_
+ * raw_local_irq_restore(flags);
+ * preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ * RDTSC only changes "%rax" and "%rdx", but
+ * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("CPUID\n\t"
+ "RDTSC\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ //FIXME: on 32bit use clobbered %eax + %edx
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("RDTSCP\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ "CPUID\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ * getnstimeofday(&rec->ts_start);
+ * getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use ktime_get_real_ts64(), which is a direct
+ * replacement, but consider using monotonic time (ktime_get_ts64())
+ * and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tells how efficiently the CPU pipeline was used
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+enum {
+ FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+ FIXED_INST_RETIRED_ANY = 0,
+ FIXED_CPU_CLK_UNHALTED_CORE = 1,
+ FIXED_CPU_CLK_UNHALTED_REF = 2,
+};
+
+static __always_inline unsigned long long p_rdpmc(unsigned int in)
+{
+ unsigned int d, a;
+
+ asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
+ return ((unsigned long long)d << 32) | a;
+}
+
+/* These PMU counters need to be enabled, but I don't have the
+ * configuration code implemented. My current hack is running:
+ * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instructions */
+static __always_inline unsigned long long pmc_inst(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles */
+static __always_inline unsigned long long pmc_clk(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are configured for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+ return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking out some CPUs */
+ struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask);
+
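+/* Sketch (illustrative, not used by the bench module in this series):
+ * running a benchmark callback concurrently on all online CPUs with the
+ * API above could look like this; my_bench_func is a hypothetical
+ * function with the time_bench_record/void-data signature:
+ *
+ * struct time_bench_sync sync;
+ * struct time_bench_cpu *cpu_tasks;
+ *
+ * cpu_tasks = kcalloc(nr_cpu_ids, sizeof(*cpu_tasks), GFP_KERNEL);
+ * if (!cpu_tasks)
+ * return -ENOMEM;
+ * time_bench_run_concurrent(1000000, 0, NULL, cpu_online_mask,
+ * &sync, cpu_tasks, my_bench_func);
+ * time_bench_print_stats_cpumask("example", cpu_tasks, cpu_online_mask);
+ * kfree(cpu_tasks);
+ */
+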
+//FIXME: use rec->flags to select measurement, should be MACRO
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+ //getnstimeofday(&rec->ts_start);
+ ktime_get_real_ts64(&rec->ts_start);
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_start = pmc_inst();
+ rec->pmc_clk_start = pmc_clk();
+ }
+ rec->tsc_start = tsc_start_clock();
+}
+
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+ uint64_t invoked_cnt)
+{
+ rec->tsc_stop = tsc_stop_clock();
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_stop = pmc_inst();
+ rec->pmc_clk_stop = pmc_clk();
+ }
+ //getnstimeofday(&rec->ts_stop);
+ ktime_get_real_ts64(&rec->ts_stop);
+ rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+function run_test()
+{
+ rmmod "bench_page_pool.ko" || true
+ insmod $DRIVER > /dev/null 2>&1
+ result=$(dmesg | tail -10)
+ echo "$result"
+
+ echo
+ echo "Fast path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "ptr_ring results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "slow path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+}
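+
+# Illustrative only: the module also accepts parameters (see
+# bench_page_pool_simple.c), e.g. a manual baseline-only run with fewer
+# iterations:
+#   insmod $DRIVER run_flags=$((2#1)) loops=1000000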
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh
new file mode 100755
index 000000000000..726eb5d25839
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_pmtu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Ensures broadcast route MTU is respected
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="192.168.0.1/24"
+CLIENT_BROADCAST_ADDRESS="192.168.0.255"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="192.168.0.2/24"
+
+setup() {
+ ip netns add "${CLIENT_NS}"
+ ip netns add "${SERVER_NS}"
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" link set link0 mtu 9000
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" link set link1 mtu 1500
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1
+
+ read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)"
+ ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}"
+ ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500
+
+ ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ ip netns del "${CLIENT_NS}"
+ ip netns del "${SERVER_NS}"
+}
+
+trap cleanup EXIT
+
+setup &&
+ echo "Testing for broadcast route MTU" &&
+ ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1
+
+exit $?
+
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3cfef5153823..c24417d0047b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -30,16 +30,25 @@ CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_SEG6_LWTUNNEL=y
@@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 00bde7b6f39e..d7bb2e80e88c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_bridge_1d_port_8472.sh \
vxlan_bridge_1d.sh \
vxlan_bridge_1q_ipv6.sh \
+ vxlan_bridge_1q_mc_ul.sh \
vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 508f3c700d71..890b3374dacd 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -37,6 +37,7 @@ declare -A NETIFS=(
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
@@ -141,6 +142,20 @@ check_tc_version()
fi
}
+check_tc_erspan_support()
+{
+ local dev=$1; shift
+
+ tc filter add dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing erspan support"
+ return $ksft_skip
+ fi
+ tc filter del dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+}
+
# Old versions of tc don't understand "mpls_uc"
check_tc_mpls_support()
{
@@ -525,9 +540,9 @@ setup_wait_dev_with_timeout()
return 1
}
-setup_wait()
+setup_wait_n()
{
- local num_netifs=${1:-$NUM_NETIFS}
+ local num_netifs=$1; shift
local i
for ((i = 1; i <= num_netifs; ++i)); do
@@ -538,6 +553,11 @@ setup_wait()
sleep $WAIT_TIME
}
+setup_wait()
+{
+ setup_wait_n "$NUM_NETIFS"
+}
+
wait_for_dev()
{
local dev=$1; shift
@@ -1757,6 +1777,51 @@ mc_send()
msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
}
+adf_mcd_start()
+{
+ local ifs=("$@")
+
+ local table_name="$MCD_TABLE_NAME"
+ local smcroutedir
+ local pid
+ local if
+ local i
+
+ check_command "$MCD" || return 1
+ check_command "$MC_CLI" || return 1
+
+ smcroutedir=$(mktemp -d)
+ defer rm -rf "$smcroutedir"
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ for if in "${ifs[@]}"; do
+ if ! ip_link_has_flag "$if" MULTICAST; then
+ ip link set dev "$if" multicast on
+ defer ip link set dev "$if" multicast off
+ fi
+
+ echo "phyint $if enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+ -P "$smcroutedir/$table_name.pid"
+ busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+ pid=$(cat "$smcroutedir/$table_name.pid")
+ defer kill_process "$pid"
+}
+
+mc_cli()
+{
+ local table_name="$MCD_TABLE_NAME"
+
+ "$MC_CLI" -I "$table_name" "$@"
+}
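+
+# Example usage (illustrative; $iif/$oif1/$oif2 stand in for test
+# interfaces): callers start the daemon once and then program (S,G)
+# routes through mc_cli, relying on defer for cleanup:
+#
+#   adf_mcd_start "$iif" || exit "$EXIT_STATUS"
+#   mc_cli add "$iif" 192.0.2.1 225.1.2.3 "$oif1" "$oif2"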
+
start_ip_monitor()
{
local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
source lib.sh
source tc_common.sh
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
h1_create()
{
simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
ip link set dev $rp1 down
}
-start_mcd()
-{
- SMCROUTEDIR="$(mktemp -d)"
-
- for ((i = 1; i <= $NUM_NETIFS; ++i)); do
- echo "phyint ${NETIFS[p$i]} enable" >> \
- $SMCROUTEDIR/$table_name.conf
- done
-
- $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
- -P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
- pkill $MCD
- rm -rf $SMCROUTEDIR
-}
-
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
rp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
- start_mcd
+ adf_mcd_start || exit "$EXIT_STATUS"
vrf_prepare
@@ -206,7 +183,7 @@ cleanup()
vrf_cleanup
- kill_mcd
+ defer_scopes_cleanup
}
create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+ mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+ mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index b1daad19b01e..b58909a93112 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_ip_tos_test match_indev_test match_ip_ttl_test
match_mpls_label_test \
match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
- match_mpls_lse_test"
+ match_mpls_lse_test match_erspan_opts_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -676,6 +676,56 @@ match_mpls_lse_test()
log_test "mpls lse match ($tcflags)"
}
+match_erspan_opts_test()
+{
+ RET=0
+
+ check_tc_erspan_support $h2 || return 0
+
+ # h1 erspan setup
+ tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \
+ tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II
+ tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \
+ tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \
+ # ERSPAN Type III
+ ip link set dev erspan1 master v$h1
+ ip link set dev erspan2 master v$h1
+ # h2 erspan setup
+ ip link add ep-ex type erspan ttl 64 external # To collect tunnel info
+ ip link set ep-ex up
+ ip link set dev ep-ex master v$h2
+ tc qdisc add dev ep-ex clsact
+
+ # ERSPAN Type II [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 101 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1001 erspan_opts 1:6789:0:0 \
+ action drop
+ # ERSPAN Type III [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 102 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1002 erspan_opts 2:0:1:63 action drop
+
+ ep1mac=$(mac_get erspan1)
+ $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 101 1
+ check_err $? "ERSPAN Type II"
+
+ ep2mac=$(mac_get erspan2)
+ $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 102 1
+ check_err $? "ERSPAN Type III"
+
+ # h2 erspan cleanup
+ tc qdisc del dev ep-ex clsact
+ tunnel_destroy ep-ex
+ # h1 erspan cleanup
+ tunnel_destroy erspan2 # ERSPAN Type III
+ tunnel_destroy erspan1 # ERSPAN Type II
+
+ log_test "erspan_opts match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..462db0b603e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,771 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10 + $h1.20 |
+# | | 192.0.2.1/28 | 2001:db8:1::1/64 |
+# | \________ ________/ |
+# | \ / |
+# | + $h1 H1 (vrf) |
+# +-----------|-----------------------------+
+# |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+ SWITCH (main vrf) |
+# | | + $swp1 BR1 (802.1q) | |
+# | | vid 10 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) |
+# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 |
+# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 |
+# | | id 1000 id 2000 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +------------------------------------------------+ |
+# | |
+# | + $swp2 $swp3 + |
+# | | 192.0.2.33/28 192.0.2.65/28 | |
+# | | 2001:db8:2::1/64 2001:db8:3::1/64 | |
+# | | | |
+# +---|--------------------------------------------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | | H2 (vrf) | | H3 (vrf) | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | |
+# | | | | | | | |
+# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | | | | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + |
+# | 192.0.2.34/28 | | 192.0.2.66/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 |
+# | | | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# | | BR1 (802.1q) | | | | BR1 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.66 | |
+# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | |
+# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 20 | | | | | vid 10 20 | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | | | | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# +------------------------------------+ +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+ ipv4_nomcroute
+ ipv4_mcroute
+ ipv4_mcroute_changelink
+ ipv4_mcroute_starg
+ ipv4_mcroute_noroute
+ ipv4_mcroute_fdb
+ ipv4_mcroute_fdb_oif0
+ ipv4_mcroute_fdb_oif0_sep
+
+ ipv6_nomcroute
+ ipv6_mcroute
+ ipv6_mcroute_changelink
+ ipv6_mcroute_starg
+ ipv6_mcroute_noroute
+ ipv6_mcroute_fdb
+ ipv6_mcroute_fdb_oif0
+
+ ipv4_nomcroute_rx
+ ipv4_mcroute_rx
+ ipv4_mcroute_starg_rx
+ ipv4_mcroute_fdb_oif0_sep_rx
+ ipv4_mcroute_fdb_sep_rx
+
+ ipv6_nomcroute_rx
+ ipv6_mcroute_rx
+ ipv6_mcroute_starg_rx
+ ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init "$h1"
+ defer simple_if_fini "$h1"
+
+ ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+ ip_link_set_up "$h1.10"
+ ip_addr_add "$h1.10" 192.0.2.1/28
+
+ ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+ ip_link_set_up "$h1.20"
+ ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+install_capture()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev "$dev" clsact
+ defer tc qdisc del dev "$dev" clsact
+
+ tc filter add dev "$dev" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ip pref 104
+
+ tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+h2_create()
+{
+ # $h2
+ ip_link_set_up "$h2"
+
+ # H2
+ vrf_create "v$h2"
+ defer vrf_destroy "v$h2"
+
+ ip_link_set_up "v$h2"
+
+ # br2
+ ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br2 "v$h2"
+ ip_link_set_up br2
+
+ # $h2
+ ip_link_set_master "$h2" br2
+ install_capture "$h2"
+
+ # v1$h2
+ ip_link_set_up "v1$h2"
+ ip_link_set_master "v1$h2" br2
+}
+
+h3_create()
+{
+ # $h3
+ ip_link_set_up "$h3"
+
+ # H3
+ vrf_create "v$h3"
+ defer vrf_destroy "v$h3"
+
+ ip_link_set_up "v$h3"
+
+ # br3
+ ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br3 "v$h3"
+ ip_link_set_up br3
+
+ # $h3
+ ip_link_set_master "$h3" br3
+ install_capture "$h3"
+
+ # v1$h3
+ ip_link_set_up "v1$h3"
+ ip_link_set_master "v1$h3" br3
+}
+
+switch_create()
+{
+ local swp1_mac
+
+ # br1
+ swp1_mac=$(mac_get "$swp1")
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_addr br1 "$swp1_mac"
+ ip_link_set_up br1
+
+ # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+ # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+ ip_link_add "X$IPMR" up type dummy
+
+ # IPMR
+ ip_link_add "$IPMR" up type dummy
+ ip_addr_add "$IPMR" 192.0.2.100/28
+ ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+ # $swp1
+ ip_link_set_up "$swp1"
+ ip_link_set_master "$swp1" br1
+ bridge_vlan_add vid 10 dev "$swp1"
+ bridge_vlan_add vid 20 dev "$swp1"
+
+ # $swp2
+ ip_link_set_up "$swp2"
+ ip_addr_add "$swp2" 192.0.2.33/28
+ ip_addr_add "$swp2" 2001:db8:2::1/64
+
+ # $swp3
+ ip_link_set_up "$swp3"
+ ip_addr_add "$swp3" 192.0.2.65/28
+ ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+vx_create()
+{
+ local name=$1; shift
+ local vid=$1; shift
+
+ ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 16 \
+ "$@"
+ ip_link_set_master "$name" br1
+ bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+vx_wait()
+{
+ # Wait for all the ARP, IGMP etc. noise to settle down so that the
+ # tunnel is clear for measurements.
+ sleep 10
+}
+
+vx10_create()
+{
+ vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+vx20_create()
+{
+ vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+vx10_create_wait()
+{
+ vx10_create "$@"
+ vx_wait
+}
+
+vx20_create_wait()
+{
+ vx20_create "$@"
+ vx_wait
+}
+
+ns_init_common()
+{
+ local ns=$1; shift
+ local if_in=$1; shift
+ local ipv4_in=$1; shift
+ local ipv6_in=$1; shift
+ local ipv4_host=$1; shift
+ local ipv6_host=$1; shift
+
+ # v2$h2 / v2$h3
+ ip_link_set_up "$if_in"
+ ip_addr_add "$if_in" "$ipv4_in"
+ ip_addr_add "$if_in" "$ipv6_in"
+
+ # br1
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_up br1
+
+ # vx10, vx20
+ vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+ vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+ # w1
+ ip_link_add w1 type veth peer name w2
+ ip_link_set_master w1 br1
+ ip_link_set_up w1
+ bridge_vlan_add vid 10 dev w1
+ bridge_vlan_add vid 20 dev w1
+
+ # w2
+ simple_if_init w2
+ defer simple_if_fini w2
+
+ # w2.10
+ ip_link_add w2.10 master vw2 link w2 type vlan id 10
+ ip_link_set_up w2.10
+ ip_addr_add w2.10 "$ipv4_host"
+
+ # w2.20
+ ip_link_add w2.20 master vw2 link w2 type vlan id 20
+ ip_link_set_up w2.20
+ ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+ns2_create()
+{
+ # NS2
+ ip netns add ns2
+ defer ip netns del ns2
+
+ # v2$h2
+ ip link set dev "v2$h2" netns ns2
+ defer ip -n ns2 link set dev "v2$h2" netns 1
+
+ in_ns ns2 \
+ ns_init_common ns2 "v2$h2" \
+ 192.0.2.34/28 2001:db8:2::2/64 \
+ 192.0.2.3/28 2001:db8:1::3/64
+}
+
+ns3_create()
+{
+ # NS3
+ ip netns add ns3
+ defer ip netns del ns3
+
+ # v2$h3
+ ip link set dev "v2$h3" netns ns3
+ defer ip -n ns3 link set dev "v2$h3" netns 1
+
+ ip -n ns3 link set dev "v2$h3" up
+
+ in_ns ns3 \
+ ns_init_common ns3 "v2$h3" \
+ 192.0.2.66/28 2001:db8:3::2/64 \
+ 192.0.2.4/28 2001:db8:1::4/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ defer vrf_cleanup
+
+ forwarding_enable
+ defer forwarding_restore
+
+ ip_link_add "v1$h2" type veth peer name "v2$h2"
+ ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+ ns2_create
+ ns3_create
+}
+
+adf_install_broken_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+ mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+ mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp2" :: "$GROUP6" lo10
+
+ mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp3" :: "$GROUP6" lo10
+}
+
+adf_install_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_sg_sep()
+{
+ adf_mcd_start lo || exit "$EXIT_STATUS"
+
+ mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+adf_install_sg_sep_rx()
+{
+ local lo=$1; shift
+
+ adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+ mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_starg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+do_packets_v4()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+do_packets_v6()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+do_test()
+{
+ local ipv=$1; shift
+ local expect_h2=$1; shift
+ local expect_h3=$1; shift
+ local what=$1; shift
+
+ local pref=$((100 + ipv))
+ local t0_h2
+ local t0_h3
+ local t1_h2
+ local t1_h3
+ local d_h2
+ local d_h3
+
+ RET=0
+
+ t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ "do_packets_v$ipv"
+ sleep 1
+
+ t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ d_h2=$((t1_h2 - t0_h2))
+ d_h3=$((t1_h3 - t0_h3))
+
+ ((d_h2 == expect_h2))
+ check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+ ((d_h3 == expect_h3))
+ check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping_do "$h1.10" 192.0.2.3
+ check_err $? "H2 should respond"
+
+ ping_do "$h1.10" 192.0.2.4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv6_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping6_do "$h1.20" 2001:db8:1::3
+ check_err $? "H2 should respond"
+
+ ping6_do "$h1.20" 2001:db8:1::4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_nomcroute()
+{
+ # Install a misleading (S,G) rule to attempt to trick the system into
+ # pushing the packets elsewhere.
+ adf_install_broken_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ do_test 4 10 0 "IPv4 nomcroute"
+}
+
+ipv6_nomcroute()
+{
+ # Like for IPv4, install a misleading (S,G).
+ adf_install_broken_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ do_test 6 10 0 "IPv6 nomcroute"
+}
+
+ipv4_nomcroute_rx()
+{
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+ipv6_nomcroute_rx()
+{
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+ipv4_mcroute()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute"
+}
+
+ipv6_mcroute()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute"
+}
+
+ipv4_mcroute_rx()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+ipv6_mcroute_rx()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+ipv4_mcroute_changelink()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+ ip link set dev vx10 type vxlan mcroute
+ sleep 1
+ do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+ipv6_mcroute_changelink()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ip link set dev vx20 type vxlan mcroute
+ sleep 1
+ do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+ipv4_mcroute_starg()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+ipv6_mcroute_starg()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+ipv4_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+ipv6_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+ipv4_mcroute_noroute()
+{
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+ipv6_mcroute_noroute()
+{
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+ipv4_mcroute_fdb()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+ bridge fdb add dev vx10 \
+ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+ do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+ipv6_mcroute_fdb()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+ bridge -6 fdb add dev vx20 \
+ 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+ do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+ipv6_mcroute_fdb_oif0()
+{
+ # The IPv6 tunnel lookup does not fall back to selection by source
+ # address. Instead it just does a FIB match, and that would find one of
+ # the several ff00::/8 multicast routes -- each device has one. In order
+ # to reliably force the $IPMR device, add a /128 route for the
+ # destination group address.
+ ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+ defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+ do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+ adf_install_sg_sep
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+ipv4_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add \
+ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+ipv6_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx "X$IPMR"
+
+ ip_addr_add "X$IPMR" 2001:db8:5::1/64
+ vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+ self static dst "$GROUP6" via "X$IPMR"
+ ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh
new file mode 100755
index 000000000000..bf0243366caa
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test IPv6 force_forwarding interface property
+#
+# This test verifies that the force_forwarding property works correctly:
+# - When global forwarding is disabled, packets are not forwarded normally
+# - When force_forwarding is enabled on an interface, packets are forwarded
+# regardless of the global forwarding setting
+
+source lib.sh
+
+cleanup() {
+ cleanup_ns $ns1 $ns2 $ns3
+}
+
+trap cleanup EXIT
+
+setup_test() {
+ # Create three namespaces: sender, router, receiver
+ setup_ns ns1 ns2 ns3
+
+ # Create veth pairs: ns1 <-> ns2 <-> ns3
+ ip link add name veth12 type veth peer name veth21
+ ip link add name veth23 type veth peer name veth32
+
+ # Move interfaces to namespaces
+ ip link set veth12 netns $ns1
+ ip link set veth21 netns $ns2
+ ip link set veth23 netns $ns2
+ ip link set veth32 netns $ns3
+
+ # Configure interfaces
+ ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad
+ ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad
+ ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad
+ ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad
+
+ # Bring up interfaces
+ ip -n $ns1 link set veth12 up
+ ip -n $ns2 link set veth21 up
+ ip -n $ns2 link set veth23 up
+ ip -n $ns3 link set veth32 up
+
+ # Add routes
+ ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1
+
+ # Disable global forwarding
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0
+}
+
+test_force_forwarding() {
+ local ret=0
+
+ echo "TEST: force_forwarding functionality"
+
+ # Check if force_forwarding sysctl exists
+ if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then
+ echo "SKIP: force_forwarding not available"
+ return $ksft_skip
+ fi
+
+ # Test 1: Without force_forwarding, ping should fail
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "FAIL: ping succeeded when forwarding disabled"
+ ret=1
+ else
+ echo "PASS: forwarding disabled correctly"
+ fi
+
+ # Test 2: With force_forwarding enabled, ping should succeed
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "PASS: force_forwarding enabled forwarding"
+ else
+ echo "FAIL: ping failed with force_forwarding enabled"
+ ret=1
+ fi
+
+ return $ret
+}
+
+echo "IPv6 force_forwarding test"
+echo "=========================="
+
+setup_test
+test_force_forwarding
+ret=$?
+
+if [ $ret -eq 0 ]; then
+ echo "OK"
+ exit 0
+elif [ $ret -eq $ksft_skip ]; then
+ echo "SKIP"
+ exit $ksft_skip
+else
+ echo "FAIL"
+ exit 1
+fi
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 86a216e9aca8..c7add0dc4c60 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -240,6 +240,29 @@ create_netdevsim() {
echo nsim$id
}
+create_netdevsim_port() {
+ local nsim_id="$1"
+ local ns="$2"
+ local port_id="$3"
+ local perm_addr="$4"
+ local orig_dev
+ local new_dev
+ local nsim_path
+
+ nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id"
+
+ echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1
+
+ orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1)
+ orig_dev=$(basename "$orig_dev")
+ new_dev="nsim${nsim_id}p$port_id"
+
+ ip -netns "$ns" link set dev "$orig_dev" name "$new_dev"
+ ip -netns "$ns" link set dev "$new_dev" up
+
+ echo "$new_dev"
+}
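A minimal usage sketch for the new helper, mirroring how netdev-l2addr.sh below uses it (device id, namespace and MAC are illustrative; assumes lib.sh is sourced and netdevsim is available):

    setup_ns NS
    dev=$(create_netdevsim 2025 "$NS")                               # parent netdevsim device
    port=$(create_netdevsim_port 2025 "$NS" 2 "d0:be:d0:be:d0:00")   # extra port with a permanent address
    ip -netns "$NS" link show dev "$port"
    cleanup_netdevsim 2025 "$NS"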
+
# Remove netdevsim with given id.
cleanup_netdevsim() {
local id="$1"
@@ -547,13 +570,19 @@ ip_link_set_addr()
defer ip link set dev "$name" address "$old_addr"
}
-ip_link_is_up()
+ip_link_has_flag()
{
local name=$1; shift
+ local flag=$1; shift
local state=$(ip -j link show "$name" |
- jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
- [[ $state == "UP" ]]
+ jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ [[ $state == true ]]
+}
+
+ip_link_is_up()
+{
+ ip_link_has_flag "$1" UP
}
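A usage sketch for the generalized helper; the flag names are whatever "ip -j link show" reports (UP, LOWER_UP, NO-CARRIER, ...), and the interface name is illustrative:

    ip_link_has_flag swp1 LOWER_UP && echo "swp1 has carrier"
    ip_link_is_up swp1 && echo "swp1 is administratively up"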
ip_link_set_up()
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 8697bd27dc30..02be28dcc089 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter
from .nsim import *
from .utils import *
from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
-from .ynl import NetshaperFamily
+from .ynl import NetshaperFamily, DevlinkFamily
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 61287c203b6e..8e35ed12ed9e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -32,6 +32,7 @@ class KsftTerminate(KeyboardInterrupt):
def ksft_pr(*objs, **kwargs):
+ kwargs["flush"] = True
print("#", *objs, **kwargs)
@@ -139,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""):
res += "." + str(case.__name__)
if comment:
res += " # " + comment
- print(res)
+ print(res, flush=True)
def ksft_flush_defer():
@@ -227,8 +228,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
- print("TAP version 13")
- print("1.." + str(len(cases)))
+ print("TAP version 13", flush=True)
+ print("1.." + str(len(cases)), flush=True)
global KSFT_RESULT
cnt = 0
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 34470d65d871..f395c90fb0f1 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None):
return cmd_obj
+def bpftool(args, json=None, ns=None, host=None):
+ return tool('bpftool', args, json=json, ns=ns, host=host)
+
+
def ip(args, json=None, ns=None, host=None):
if ns:
args = f'-netns {ns} ' + args
@@ -185,6 +189,41 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
+def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
+ """
+ Run bpftrace and return map data (if json=True).
+ The output of bpftrace is inconvenient, so the helper converts
+ it to a dict indexed by map name with the leading '@' stripped, e.g.:
+ {
+ "": { ... },
+ "map2": { ... },
+ }
+ """
+ cmd_arr = ['bpftrace']
+ # Throw in --quiet if json, otherwise the output has two objects
+ if json:
+ cmd_arr += ['-f', 'json', '-q']
+ if timeout:
+ expr += ' interval:s:' + str(timeout) + ' { exit(); }'
+ cmd_arr += ['-e', expr]
+ cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False)
+ if json:
+ # bpftrace prints objects as lines
+ ret = {}
+ for l in cmd_obj.stdout.split('\n'):
+ if not l.strip():
+ continue
+ one = _json.loads(l)
+ if one.get('type') != 'map':
+ continue
+ for k, v in one["data"].items():
+ if k.startswith('@'):
+ k = k.lstrip('@')
+ ret[k] = v
+ return ret
+ return cmd_obj
+
+
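As a rough illustration of what the helper wraps, the equivalent command line (the probe expression is only an example) would be:

    bpftrace -f json -q -e 'kprobe:do_nanosleep { @[comm] = count(); } interval:s:1 { exit(); }'
    # each output line is a JSON object; the helper keeps only those with
    # "type": "map" and merges their "data" dicts into the returned dict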
def rand_port(type=socket.SOCK_STREAM):
"""
Get a random unprivileged port.
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 6329ae805abf..2b3a61ea3bfa 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily):
def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
schema='', recv_size=recv_size)
+
+class DevlinkFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..521ba38f2ddd
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
+enum {
+ XDP_MODE = 0,
+ XDP_PORT = 1,
+ XDP_ADJST_OFFSET = 2,
+ XDP_ADJST_TAG = 3,
+} xdp_map_setup_keys;
+
+enum {
+ XDP_MODE_PASS = 0,
+ XDP_MODE_DROP = 1,
+ XDP_MODE_TX = 2,
+ XDP_MODE_TAIL_ADJST = 3,
+ XDP_MODE_HEAD_ADJST = 4,
+} xdp_map_modes;
+
+enum {
+ STATS_RX = 0,
+ STATS_PASS = 1,
+ STATS_DROP = 2,
+ STATS_TX = 3,
+ STATS_ABORT = 4,
+} xdp_stats;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_xdp_stats SEC(".maps");
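The program is driven entirely by map_xdp_setup. As a sketch of how a harness might select, say, TX mode for UDP port 8000 (assuming the object is loaded and the map name is visible to bpftool; the in-tree tests may well drive the map through libbpf instead):

    # key 0 = XDP_MODE (2 == XDP_MODE_TX), key 1 = XDP_PORT; 32-bit little-endian values
    bpftool map update name map_xdp_setup key 0 0 0 0 value 2 0 0 0
    bpftool map update name map_xdp_setup key 1 0 0 0 value 64 31 0 0    # 8000 == 0x1f40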
+
+static __u32 min(__u32 a, __u32 b)
+{
+ return a < b ? a : b;
+}
+
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+ __u64 *count;
+
+ count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+ if (count)
+ __sync_fetch_and_add(count, 1);
+}
+
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ } else {
+ return NULL;
+ }
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ if (udph->dest != bpf_htons(port))
+ return NULL;
+
+ record_stats(ctx, STATS_RX);
+
+ return udph;
+}
+
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_PASS);
+
+ return XDP_PASS;
+}
+
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_DROP);
+
+ return XDP_DROP;
+}
+
+static void swap_machdr(void *data)
+{
+ struct ethhdr *eth = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+ __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_PASS;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __be32 tmp_ip = iph->saddr;
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*iph) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ swap_machdr((void *)eth);
+
+ iph->saddr = iph->daddr;
+ iph->daddr = tmp_ip;
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ struct in6_addr tmp_ipv6;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ swap_machdr((void *)eth);
+
+ __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+ sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+ }
+
+ return XDP_PASS;
+}
+
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+ __u32 len, len_new;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __u16 total_len;
+
+ if (iph + 1 > (struct iphdr *)data_end)
+ return NULL;
+
+ iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ __u16 payload_len;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ *udp_csum = ~((__u32)udph->check);
+
+ len = ipv6h->payload_len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+ ipv6h->payload_len = len_new;
+
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+
+ len = udph->len;
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+ } else {
+ return NULL;
+ }
+
+ udph->len = len_new;
+
+ return udph;
+}
+
+static __u16 csum_fold_helper(__u32 csum)
+{
+ return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
+}
+
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+ __u32 hdr_len)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ struct udphdr *udph = NULL;
+ __u32 buff_len;
+
+ udph = update_pkt(ctx, 0 - offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ /* Make sure we have enough data to avoid eating the header */
+ if (buff_len - offset < hdr_len)
+ return -1;
+
+ buff_pos = buff_len - offset;
+ if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ __u32 buff_len, hdr_len, key;
+ struct udphdr *udph;
+ __s32 *val;
+ __u8 tag;
+
+ /* Proceed to update the packet headers before attempting to adjust
+ * the tail. Once the tail is adjusted we lose access to the offset
+ * amount of data at the end of the packet, which is crucial for
+ * updating the checksum.
+ * Since any failure beyond this point aborts the packet, we do not
+ * need to worry about passing a packet up the stack with wrong
+ * headers.
+ */
+ udph = update_pkt(ctx, offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+ bpf_printk("Failed to adjust tail\n");
+ return -1;
+ }
+
+ if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ __s32 *adjust_offset, *val;
+ __u32 key, hdr_len;
+ void *offset_ptr;
+ __u8 tag;
+ int ret;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph - data + sizeof(struct udphdr);
+ key = XDP_ADJST_OFFSET;
+ adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!adjust_offset)
+ return XDP_PASS;
+
+ if (*adjust_offset < 0)
+ ret = xdp_adjst_tail_shrnk_data(ctx,
+ (__u16)(0 - *adjust_offset),
+ hdr_len);
+ else
+ ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+ if (ret)
+ goto abort_pkt;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort_pkt:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ struct udphdr *udph;
+ void *offset_ptr;
+ __u32 udp_csum = 0;
+
+ /* Update the length information in the IP and UDP headers before
+ * adjusting the headroom. This simplifies accessing the relevant
+ * fields in the IP and UDP headers for fragmented packets. Any
+ * failure beyond this point will result in the packet being aborted,
+ * so we don't need to worry about incorrect length information for
+ * passed packets.
+ */
+ udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, offset) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complaints about negative values */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char hdr_buff[MAX_HDR_LEN];
+ char data_buff[MAX_ADJST_OFFSET];
+ void *offset_ptr;
+ __s32 *val;
+ __u32 key;
+ __u8 tag;
+ __u32 udp_csum = 0;
+ struct udphdr *udph;
+
+ udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&data_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complaints about negative values */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph_ptr = NULL;
+ __u32 key, size, hdr_len;
+ __s32 *val;
+ int res;
+
+ /* Filter packets based on UDP port */
+ udph_ptr = filter_udphdr(ctx, port);
+ if (!udph_ptr)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr);
+
+ key = XDP_ADJST_OFFSET;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return XDP_PASS;
+
+ switch (*val) {
+ case -16:
+ case 16:
+ size = 16;
+ break;
+ case -32:
+ case 32:
+ size = 32;
+ break;
+ case -64:
+ case 64:
+ size = 64;
+ break;
+ case -128:
+ case 128:
+ size = 128;
+ break;
+ case -256:
+ case 256:
+ size = 256;
+ break;
+ default:
+ bpf_printk("Invalid adjustment offset: %d\n", *val);
+ goto abort;
+ }
+
+ if (*val < 0)
+ res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+ else
+ res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+ if (res)
+ goto abort;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+ __u32 key, *port;
+ __s32 *mode;
+
+ key = XDP_MODE;
+ mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!mode)
+ return XDP_PASS;
+
+ key = XDP_PORT;
+ port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!port)
+ return XDP_PASS;
+
+ switch (*mode) {
+ case XDP_MODE_PASS:
+ return xdp_mode_pass(ctx, (__u16)(*port));
+ case XDP_MODE_DROP:
+ return xdp_mode_drop_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TX:
+ return xdp_mode_tx_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TAIL_ADJST:
+ return xdp_adjst_tail(ctx, (__u16)(*port));
+ case XDP_MODE_HEAD_ADJST:
+ return xdp_head_adjst(ctx, (__u16)(*port));
+ }
+
+ /* Default action is to simply pass the packet */
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index e47788bfa671..4c7e51336ab2 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -4,7 +4,8 @@ top_srcdir = ../../../../..
CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \
+ mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \
simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 4f80014cae49..968d440c03fe 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m
CONFIG_NF_TABLES=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
@@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
new file mode 100755
index 000000000000..ce93ec2f107f
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -C "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
new file mode 100755
index 000000000000..5dd30f9394af
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
new file mode 100755
index 000000000000..1d16fb1cc9bb
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}"
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7ea5fb28c93d..1d5d3c4e7e87 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -77,6 +77,7 @@
static int cfg_cork;
static bool cfg_cork_mixed;
static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_expect_zerocopy = -1;
static int cfg_family = PF_UNSPEC;
static int cfg_ifindex = 1;
static int cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
+static int exitcode;
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
-static int zerocopied = -1;
static uint32_t next_completion;
static uint32_t sends_since_notify;
@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
next_completion = hi + 1;
zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
- if (zerocopied == -1)
- zerocopied = zerocopy;
- else if (zerocopied != zerocopy) {
- fprintf(stderr, "serr: inconsistent\n");
- zerocopied = zerocopy;
+ if (cfg_expect_zerocopy != -1 &&
+ cfg_expect_zerocopy != zerocopy) {
+ fprintf(stderr, "serr: ee_code: %u != expected %u\n",
+ zerocopy, cfg_expect_zerocopy);
+ exitcode = 1;
+ /* suppress repeated messages */
+ cfg_expect_zerocopy = zerocopy;
}
if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)
fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
packets, bytes >> 20, completions,
- zerocopied == 1 ? 'y' : 'n');
+ cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
}
static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ case 'Z':
+ cfg_expect_zerocopy = !!atoi(optarg);
+ break;
}
}
@@ -817,5 +823,5 @@ int main(int argc, char **argv)
else
error(1, 0, "unknown cfg_test %s", cfg_test);
- return 0;
+ return exitcode;
}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..28178a38a4e7 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -6,6 +6,7 @@
set -e
readonly DEV="veth0"
+readonly DUMMY_DEV="dummy0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"
@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"
-readonly SADDR4='192.168.1.1'
-readonly DADDR4='192.168.1.2'
-readonly SADDR6='fd::1'
-readonly DADDR6='fd::2'
+readonly LPREFIX4='192.168.1'
+readonly RPREFIX4='192.168.2'
+readonly LPREFIX6='fd'
+readonly RPREFIX6='fc'
+
readonly path_sysctl_mem="net.core.optmem_max"
# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
- $0 4 tcp -t 1
- $0 6 tcp -t 1
- $0 4 udp -t 1
- $0 6 udp -t 1
- echo "OK. All tests passed"
- exit 0
+ ret=0
+
+ $0 4 tcp -t 1 || ret=1
+ $0 6 tcp -t 1 || ret=1
+ $0 4 udp -t 1 || ret=1
+ $0 6 udp -t 1 || ret=1
+
+ [[ "$ret" == "0" ]] && echo "OK. All tests passed"
+ exit $ret
fi
# Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"
# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
- readonly SADDR="${SADDR4}"
- readonly DADDR="${DADDR4}"
+ readonly SADDR="${LPREFIX4}.1"
+ readonly DADDR="${LPREFIX4}.2"
+ readonly DUMMY_ADDR="${RPREFIX4}.1"
+ readonly DADDR_TXONLY="${RPREFIX4}.2"
+ readonly MASK="24"
elif [[ "${IP}" == "6" ]]; then
- readonly SADDR="${SADDR6}"
- readonly DADDR="${DADDR6}"
+ readonly SADDR="${LPREFIX6}::1"
+ readonly DADDR="${LPREFIX6}::2"
+ readonly DUMMY_ADDR="${RPREFIX6}::1"
+ readonly DADDR_TXONLY="${RPREFIX6}::2"
+ readonly MASK="64"
+ readonly NODAD="nodad"
else
echo "Invalid IP version ${IP}"
exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy
+
# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DUMMY_DEV}" up
# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
# Add fixed IP addresses to the devices
-ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
-ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
-ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
-ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}
+
+ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
+ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"
+
+ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
+ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1
# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+ret=0
+
do_test() {
local readonly ARGS="$1"
- echo "ipv${IP} ${TXMODE} ${ARGS}"
- ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ # tx-rx test
+ # packets queued to a local socket are copied,
+ # sender notification has SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
sleep 0.2
- ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
wait
+
+ # next test is unconnected tx to dummy0, cannot exercise with tcp
+ [[ "${TXMODE}" == "tcp" ]] && return
+
+ # tx-only test: send out dummy0
+ # packets leaving the host are not copied,
+ # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
}
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
-echo ok
+
+[[ "$ret" == "0" ]] && echo "OK"
diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh
new file mode 100755
index 000000000000..18509da293e5
--- /dev/null
+++ b/tools/testing/selftests/net/netdev-l2addr.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+NSIM_ADDR=2025
+TEST_ADDR="d0:be:d0:be:d0:00"
+
+RET_CODE=0
+
+cleanup() {
+ cleanup_netdevsim "$NSIM_ADDR"
+ cleanup_ns "$NS"
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ RET_CODE=1
+}
+
+get_addr()
+{
+ local type="$1"
+ local dev="$2"
+ local ns="$3"
+
+ ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type"
+}
+
+setup_ns NS
+
+nsim=$(create_netdevsim $NSIM_ADDR "$NS")
+
+get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr"
+get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it"
+
+ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR"
+ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR"
+
+[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr"
+[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr"
+
+if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then
+ fail "Created netdevsim with broadcast permaddr"
+fi
+
+nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR")
+
+get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr"
+[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr"
+
+cleanup_netdevsim "$NSIM_ADDR" "$NS"
+
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 363646f4fefe..79d5b33966ba 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -1,6 +1,8 @@
CONFIG_AUDIT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_IP=m
CONFIG_BRIDGE_EBT_REDIRECT=m
@@ -14,7 +16,10 @@ CONFIG_INET_ESP=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP_NF_RAW=m
@@ -92,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y
CONFIG_NET_PKTGEN=m
CONFIG_TUN=m
CONFIG_INET_DIAG=m
-CONFIG_SCTP_DIAG=m
+CONFIG_INET_SCTP_DIAG=m
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
index 3712c1b9b38b..606a43a60f73 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -93,32 +93,28 @@ ping_test()
run_one_clash_test()
{
local ns="$1"
- local daddr="$2"
- local dport="$3"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
local entries
local cre
if ! ip netns exec "$ns" ./udpclash $daddr $dport;then
- echo "FAIL: did not receive expected number of replies for $daddr:$dport"
- ret=1
- return 1
+ echo "INFO: did not receive expected number of replies for $daddr:$dport"
+ ip netns exec "$ctns" conntrack -S
+ # don't fail: check if clash resolution triggered after all.
fi
- entries=$(conntrack -S | wc -l)
- cre=$(conntrack -S | grep -v "clash_resolve=0" | wc -l)
+ entries=$(ip netns exec "$ctns" conntrack -S | wc -l)
+ cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l)
- if [ "$cre" -ne "$entries" ] ;then
+ if [ "$cre" -ne "$entries" ];then
clash_resolution_active=1
return 0
fi
- # 1 cpu -> parallel insertion impossible
- if [ "$entries" -eq 1 ]; then
- return 0
- fi
-
- # not a failure: clash resolution logic did not trigger, but all replies
- # were received. With right timing, xmit completed sequentially and
+ # not a failure: clash resolution logic did not trigger.
+ # With right timing, xmit completed sequentially and
# no parallel insertion occurs.
return $ksft_skip
}
@@ -126,20 +122,23 @@ run_one_clash_test()
run_clash_test()
{
local ns="$1"
- local daddr="$2"
- local dport="$3"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local softerr=0
for i in $(seq 1 10);do
- run_one_clash_test "$ns" "$daddr" "$dport"
+ run_one_clash_test "$ns" "$ctns" "$daddr" "$dport"
local rv=$?
if [ $rv -eq 0 ];then
echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
return 0
- elif [ $rv -eq 1 ];then
- echo "FAIL: clash resolution test for $daddr:$dport on attempt $i"
- return 1
+ elif [ $rv -eq $ksft_skip ]; then
+ softerr=1
fi
done
+
+ [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger"
}
ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
@@ -161,11 +160,11 @@ spawn_servers "$nsclient2"
# exercise clash resolution with nat:
# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}.
-run_clash_test "$nsclient1" 10.0.1.99 "$dport"
+run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport"
# exercise clash resolution without nat.
load_simple_ruleset "$nsclient2"
-run_clash_test "$nsclient2" 127.0.0.1 9001
+run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
if [ $clash_resolution_active -eq 0 ];then
[ "$ret" -eq 0 ] && ret=$ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index 6af2ea3ad6b8..9c9d5b38ab71 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -151,7 +151,7 @@ test_nat() {
test_tun() {
ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
- ip netns exec "${ns1}" modprobe -q ipip
+ modprobe -q ipip
ip netns exec "${ns1}" ip link set tunl0 up
ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -160,10 +160,10 @@ test_tun() {
ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
- ip netns exec "${ns2}" modprobe -q ipip
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
index 5ff7be9daeee..c0fffaa6dbd9 100755
--- a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -10,6 +10,8 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
checktool "iperf3 --version" "run test without iperf3 tool"
+read kernel_tainted < /proc/sys/kernel/tainted
+
# how many seconds to torture the kernel?
# default to 80% of max run time but don't exceed 48s
TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
@@ -135,7 +137,8 @@ else
wait
fi
-[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+
+[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
echo "FAIL: Kernel is tainted!"
exit $ksft_fail
}
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
name[0] = '\0';
rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
if (rc < 0)
- log_err_errno("setsockopt(SO_BINDTODEVICE)");
+ log_err_errno("getsockopt(SO_BINDTODEVICE)");
return rc;
}
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
break;
case AF_INET6:
if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
- log_err_errno("setsockopt(IPV6_FREEBIND");
+ log_err_errno("setsockopt(IPV6_FREEBIND)");
rc = -1;
}
break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
sep++;
if (str_to_uint(sep, 1, pfx_len_max,
&args->prefix_len) != 0) {
- fprintf(stderr, "Invalid port\n");
+ fprintf(stderr, "Invalid prefix length\n");
return 1;
}
} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
}
}
- nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
+ nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
while (1) {
FD_ZERO(&rfds);
FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
sd = socket(args->version, args->type, args->protocol);
if (sd < 0) {
log_err_errno("Error opening socket");
- return -1;
+ return -1;
}
if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
* waiting to be told when to continue
*/
if (read(fd, &buf, sizeof(buf)) <= 0) {
- log_err_errno("Failed to read IPC status from status");
+ log_err_errno("Failed to read IPC status from pipe");
return 1;
}
if (!buf) {
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index beaee5e4e2aa..5c66421ab8aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
+from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@@ -34,6 +35,128 @@ def napi_list_check(nf) -> None:
ksft_eq(len(napis), 100,
comment=f"queue count after reset queue {q} mode {i}")
+def napi_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at napi and device level.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "enabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ ip(f"link set dev {nsim.ifname} down")
+ ip(f"link set dev {nsim.ifname} up")
+
+ # verify if napi threaded is still set
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is still not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # unset napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "disabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+
+ # set threaded at device level
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # set napi threaded for napi0
+ nf.napi_set({'id': napi0_id, 'threaded': 1})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def dev_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at device level using sysfs.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set threaded
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
def nsim_rxq_reset_down(nf) -> None:
"""
@@ -122,7 +245,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
- nsim_rxq_reset_down],
+ dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index ef8b25a606d8..c5b01e1bd4c7 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
# xfail tests that are known flaky with dbg config, not fixable.
# still run them for coverage (and expect 100% pass without dbg).
declare -ar xfail_list=(
+ "tcp_blocking_blocking-connect.pkt"
+ "tcp_blocking_blocking-read.pkt"
"tcp_eor_no-coalesce-retrans.pkt"
"tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_sack_sack-route-refresh-ip-tos.pkt"
"tcp_slow_start_slow-start-after-win-update.pkt"
"tcp_timestamping.*.pkt"
"tcp_user_timeout_user-timeout-probe.pkt"
+ "tcp_zerocopy_cl.*.pkt"
"tcp_zerocopy_epoll_.*.pkt"
"tcp_tcp_info_tcp-info-.*-limited.pkt"
)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
index 914eabab367a..657e42ca65b5 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test for blocking read.
+
--tolerance_usecs=10000
+--mss=1000
`./defaults.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
new file mode 100644
index 000000000000..c790d0af635e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test various DSACK (RFC 2883) behaviors.
+
+--mss=1000
+
+`./defaults.sh`
+
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+// First SACK range.
+ +0 < P. 1001:2001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:2001>
+
+// Check SACK coalescing (contiguous sequence).
+ +0 < P. 2001:3001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001>
+
+// Check we have two SACK ranges for non contiguous sequences.
+ +0 < P. 4001:5001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001>
+
+// Three ranges.
+ +0 < P. 7001:8001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001>
+
+// DSACK (1001:3001) + SACK (6001:7001)
+ +0 < P. 1:6001(6000) ack 1 win 1024
+ +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001>
+
+// DSACK (7001:8001)
+ +0 < P. 6001:8001(2000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001>
+
+// DSACK for an older segment.
+ +0 < P. 1:1001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
index df49c67645ac..e13f0eee9795 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the client side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
index 04a5e2590c62..14dd5f813d50 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the server side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
new file mode 100644
index 000000000000..7e6bc5fb0c8d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:11001(9000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
+
+// Check that an out-of-order (ooo) packet properly updates tcpi_rcv_mss
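+// (%{ ... }% below is packetdrill's inline script hook; as understood here it
+// evaluates the assertion against the tcpi_* fields reported via TCP_INFO.)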
+ +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
+
+ +0 < . 11001:21001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
new file mode 100644
index 000000000000..3848b419e68c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:4001(4000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// In-sequence packet: SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// Out-of-order packet: SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +1 < P. 5001:55001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 70001:80001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+ +0 read(4, ..., 100000) = 4000
+
+// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 54001 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
new file mode 100644
index 000000000000..f575c0ff89da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:20001(20000) ack 1 win 257
+ +.04 > . 1:1(0) ack 20001 win 18000
+
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 20001 win 18000
+
+ +0 read(4, ..., 20000) = 20000
+// An oversized packet is accepted if the receive queue is empty
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 80001 win 0
+
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 2e8243a65b50..d6c00efeb664 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -21,6 +21,7 @@ ALL_TESTS="
kci_test_vrf
kci_test_encap
kci_test_macsec
+ kci_test_macsec_vlan
kci_test_ipsec
kci_test_ipsec_offload
kci_test_fdb_get
@@ -30,6 +31,7 @@ ALL_TESTS="
kci_test_address_proto
kci_test_enslave_bonding
kci_test_mngtmpaddr
+ kci_test_operstate
"
devdummy="test-dummy0"
@@ -291,6 +293,17 @@ kci_test_route_get()
end_test "PASS: route get"
}
+check_addr_not_exist()
+{
+ dev=$1
+ addr=$2
+ if ip addr show dev $dev | grep -q $addr; then
+ return 1
+ else
+ return 0
+ fi
+}
+
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
@@ -298,9 +311,8 @@ kci_test_addrlft()
run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
- sleep 5
- run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
- if [ $? -eq 0 ]; then
+ slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
+ if [ $? -eq 1 ]; then
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
return
@@ -561,6 +573,41 @@ kci_test_macsec()
end_test "PASS: macsec"
}
+# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock.
+# Make sure __dev_set_promiscuity does not grab the (sleeping) netdev instance
+# lock.
+# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/
+kci_test_macsec_vlan()
+{
+ msname="test_macsec1"
+ vlanname="test_vlan1"
+ local ret=0
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ run_cmd ip link set dev "$msname" up
+ ip link add link "$msname" name "$vlanname" type vlan id 1
+ ip link set dev "$vlanname" address 00:11:22:33:44:88
+ ip link set dev "$vlanname" up
+ run_cmd ip link del dev "$vlanname"
+ run_cmd ip link del dev "$msname"
+
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: macsec_vlan"
+ return 1
+ fi
+
+ end_test "PASS: macsec_vlan"
+}
+
#-------------------------------------------------------------------
# Example commands
# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
@@ -673,6 +720,11 @@ kci_test_ipsec_offload()
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ esp4_offload_probed_default=false
+
+ if lsmod | grep -q esp4_offload; then
+ esp4_offload_probed_default=true
+ fi
if ! mount | grep -q debugfs; then
mount -t debugfs none /sys/kernel/debug/ &> /dev/null
@@ -766,6 +818,7 @@ EOF
fi
# clean up any leftovers
+ ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload
echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
@@ -1334,6 +1387,39 @@ kci_test_mngtmpaddr()
return $ret
}
+kci_test_operstate()
+{
+ local ret=0
+
+ # Check that it is possible to set operational state during device
+ # creation and that it is preserved when the administrative state of
+ # the device is toggled.
+ run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UP" ip link show dev vx0
+ run_cmd ip link set dev vx0 down
+ run_cmd_grep "state DOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ # Check that it is possible to set the operational state of the device
+ # after creation.
+ run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UNKNOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 state up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ if [ "$ret" -ne 0 ]; then
+ end_test "FAIL: operstate"
+ return 1
+ fi
+
+ end_test "PASS: operstate"
+}
+
kci_test_rtnl()
{
local current_test
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test checks rtnetlink notification call paths, trying to get as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+ kci_test_mcast_addr_notification
+ kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+kci_test_mcast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor maddr > $tmpfile &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "mcast addr notification: iproute2 too old"
+ return $RET
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 4 line matches as follows.
+ # 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ # 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+ if [ "$match_result" -ne 4 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "mcast addr notification: Expected 4 matches, got $match_result"
+ return $RET
+}
+
+kci_test_anycast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor acaddress > "$tmpfile" &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "anycast addr notification: iproute2 too old"
+ return "$RET"
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 2 line matches as follows.
+ # 9: dummy2 inet6 any fe80:: scope global
+ # Deleted 9: dummy2 inet6 any fe80:: scope global
+ match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+ if [ "$match_result" -ne 2 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "anycast addr notification: Expected 2 matches, got $match_result"
+ return "$RET"
+}
+
+# Check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ RET=$ksft_skip
+ log_test "need root privileges"
+ exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index ba730655a7bf..4bc135e5c22c 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -594,7 +594,7 @@ setup_rt_local_sids()
dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index 4b86040c58c6..34b781a2ae74 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
# the selftest network.
#
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
# Local SID/C-SID table
# =====================
#
@@ -521,6 +524,9 @@ setup_rt_networking()
ip -netns "${nsname}" addr \
add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+ ip -netns "${nsname}" addr \
+ add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
ip -netns "${nsname}" link set "${devname}" up
done
@@ -609,6 +615,27 @@ set_end_x_nextcsid()
nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
}
+set_end_x_ll_nextcsid()
+{
+ local rt="$1"
+ local adj="$2"
+
+ eval nsname=\${$(get_rtname "${rt}")}
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+ nh6_ll_addr="fe80::${adj}:${rt}"
+ oifname="veth-rt-${rt}-${adj}"
+
+	# enable the NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop
+ # address (note that "dev" is the dummy dum0 device chosen for the sake
+ # of simplicity).
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+ oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+ nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
set_underlay_sids_reachability()
{
local rt="$1"
@@ -654,7 +681,7 @@ setup_rt_local_sids()
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -1016,6 +1043,27 @@ host_vpn_tests()
check_and_log_hs_ipv4_connectivity 1 2
check_and_log_hs_ipv4_connectivity 2 1
+
+	# Set up the adjacencies in the SRv6-aware routers using IPv6 link-local
+ # addresses.
+ # - rt-3 SRv6 End.X adjacency with rt-4
+ # - rt-4 SRv6 End.X adjacency with rt-1
+ set_end_x_ll_nextcsid 3 4
+ set_end_x_ll_nextcsid 4 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+
+ # Restore the previous adjacencies.
+ set_end_x_nextcsid 3 4
+ set_end_x_nextcsid 4 1
}
__nextcsid_end_x_behavior_test()
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 3efce1718c5f..6a68c7eff1dc 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -395,7 +395,7 @@ setup_rt_local_sids()
dev "${VRF_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cabc70538ffe..0979b5316fdf 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -343,7 +343,7 @@ setup_rt_local_sids()
encap seg6local action End dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # Endpoint behaviors instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule add \
to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index f00245263b20..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Check that after SEQ number wrap-around:
* 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
* In order to test (2), the test doesn't just adjust seq number for a queue
* on a connected socket, but migrates it to another sk+port number, so
* that there won't be any delayed packets that will fail to verify
diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh
new file mode 100755
index 000000000000..388056472b5b
--- /dev/null
+++ b/tools/testing/selftests/net/test_neigh.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ extern_valid_ipv4
+ extern_valid_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+ set -e
+
+ setup_ns ns1 ns2
+
+ ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2"
+ ip -n "$ns1" link set dev veth0 up
+ ip -n "$ns2" link set dev veth1 up
+
+ ip -n "$ns1" address add 192.0.2.1/24 dev veth0
+ ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad
+ ip -n "$ns2" address add 192.0.2.2/24 dev veth1
+ ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad
+
+ ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+
+ sleep 5
+
+ set +e
+}
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+extern_valid_common()
+{
+ local af_str=$1; shift
+ local ip_addr=$1; shift
+ local tbl_name=$1; shift
+ local subnet=$1; shift
+ local mac
+
+ mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]')
+
+ RET=0
+
+ # Check that simple addition works.
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "No \"extern_valid\" flag after addition"
+
+ log_test "$af_str \"extern_valid\" flag: Add entry"
+
+ RET=0
+
+ # Check that an entry cannot be added with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Add with an invalid state"
+
+ RET=0
+
+	# Check that an entry cannot be added with both the "extern_valid" flag
+	# and the "use" / "managed" flag.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag"
+
+ log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag"
+
+ RET=0
+
+ # Check that "extern_valid" flag can be toggled using replace.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Did not manage to set \"extern_valid\" flag with replace"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_fail $? "Did not manage to clear \"extern_valid\" flag with replace"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry"
+
+ RET=0
+
+ # Check that an existing "extern_valid" entry can be marked as
+ # "managed".
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed"
+ check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag"
+
+ RET=0
+
+	# Check that an entry cannot be replaced with the "extern_valid" flag and
+	# an invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Replace with an invalid state"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is flushed when the interface is
+ # put administratively down.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 link set dev veth0 down"
+ run_cmd "ip -n $ns1 link set dev veth0 up"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_fail $? "\"extern_valid\" entry not flushed upon interface down"
+
+ log_test "$af_str \"extern_valid\" flag: Interface down"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is not flushed when the interface
+ # loses its carrier.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns2 link set dev veth1 down"
+ run_cmd "ip -n $ns2 link set dev veth1 up"
+ run_cmd "sleep 2"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_err $? "\"extern_valid\" entry flushed upon carrier down"
+
+ log_test "$af_str \"extern_valid\" flag: Carrier down"
+
+ RET=0
+
+	# Check that when an entry transitions to the "reachable" state it
+	# maintains the "extern_valid" flag. Wait for the "delay_probe" interval
+	# so that an ARP request / NS is sent.
+ local delay_probe
+
+ delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]')
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ run_cmd "sleep $((delay_probe / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\""
+ check_err $? "Entry did not transition to \"reachable\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state"
+
+ log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state"
+
+ RET=0
+
+ # Drop all packets, trigger resolution and check that entry goes back
+ # to "stale" state instead of "failed".
+ local mcast_reprobes
+ local retrans_time
+ local ucast_probes
+ local app_probes
+ local probes
+ local delay
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "tc -n $ns2 qdisc add dev veth1 clsact"
+ run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]')
+ ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]')
+ app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]')
+ mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]')
+ delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time))
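+	# Worked example (values assumed for illustration; the real ones are read
+	# above): delay_probe=5000 ms, retrans=1000 ms, ucast_probes=3,
+	# app_probes=0, mcast_reprobes=0 gives delay = 5000 + 3 * 1000 = 8000 ms,
+	# so the sleep below waits 8 + 2 = 10 seconds.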
+ run_cmd "sleep $((delay / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\""
+ check_err $? "Entry did not return to \"stale\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state"
+ probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]')
+ if [[ $probes -eq 0 ]]; then
+ check_err 1 "No probes were sent"
+ fi
+
+ log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state"
+
+ run_cmd "tc -n $ns2 qdisc del dev veth1 clsact"
+
+ RET=0
+
+ # Forced garbage collection runs whenever the number of entries is
+ # larger than "thresh3" and deletes stale entries that have not been
+ # updated in the last 5 seconds.
+ #
+ # Check that an "extern_valid" entry survives a forced garbage
+ # collection. Add an entry, wait 5 seconds and add more entries than
+ # "thresh3" so that forced garbage collection will run.
+ #
+ # Note that the garbage collection thresholds are global resources and
+ # that changes in the initial namespace affect all the namespaces.
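+	#
+	# For example, with thresh3 lowered to 10 by the command below, adding 20
+	# more entries on top of the two existing ones should exceed thresh3 and
+	# trigger at least one forced garbage collection run.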
+ local forced_gc_runs_t0
+ local forced_gc_runs_t1
+ local orig_thresh1
+ local orig_thresh2
+ local orig_thresh3
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]')
+ orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]')
+ run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ run_cmd "sleep 5"
+ forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then
+ check_err 1 "Forced garbage collection did not run"
+ fi
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Forced garbage collection"
+
+ run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1"
+
+ RET=0
+
+ # Periodic garbage collection runs every "base_reachable"/2 seconds and
+ # if the number of entries is larger than "thresh1", then it deletes
+ # stale entries that have not been used in the last "gc_stale" seconds.
+ #
+ # Check that an "extern_valid" entry survives a periodic garbage
+ # collection. Add an "extern_valid" entry, add more than "thresh1"
+ # regular entries, wait "base_reachable" (longer than "gc_stale")
+ # seconds and check that the "extern_valid" entry was not deleted.
+ #
+ # Note that the garbage collection thresholds and "base_reachable" are
+ # global resources and that changes in the initial namespace affect all
+ # the namespaces.
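+	#
+	# For example, with thresh1=10, base_reachable=10000 ms and gc_stale=5000 ms
+	# (the values configured below), periodic garbage collection should run
+	# roughly every 5 seconds and may delete regular stale entries not used for
+	# more than 5 seconds; sleeping 10 seconds allows at least one run to happen.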
+ local periodic_gc_runs_t0
+ local periodic_gc_runs_t1
+ local orig_base_reachable
+ local orig_gc_stale
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]')
+ run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000"
+ orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]')
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000"
+ # Wait orig_base_reachable/2 for the new interval to take effect.
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ run_cmd "sleep 10"
+ periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]]
+ check_err $? "Periodic garbage collection did not run"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Periodic garbage collection"
+
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale"
+ run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable"
+}
+
+extern_valid_ipv4()
+{
+ extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2.
+}
+
+extern_valid_ipv6()
+{
+ extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1::
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command jq
+
+if ! ip neigh help 2>&1 | grep -q "extern_valid"; then
+ echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
}
check_hv_connectivity() {
- ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
- sleep 1
- ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
return $?
}
check_vm_connectivity() {
- run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+ slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
- run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+ slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index e9c2f71da207..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
check_connectivity()
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 94176ffe4646..40f5c2908dda 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -1,341 +1,26 @@
# SPDX-License-Identifier: GPL-2.0
-# Makefile for nolibc tests
-# we're in ".../tools/testing/selftests/nolibc"
-ifeq ($(srctree),)
-srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR)))
-endif
-
-include $(srctree)/tools/scripts/utilities.mak
-# We need this for the "__cc-option" macro.
-include $(srctree)/scripts/Makefile.compiler
-
-ifneq ($(O),)
-ifneq ($(call is-absolute,$(O)),y)
-$(error Only absolute O= parameters are supported)
-endif
-objtree := $(O)
-else
-objtree ?= $(srctree)
-endif
-
-ifeq ($(ARCH),)
-include $(srctree)/scripts/subarch.include
-ARCH = $(SUBARCH)
-endif
-
-cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2))
-
-# XARCH extends the kernel's ARCH with a few variants of the same
-# architecture that only differ by the configuration, the toolchain
-# and the Qemu program used. It is copied as-is into ARCH except for
-# a few specific values which are mapped like this:
-#
-# XARCH | ARCH | config
-# -------------|-----------|-------------------------
-# ppc | powerpc | 32 bits
-# ppc64 | powerpc | 64 bits big endian
-# ppc64le | powerpc | 64 bits little endian
-#
-# It is recommended to only use XARCH, though it does not harm if
-# ARCH is already set. For simplicity, ARCH is sufficient for all
-# architectures where both are equal.
-
-# configure default variants for target kernel supported architectures
-XARCH_powerpc = ppc
-XARCH_mips = mips32le
-XARCH_riscv = riscv64
-XARCH = $(or $(XARCH_$(ARCH)),$(ARCH))
-
-# map from user input variants to their kernel supported architectures
-ARCH_armthumb = arm
-ARCH_ppc = powerpc
-ARCH_ppc64 = powerpc
-ARCH_ppc64le = powerpc
-ARCH_mips32le = mips
-ARCH_mips32be = mips
-ARCH_riscv32 = riscv
-ARCH_riscv64 = riscv
-ARCH_s390x = s390
-ARCH_sparc32 = sparc
-ARCH_sparc64 = sparc
-ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
-# kernel image names by architecture
-IMAGE_i386 = arch/x86/boot/bzImage
-IMAGE_x86_64 = arch/x86/boot/bzImage
-IMAGE_x86 = arch/x86/boot/bzImage
-IMAGE_arm64 = arch/arm64/boot/Image
-IMAGE_arm = arch/arm/boot/zImage
-IMAGE_armthumb = arch/arm/boot/zImage
-IMAGE_mips32le = vmlinuz
-IMAGE_mips32be = vmlinuz
-IMAGE_ppc = vmlinux
-IMAGE_ppc64 = vmlinux
-IMAGE_ppc64le = arch/powerpc/boot/zImage
-IMAGE_riscv = arch/riscv/boot/Image
-IMAGE_riscv32 = arch/riscv/boot/Image
-IMAGE_riscv64 = arch/riscv/boot/Image
-IMAGE_s390x = arch/s390/boot/bzImage
-IMAGE_s390 = arch/s390/boot/bzImage
-IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
-IMAGE_sparc32 = arch/sparc/boot/image
-IMAGE_sparc64 = arch/sparc/boot/image
-IMAGE_m68k = vmlinux
-IMAGE = $(objtree)/$(IMAGE_$(XARCH))
-IMAGE_NAME = $(notdir $(IMAGE))
+TEST_GEN_PROGS := nolibc-test
-# default kernel configurations that appear to be usable
-DEFCONFIG_i386 = defconfig
-DEFCONFIG_x86_64 = defconfig
-DEFCONFIG_x86 = defconfig
-DEFCONFIG_arm64 = defconfig
-DEFCONFIG_arm = multi_v7_defconfig
-DEFCONFIG_armthumb = multi_v7_defconfig
-DEFCONFIG_mips32le = malta_defconfig
-DEFCONFIG_mips32be = malta_defconfig generic/eb.config
-DEFCONFIG_ppc = pmac32_defconfig
-DEFCONFIG_ppc64 = powernv_be_defconfig
-DEFCONFIG_ppc64le = powernv_defconfig
-DEFCONFIG_riscv = defconfig
-DEFCONFIG_riscv32 = rv32_defconfig
-DEFCONFIG_riscv64 = defconfig
-DEFCONFIG_s390x = defconfig
-DEFCONFIG_s390 = defconfig compat.config
-DEFCONFIG_loongarch = defconfig
-DEFCONFIG_sparc32 = sparc32_defconfig
-DEFCONFIG_sparc64 = sparc64_defconfig
-DEFCONFIG_m68k = virt_defconfig
-DEFCONFIG = $(DEFCONFIG_$(XARCH))
+include ../lib.mk
+include $(top_srcdir)/scripts/Makefile.compiler
-EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD
-EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
-EXTRACONFIG_arm = -e CONFIG_NAMESPACES
-EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES
-
-# optional tests to run (default = all)
-TEST =
-
-# QEMU_ARCH: arch names used by qemu
-QEMU_ARCH_i386 = i386
-QEMU_ARCH_x86_64 = x86_64
-QEMU_ARCH_x86 = x86_64
-QEMU_ARCH_arm64 = aarch64
-QEMU_ARCH_arm = arm
-QEMU_ARCH_armthumb = arm
-QEMU_ARCH_mips32le = mipsel # works with malta_defconfig
-QEMU_ARCH_mips32be = mips
-QEMU_ARCH_ppc = ppc
-QEMU_ARCH_ppc64 = ppc64
-QEMU_ARCH_ppc64le = ppc64
-QEMU_ARCH_riscv = riscv64
-QEMU_ARCH_riscv32 = riscv32
-QEMU_ARCH_riscv64 = riscv64
-QEMU_ARCH_s390x = s390x
-QEMU_ARCH_s390 = s390x
-QEMU_ARCH_loongarch = loongarch64
-QEMU_ARCH_sparc32 = sparc
-QEMU_ARCH_sparc64 = sparc64
-QEMU_ARCH_m68k = m68k
-QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
-
-QEMU_ARCH_USER_ppc64le = ppc64le
-QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH)))
-
-QEMU_BIOS_DIR = /usr/share/edk2/
-QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd
-
-ifneq ($(QEMU_BIOS_$(XARCH)),)
-QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH))
-endif
-
-# QEMU_ARGS : some arch-specific args to pass to qemu
-QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
-
-# OUTPUT is only set when run from the main makefile, otherwise
-# it defaults to this nolibc directory.
-OUTPUT ?= $(CURDIR)/
-
-ifeq ($(V),1)
-Q=
-else
-Q=@
-endif
+cc-option = $(call __cc-option, $(CC),,$(1),$(2))
-CFLAGS_i386 = $(call cc-option,-m32)
-CFLAGS_arm = -marm
-CFLAGS_armthumb = -mthumb -march=armv6t2
-CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
-CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
-CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
-CFLAGS_s390x = -m64
-CFLAGS_s390 = -m31
-CFLAGS_mips32le = -EL -mabi=32 -fPIC
-CFLAGS_mips32be = -EB -mabi=32
-CFLAGS_sparc32 = $(call cc-option,-m32)
-ifeq ($(origin XARCH),command line)
-CFLAGS_XARCH = $(CFLAGS_$(XARCH))
-endif
-CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
-CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all)
-CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
- $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
- $(CFLAGS_XARCH) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_SANITIZER) $(CFLAGS_EXTRA)
-LDFLAGS :=
+include Makefile.include
-LIBGCC := -lgcc
+CFLAGS = -nostdlib -nostdinc -static \
+ -isystem $(top_srcdir)/tools/include/nolibc -isystem $(top_srcdir)/usr/include \
+ $(CFLAGS_NOLIBC_TEST)
-ifneq ($(LLVM),)
-# Not needed for clang
-LIBGCC :=
+ifeq ($(LLVM),)
+LDLIBS := -lgcc
endif
-# Modify CFLAGS based on LLVM=
-include $(srctree)/tools/scripts/Makefile.include
-
-# GCC uses "s390", clang "systemz"
-CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS))
-
-REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
- END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
- if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
- printf("\nSee all results in %s\n", ARGV[1]); }'
+$(OUTPUT)/nolibc-test: nolibc-test.c nolibc-test-linkage.c | headers
help:
- @echo "Supported targets under selftests/nolibc:"
- @echo " all call the \"run\" target below"
- @echo " help this help"
- @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
- @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
- @echo " libc-test build an executable using the compiler's default libc instead"
- @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
- @echo " initramfs.cpio prepare the initramfs archive with nolibc-test"
- @echo " initramfs prepare the initramfs tree with nolibc-test"
- @echo " defconfig create a fresh new default config (uses \$$XARCH)"
- @echo " kernel (re)build the kernel (uses \$$XARCH)"
- @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)"
- @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
- @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
- @echo " clean clean the sysroot, initramfs, build and output files"
- @echo ""
- @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
- @echo ""
- @echo "Currently using the following variables:"
- @echo " ARCH = $(ARCH)"
- @echo " XARCH = $(XARCH)"
- @echo " CROSS_COMPILE = $(CROSS_COMPILE)"
- @echo " CC = $(CC)"
- @echo " OUTPUT = $(OUTPUT)"
- @echo " TEST = $(TEST)"
- @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]"
- @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]"
- @echo ""
-
-all: run
-
-sysroot: sysroot/$(ARCH)/include
-
-sysroot/$(ARCH)/include:
- $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
- $(QUIET_MKDIR)mkdir -p sysroot
- $(Q)$(MAKE) -C $(srctree) outputmakefile
- $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check
- $(Q)mv sysroot/sysroot sysroot/$(ARCH)
-
-ifneq ($(NOLIBC_SYSROOT),0)
-nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
- $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
-else
-nolibc-test: nolibc-test.c nolibc-test-linkage.c
- $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
-endif
-
-libc-test: nolibc-test.c nolibc-test-linkage.c
- $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
-
-# local libc-test
-run-libc-test: libc-test
- $(Q)./libc-test > "$(CURDIR)/run.out" || :
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# local nolibc-test
-run-nolibc-test: nolibc-test
- $(Q)./nolibc-test > "$(CURDIR)/run.out" || :
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# qemu user-land test
-run-user: nolibc-test
- $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || :
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-initramfs.cpio: kernel nolibc-test
- $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio
-
-initramfs: nolibc-test
- $(QUIET_MKDIR)mkdir -p initramfs
- $(call QUIET_INSTALL, initramfs/init)
- $(Q)cp nolibc-test initramfs/init
-
-defconfig:
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG)
- $(Q)if [ -n "$(EXTRACONFIG)" ]; then \
- $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
- $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
- fi
-
-kernel: | defconfig
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
-
-kernel-standalone: initramfs | defconfig
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
-
-# run the tests after building the kernel
-run: kernel initramfs.cpio
- $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# re-run the tests from an existing kernel
-rerun:
- $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-# report with existing test log
-report:
- $(Q)$(REPORT) $(CURDIR)/run.out
-
-clean:
- $(call QUIET_CLEAN, sysroot)
- $(Q)rm -rf sysroot
- $(call QUIET_CLEAN, nolibc-test)
- $(Q)rm -f nolibc-test
- $(call QUIET_CLEAN, libc-test)
- $(Q)rm -f libc-test
- $(call QUIET_CLEAN, initramfs.cpio)
- $(Q)rm -rf initramfs.cpio
- $(call QUIET_CLEAN, initramfs)
- $(Q)rm -rf initramfs
- $(call QUIET_CLEAN, run.out)
- $(Q)rm -rf run.out
+ @echo "For the custom nolibc testsuite use '$(MAKE) -f Makefile.nolibc'; available targets:"
+ @$(MAKE) -f Makefile.nolibc help
-.PHONY: sysroot/$(ARCH)/include
+.PHONY: help
diff --git a/tools/testing/selftests/nolibc/Makefile.include b/tools/testing/selftests/nolibc/Makefile.include
new file mode 100644
index 000000000000..66287fafbbe0
--- /dev/null
+++ b/tools/testing/selftests/nolibc/Makefile.include
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+__CFLAGS_STACKPROTECTOR = $(call cc-option,-fstack-protector-all) $(call cc-option,-mstack-protector-guard=global)
+_CFLAGS_STACKPROTECTOR ?= $(call try-run, \
+ echo 'void foo(void) {}' | $(CC) -x c - -o - -S $(CLANG_CROSS_FLAGS) $(__CFLAGS_STACKPROTECTOR) | grep -q __stack_chk_guard, \
+ $(__CFLAGS_STACKPROTECTOR))
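+# (Rationale, as understood here: cc-option alone only proves the flags are
+# accepted, while grepping the generated assembly for __stack_chk_guard checks
+# that the global guard is actually emitted.)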
+_CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all)
+CFLAGS_NOLIBC_TEST ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
+ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
+ $(_CFLAGS_STACKPROTECTOR) $(_CFLAGS_SANITIZER)
diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc
new file mode 100644
index 000000000000..0fb759ba992e
--- /dev/null
+++ b/tools/testing/selftests/nolibc/Makefile.nolibc
@@ -0,0 +1,383 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for nolibc tests
+# we're in ".../tools/testing/selftests/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR)))
+endif
+
+include $(srctree)/tools/scripts/utilities.mak
+# We need this for the "__cc-option" macro.
+include $(srctree)/scripts/Makefile.compiler
+
+ifneq ($(O),)
+ifneq ($(call is-absolute,$(O)),y)
+$(error Only absolute O= parameters are supported)
+endif
+objtree := $(O)
+else
+objtree ?= $(srctree)
+endif
+
+ifeq ($(ARCH),)
+include $(srctree)/scripts/subarch.include
+ARCH = $(SUBARCH)
+endif
+
+cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2))
+
+# XARCH extends the kernel's ARCH with a few variants of the same
+# architecture that only differ by the configuration, the toolchain
+# and the Qemu program used. It is copied as-is into ARCH except for
+# a few specific values which are mapped like this:
+#
+# XARCH | ARCH | config
+# -------------|-----------|-------------------------
+# ppc | powerpc | 32 bits
+# ppc64 | powerpc | 64 bits big endian
+# ppc64le | powerpc | 64 bits little endian
+#
+# It is recommended to only use XARCH, though it does not harm if
+# ARCH is already set. For simplicity, ARCH is sufficient for all
+# architectures where both are equal.
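+#
+# A hypothetical invocation (the toolchain prefix is only an example):
+#   make -f Makefile.nolibc XARCH=ppc64le CROSS_COMPILE=powerpc64le-linux-gnu- run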
+
+# configure default variants for target kernel supported architectures
+XARCH_powerpc = ppc
+XARCH_mips = mips32le
+XARCH_riscv = riscv64
+XARCH = $(or $(XARCH_$(ARCH)),$(ARCH))
+
+# map from user input variants to their kernel supported architectures
+ARCH_x32 = x86
+ARCH_armthumb = arm
+ARCH_ppc = powerpc
+ARCH_ppc64 = powerpc
+ARCH_ppc64le = powerpc
+ARCH_mips32le = mips
+ARCH_mips32be = mips
+ARCH_mipsn32le = mips
+ARCH_mipsn32be = mips
+ARCH_mips64le = mips
+ARCH_mips64be = mips
+ARCH_riscv32 = riscv
+ARCH_riscv64 = riscv
+ARCH_s390x = s390
+ARCH_sparc32 = sparc
+ARCH_sparc64 = sparc
+ARCH_sh4 = sh
+ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
+
+# kernel image names by architecture
+IMAGE_i386 = arch/x86/boot/bzImage
+IMAGE_x86_64 = arch/x86/boot/bzImage
+IMAGE_x32 = arch/x86/boot/bzImage
+IMAGE_x86 = arch/x86/boot/bzImage
+IMAGE_arm64 = arch/arm64/boot/Image
+IMAGE_arm = arch/arm/boot/zImage
+IMAGE_armthumb = arch/arm/boot/zImage
+IMAGE_mips32le = vmlinuz
+IMAGE_mips32be = vmlinuz
+IMAGE_mipsn32le = vmlinuz
+IMAGE_mipsn32be = vmlinuz
+IMAGE_mips64le = vmlinuz
+IMAGE_mips64be = vmlinuz
+IMAGE_ppc = vmlinux
+IMAGE_ppc64 = vmlinux
+IMAGE_ppc64le = arch/powerpc/boot/zImage
+IMAGE_riscv = arch/riscv/boot/Image
+IMAGE_riscv32 = arch/riscv/boot/Image
+IMAGE_riscv64 = arch/riscv/boot/Image
+IMAGE_s390x = arch/s390/boot/bzImage
+IMAGE_s390 = arch/s390/boot/bzImage
+IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
+IMAGE_sparc32 = arch/sparc/boot/image
+IMAGE_sparc64 = arch/sparc/boot/image
+IMAGE_m68k = vmlinux
+IMAGE_sh4 = arch/sh/boot/zImage
+IMAGE = $(objtree)/$(IMAGE_$(XARCH))
+IMAGE_NAME = $(notdir $(IMAGE))
+
+# default kernel configurations that appear to be usable
+DEFCONFIG_i386 = defconfig
+DEFCONFIG_x86_64 = defconfig
+DEFCONFIG_x32 = defconfig
+DEFCONFIG_x86 = defconfig
+DEFCONFIG_arm64 = defconfig
+DEFCONFIG_arm = multi_v7_defconfig
+DEFCONFIG_armthumb = multi_v7_defconfig
+DEFCONFIG_mips32le = malta_defconfig
+DEFCONFIG_mips32be = malta_defconfig generic/eb.config
+DEFCONFIG_mipsn32le = malta_defconfig generic/64r2.config
+DEFCONFIG_mipsn32be = malta_defconfig generic/64r6.config generic/eb.config
+DEFCONFIG_mips64le = malta_defconfig generic/64r6.config
+DEFCONFIG_mips64be = malta_defconfig generic/64r2.config generic/eb.config
+DEFCONFIG_ppc = pmac32_defconfig
+DEFCONFIG_ppc64 = powernv_be_defconfig
+DEFCONFIG_ppc64le = powernv_defconfig
+DEFCONFIG_riscv = defconfig
+DEFCONFIG_riscv32 = rv32_defconfig
+DEFCONFIG_riscv64 = defconfig
+DEFCONFIG_s390x = defconfig
+DEFCONFIG_s390 = defconfig compat.config
+DEFCONFIG_loongarch = defconfig
+DEFCONFIG_sparc32 = sparc32_defconfig
+DEFCONFIG_sparc64 = sparc64_defconfig
+DEFCONFIG_m68k = virt_defconfig
+DEFCONFIG_sh4 = rts7751r2dplus_defconfig
+DEFCONFIG = $(DEFCONFIG_$(XARCH))
+
+EXTRACONFIG_x32 = -e CONFIG_X86_X32_ABI
+EXTRACONFIG_arm = -e CONFIG_NAMESPACES
+EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES
+EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD
+EXTRACONFIG_sh4 = -e CONFIG_BLK_DEV_INITRD -e CONFIG_CMDLINE_FROM_BOOTLOADER
+EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
+
+# optional tests to run (default = all)
+TEST =
+
+# QEMU_ARCH: arch names used by qemu
+QEMU_ARCH_i386 = i386
+QEMU_ARCH_x86_64 = x86_64
+QEMU_ARCH_x32 = x86_64
+QEMU_ARCH_x86 = x86_64
+QEMU_ARCH_arm64 = aarch64
+QEMU_ARCH_arm = arm
+QEMU_ARCH_armthumb = arm
+QEMU_ARCH_mips32le = mipsel # works with malta_defconfig
+QEMU_ARCH_mips32be = mips
+QEMU_ARCH_mipsn32le = mips64el
+QEMU_ARCH_mipsn32be = mips64
+QEMU_ARCH_mips64le = mips64el
+QEMU_ARCH_mips64be = mips64
+QEMU_ARCH_ppc = ppc
+QEMU_ARCH_ppc64 = ppc64
+QEMU_ARCH_ppc64le = ppc64
+QEMU_ARCH_riscv = riscv64
+QEMU_ARCH_riscv32 = riscv32
+QEMU_ARCH_riscv64 = riscv64
+QEMU_ARCH_s390x = s390x
+QEMU_ARCH_s390 = s390x
+QEMU_ARCH_loongarch = loongarch64
+QEMU_ARCH_sparc32 = sparc
+QEMU_ARCH_sparc64 = sparc64
+QEMU_ARCH_m68k = m68k
+QEMU_ARCH_sh4 = sh4
+QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
+
+QEMU_ARCH_USER_ppc64le = ppc64le
+QEMU_ARCH_USER_mipsn32le = mipsn32el
+QEMU_ARCH_USER_mipsn32be = mipsn32
+QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH)))
+
+QEMU_BIOS_DIR = /usr/share/edk2/
+QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd
+
+ifneq ($(QEMU_BIOS_$(XARCH)),)
+QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH))
+endif
+
+# QEMU_ARGS : some arch-specific args to pass to qemu
+QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x32 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mipsn32le = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mipsn32be = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips64le = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips64be = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sh4 = -M r2d -serial file:/dev/stdout -append "console=ttySC1,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
+
+# OUTPUT is only set when run from the main makefile, otherwise
+# it defaults to this nolibc directory.
+OUTPUT ?= $(CURDIR)/
+
+ifeq ($(V),1)
+Q=
+else
+Q=@
+endif
+
+CFLAGS_i386 = $(call cc-option,-m32)
+CFLAGS_x32 = -mx32
+CFLAGS_arm = -marm
+CFLAGS_armthumb = -mthumb -march=armv6t2
+CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
+CFLAGS_s390x = -m64
+CFLAGS_s390 = -m31
+CFLAGS_mips32le = -EL -mabi=32 -fPIC
+CFLAGS_mips32be = -EB -mabi=32
+CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2
+CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6
+CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6
+CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2
+CFLAGS_sparc32 = $(call cc-option,-m32)
+CFLAGS_sh4 = -ml -m4
+ifeq ($(origin XARCH),command line)
+CFLAGS_XARCH = $(CFLAGS_$(XARCH))
+endif
+
+include Makefile.include
+
+CFLAGS ?= $(CFLAGS_NOLIBC_TEST) $(CFLAGS_XARCH) $(CFLAGS_EXTRA)
+LDFLAGS :=
+
+LIBGCC := -lgcc
+
+ifeq ($(ARCH),x86)
+# Not needed on x86, probably not present for x32
+LIBGCC :=
+endif
+
+ifneq ($(LLVM),)
+# Not needed for clang
+LIBGCC :=
+endif
+
+# Modify CFLAGS based on LLVM=
+include $(srctree)/tools/scripts/Makefile.include
+
+REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
+ /^Total number of errors:/{done++} \
+ END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
+ if (f || !p || !done) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
+ printf("\nSee all results in %s\n", ARGV[1]); }'
+
+help:
+ @echo "Supported targets under selftests/nolibc:"
+ @echo " all call the \"run\" target below"
+ @echo " help this help"
+ @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
+ @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
+ @echo " libc-test build an executable using the compiler's default libc instead"
+ @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " initramfs.cpio prepare the initramfs archive with nolibc-test"
+ @echo " initramfs prepare the initramfs tree with nolibc-test"
+ @echo " defconfig create a fresh new default config (uses \$$XARCH)"
+ @echo " kernel (re)build the kernel (uses \$$XARCH)"
+ @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)"
+ @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
+	@echo " rerun runs a previously built kernel in QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " clean clean the sysroot, initramfs, build and output files"
+ @echo ""
+ @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
+ @echo ""
+ @echo "Currently using the following variables:"
+ @echo " ARCH = $(ARCH)"
+ @echo " XARCH = $(XARCH)"
+ @echo " CROSS_COMPILE = $(CROSS_COMPILE)"
+ @echo " CC = $(CC)"
+ @echo " OUTPUT = $(OUTPUT)"
+ @echo " TEST = $(TEST)"
+ @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]"
+ @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]"
+ @echo ""
+
+all: run
+
+sysroot: sysroot/$(ARCH)/include
+
+sysroot/$(ARCH)/include:
+ $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
+ $(QUIET_MKDIR)mkdir -p sysroot
+ $(Q)$(MAKE) -C $(srctree) outputmakefile
+ $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check
+ $(Q)mv sysroot/sysroot sysroot/$(ARCH)
+
+ifneq ($(NOLIBC_SYSROOT),0)
+nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
+ $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
+ -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
+else
+nolibc-test: nolibc-test.c nolibc-test-linkage.c
+ $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
+ -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
+endif
+
+libc-test: nolibc-test.c nolibc-test-linkage.c
+ $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
+
+# local libc-test
+run-libc-test: libc-test
+ $(Q)./libc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# local nolibc-test
+run-nolibc-test: nolibc-test
+ $(Q)./nolibc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# qemu user-land test
+run-user: nolibc-test
+ $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+initramfs.cpio: kernel nolibc-test
+ $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio
+
+initramfs: nolibc-test
+ $(QUIET_MKDIR)mkdir -p initramfs
+ $(call QUIET_INSTALL, initramfs/init)
+ $(Q)cp nolibc-test initramfs/init
+
+defconfig:
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG)
+ $(Q)if [ -n "$(EXTRACONFIG)" ]; then \
+ $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
+ $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
+ fi
+
+kernel:
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
+
+kernel-standalone: initramfs
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
+
+# run the tests after building the kernel
+run: kernel initramfs.cpio
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out"
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# re-run the tests from an existing kernel
+rerun:
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out"
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# report with existing test log
+report:
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+clean:
+ $(call QUIET_CLEAN, sysroot)
+ $(Q)rm -rf sysroot
+ $(call QUIET_CLEAN, nolibc-test)
+ $(Q)rm -f nolibc-test
+ $(call QUIET_CLEAN, libc-test)
+ $(Q)rm -f libc-test
+ $(call QUIET_CLEAN, initramfs.cpio)
+ $(Q)rm -rf initramfs.cpio
+ $(call QUIET_CLEAN, initramfs)
+ $(Q)rm -rf initramfs
+ $(call QUIET_CLEAN, run.out)
+ $(Q)rm -rf run.out
+
+.PHONY: sysroot/$(ARCH)/include
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index dbe13000fb1a..a297ee0d6d07 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -877,7 +877,12 @@ int test_file_stream(void)
return 0;
}
-int test_fork(void)
+enum fork_type {
+ FORK_STANDARD,
+ FORK_VFORK,
+};
+
+int test_fork(enum fork_type type)
{
int status;
pid_t pid;
@@ -886,14 +891,23 @@ int test_fork(void)
fflush(stdout);
fflush(stderr);
- pid = fork();
+ switch (type) {
+ case FORK_STANDARD:
+ pid = fork();
+ break;
+ case FORK_VFORK:
+ pid = vfork();
+ break;
+ default:
+ return 1;
+ }
switch (pid) {
case -1:
return 1;
case 0:
- exit(123);
+ _exit(123);
default:
pid = waitpid(pid, &status, 0);
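For reference, the switch from exit() to _exit() in the child path above is what makes the vfork() variant safe: a vfork() child borrows the parent's address space and stack until it calls execve() or _exit(), so it must not run the parent's stdio/atexit teardown. A minimal sketch of the same pattern outside the nolibc harness, using only standard libc, might look roughly like this:

#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t pid = vfork();

	if (pid < 0)
		return 1;
	if (pid == 0)
		_exit(123);	/* never plain exit(): parent state is shared */
	if (waitpid(pid, &status, 0) != pid)
		return 1;
	/* the parent should see the child's 123 back */
	printf("child exited with %d\n", WEXITSTATUS(status));
	return 0;
}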
@@ -1330,7 +1344,7 @@ int run_syscall(int min, int max)
CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break;
- CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
+ CASE_TEST(fork); EXPECT_SYSZR(1, test_fork(FORK_STANDARD)); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break;
@@ -1349,6 +1363,7 @@ int run_syscall(int min, int max)
CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break;
+ CASE_TEST(nanosleep); ts.tv_nsec = -1; EXPECT_SYSER(1, nanosleep(&ts, NULL), -1, EINVAL); break;
CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break;
CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break;
CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break;
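The new nanosleep case relies on the kernel rejecting any struct timespec whose tv_nsec falls outside [0, 999999999] with EINVAL. A tiny standalone illustration of that contract (standard libc, no harness assumptions):

#include <errno.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	/* invalid on purpose: tv_nsec must be in [0, 999999999] */
	struct timespec ts = { .tv_sec = 0, .tv_nsec = -1 };

	if (nanosleep(&ts, NULL) == -1 && errno == EINVAL)
		printf("rejected as expected\n");
	return 0;
}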
@@ -1374,6 +1389,7 @@ int run_syscall(int min, int max)
CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break;
+ CASE_TEST(vfork); EXPECT_SYSZR(1, test_fork(FORK_VFORK)); break;
CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break;
CASE_TEST(waitpid_min); EXPECT_SYSER(1, waitpid(INT_MIN, &tmp, WNOHANG), -1, ESRCH); break;
CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break;
@@ -1413,7 +1429,7 @@ int run_stdlib(int min, int max)
* Add some more chars after the \0, to test functions that overwrite the buffer set
* the \0 at the exact right position.
*/
- char buf[10] = "test123456";
+ char buf[11] = "test123456";
buf[4] = '\0';
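The one-byte bump matters because the literal "test123456" needs 11 bytes including its terminating NUL. With char buf[10] the initializer is still valid C, but the terminator is silently dropped, so the string functions under test could read past the array. A two-line illustration:

	char terminated[11]   = "test123456";	/* 10 chars + '\0' */
	char unterminated[10] = "test123456";	/* legal C, but no '\0' is stored */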
@@ -1646,6 +1662,28 @@ int test_strerror(void)
return 0;
}
+static int test_printf_error(void)
+{
+ int fd, ret, saved_errno;
+
+ fd = open("/dev/full", O_RDWR);
+ if (fd == -1)
+ return 1;
+
+ errno = 0;
+ ret = dprintf(fd, "foo");
+ saved_errno = errno;
+ close(fd);
+
+ if (ret != -1)
+ return 2;
+
+ if (saved_errno != ENOSPC)
+ return 3;
+
+ return 0;
+}
+
static int run_printf(int min, int max)
{
int test;
@@ -1675,6 +1713,7 @@ static int run_printf(int min, int max)
CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break;
CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break;
CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break;
+ CASE_TEST(printf_error); EXPECT_ZR(1, test_printf_error()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -1762,12 +1801,14 @@ int prepare(void)
if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) {
if (stat("/dev/console", &stat_buf) != 0 ||
stat("/dev/null", &stat_buf) != 0 ||
- stat("/dev/zero", &stat_buf) != 0) {
+ stat("/dev/zero", &stat_buf) != 0 ||
+ stat("/dev/full", &stat_buf) != 0) {
/* try devtmpfs first, otherwise fall back to manual creation */
if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) {
mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1));
mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3));
mknod("/dev/zero", 0666 | S_IFCHR, makedev(1, 5));
+ mknod("/dev/full", 0666 | S_IFCHR, makedev(1, 7));
}
}
}
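The prepare() hunk above creates /dev/full (char device 1:7) when devtmpfs is unavailable precisely so that test_printf_error() can observe ENOSPC: /dev/full accepts opens but fails every write with ENOSPC. A minimal sketch of the same check with plain libc (not the nolibc harness):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/full", O_WRONLY);

	if (fd < 0)
		return 1;
	errno = 0;
	/* dprintf() writes straight to the fd, so the ENOSPC surfaces here */
	if (dprintf(fd, "foo") < 0 && errno == ENOSPC)
		printf("writes to /dev/full fail with ENOSPC as expected\n");
	close(fd);
	return 0;
}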
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index 8277599e6441..e8af1fb505cf 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -18,15 +18,16 @@ test_mode=system
werror=1
llvm=
all_archs=(
- i386 x86_64
+ i386 x86_64 x32
arm64 arm armthumb
- mips32le mips32be
+ mips32le mips32be mipsn32le mipsn32be mips64le mips64be
ppc ppc64 ppc64le
riscv32 riscv64
s390x s390
loongarch
sparc32 sparc64
m68k
+ sh4
)
archs="${all_archs[@]}"
@@ -114,6 +115,7 @@ crosstool_arch() {
mips*) echo mips;;
s390*) echo s390;;
sparc*) echo sparc64;;
+ x32*) echo x86_64;;
*) echo "$1";;
esac
}
@@ -169,7 +171,7 @@ test_arch() {
if [ "$werror" -ne 0 ]; then
CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror"
fi
- MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
+ MAKE=(make -f Makefile.nolibc -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
case "$test_mode" in
'system')
@@ -187,7 +189,11 @@ test_arch() {
echo "Unsupported configuration"
return
fi
- if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then
+ if [ "$arch" = "m68k" -o "$arch" = "sh4" ] && [ "$llvm" = "1" ]; then
+ echo "Unsupported configuration"
+ return
+ fi
+ if [ "$arch" = "x32" ] && [ "$test_mode" = "user" ]; then
echo "Unsupported configuration"
return
fi
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 0406a065deb4..144e7ff65d6a 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -10,3 +10,5 @@ pidfd_file_handle_test
pidfd_bind_mount
pidfd_info_test
pidfd_exec_helper
+pidfd_xattr_test
+pidfd_setattr_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index fcbefc0d77f6..764a8f9ecefa 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
+CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
- pidfd_file_handle_test pidfd_bind_mount pidfd_info_test
+ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \
+ pidfd_xattr_test pidfd_setattr_test
TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index efd74063126e..f87993def738 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -16,9 +16,22 @@
#include <sys/types.h>
#include <sys/wait.h>
+/*
+ * Remove the userspace definitions of the following preprocessor symbols
+ * to avoid duplicate-definition warnings from the subsequent in-kernel
+ * definitions.
+ */
+#undef SCHED_NORMAL
+#undef SCHED_FLAG_KEEP_ALL
+#undef SCHED_FLAG_UTIL_CLAMP
+
#include "../kselftest.h"
#include "../clone3/clone3_selftests.h"
+#ifndef FD_PIDFS_ROOT
+#define FD_PIDFS_ROOT -10002
+#endif
+
#ifndef P_PIDFD
#define P_PIDFD 3
#endif
@@ -56,7 +69,7 @@
#endif
#ifndef PIDFD_SELF_THREAD_GROUP
-#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */
+#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */
#endif
#ifndef PIDFD_SELF
diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
index 439b9c6c0457..6bd2e9c9565b 100644
--- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
@@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags)
ASSERT_EQ(close(pidfd), 0);
}
+/*
+ * Verify that we can decode a file handle without having to pass a pidfd.
+ */
+TEST_F(file_handle, decode_purely_based_on_file_handle)
+{
+ int mnt_id;
+ struct file_handle *fh;
+ int pidfd = -EBADF;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0);
+
+ pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(-EBADF, fh, 0);
+ ASSERT_LT(pidfd, 0);
+
+ pidfd = open_by_handle_at(AT_FDCWD, fh, 0);
+ ASSERT_LT(pidfd, 0);
+
+ free(fh);
+}
+
TEST_HARNESS_MAIN
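The new test exercises decoding a pidfs file handle with no reference pidfd at all: name_to_handle_at() on an existing pidfd yields the handle, and open_by_handle_at() then accepts either another pidfd or the FD_PIDFS_ROOT sentinel as its anchor. A minimal sketch of the round trip, assuming the same FD_PIDFS_ROOT fallback value as pidfd.h above (open_by_handle_at() typically needs privilege, as the selftest has when run as root):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>

#ifndef FD_PIDFS_ROOT
#define FD_PIDFS_ROOT -10002
#endif

int reopen_pidfd_via_handle(int pidfd)
{
	struct file_handle *fh;
	int mnt_id, newfd;

	fh = calloc(1, sizeof(*fh) + MAX_HANDLE_SZ);
	if (!fh)
		return -1;
	fh->handle_bytes = MAX_HANDLE_SZ;

	/* encode the pidfd itself (empty path + AT_EMPTY_PATH) */
	if (name_to_handle_at(pidfd, "", fh, &mnt_id, AT_EMPTY_PATH) < 0) {
		free(fh);
		return -1;
	}

	/* decode purely from the handle, anchored at the pidfs root */
	newfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC);
	free(fh);
	return newfd;
}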
diff --git a/tools/testing/selftests/pidfd/pidfd_setattr_test.c b/tools/testing/selftests/pidfd/pidfd_setattr_test.c
new file mode 100644
index 000000000000..d7de05edc4b3
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setattr_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(pidfs_setattr)
+{
+ pid_t child_pid;
+ int child_pidfd;
+};
+
+FIXTURE_SETUP(pidfs_setattr)
+{
+ self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid, 0);
+
+ if (self->child_pid == 0)
+ _exit(EXIT_SUCCESS);
+}
+
+FIXTURE_TEARDOWN(pidfs_setattr)
+{
+ sys_waitid(P_PID, self->child_pid, NULL, WEXITED);
+ EXPECT_EQ(close(self->child_pidfd), 0);
+}
+
+TEST_F(pidfs_setattr, no_chown)
+{
+ ASSERT_LT(fchown(self->child_pidfd, 1234, 5678), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+}
+
+TEST_F(pidfs_setattr, no_chmod)
+{
+ ASSERT_LT(fchmod(self->child_pidfd, 0777), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+}
+
+TEST_F(pidfs_setattr, no_exec)
+{
+ char *const argv[] = { NULL };
+ char *const envp[] = { NULL };
+
+ ASSERT_LT(execveat(self->child_pidfd, "", argv, envp, AT_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EACCES);
+}
+
+TEST_HARNESS_MAIN
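The fixture depends on create_child() from the shared pidfd.h handing back both the child's PID and a pidfd; presumably it wraps clone3() with CLONE_PIDFD (an assumption about that helper, which is not part of this patch). A hypothetical stand-in showing how a pidfd is obtained that way:

#define _GNU_SOURCE
#include <signal.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/sched.h>	/* struct clone_args, CLONE_PIDFD */

/* Hypothetical equivalent of the pidfd.h create_child() helper. */
static pid_t create_child_sketch(int *pidfd, uint64_t extra_flags)
{
	struct clone_args args = {
		.flags       = CLONE_PIDFD | extra_flags,
		.pidfd       = (uint64_t)(uintptr_t)pidfd,	/* kernel stores the new pidfd here */
		.exit_signal = SIGCHLD,
	};

	/* 0 in the child, the child's PID in the parent, -1 on error */
	return syscall(SYS_clone3, &args, sizeof(args));
}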
diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c
new file mode 100644
index 000000000000..5cf7bb0e4bf2
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(pidfs_xattr)
+{
+ pid_t child_pid;
+ int child_pidfd;
+};
+
+FIXTURE_SETUP(pidfs_xattr)
+{
+ self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid, 0);
+
+ if (self->child_pid == 0)
+ _exit(EXIT_SUCCESS);
+}
+
+FIXTURE_TEARDOWN(pidfs_xattr)
+{
+ sys_waitid(P_PID, self->child_pid, NULL, WEXITED);
+}
+
+TEST_F(pidfs_xattr, set_get_list_xattr_multiple)
+{
+ int ret, i;
+ char xattr_name[32];
+ char xattr_value[32];
+ char buf[32];
+ const int num_xattrs = 10;
+ char list[PATH_MAX] = {};
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i);
+ ret = fsetxattr(self->child_pidfd, xattr_name, xattr_value, strlen(xattr_value), 0);
+ ASSERT_EQ(ret, 0);
+ }
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i);
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf));
+ ASSERT_EQ(ret, strlen(xattr_value));
+ ASSERT_EQ(strcmp(buf, xattr_value), 0);
+ }
+
+ ret = flistxattr(self->child_pidfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ bool found = false;
+ for (char *it = list; it < list + ret; it += strlen(it) + 1) {
+ if (strcmp(it, xattr_name))
+ continue;
+ found = true;
+ break;
+ }
+ ASSERT_TRUE(found);
+ }
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i);
+ ret = fremovexattr(self->child_pidfd, xattr_name);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+ }
+}
+
+TEST_F(pidfs_xattr, set_get_list_xattr_persistent)
+{
+ int ret;
+ char buf[32];
+ char list[PATH_MAX] = {};
+
+ ret = fsetxattr(self->child_pidfd, "trusted.persistent", "persistent value", strlen("persistent value"), 0);
+ ASSERT_EQ(ret, 0);
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, strlen("persistent value"));
+ ASSERT_EQ(strcmp(buf, "persistent value"), 0);
+
+ ret = flistxattr(self->child_pidfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+	ASSERT_EQ(strcmp(list, "trusted.persistent"), 0);
+
+ ASSERT_EQ(close(self->child_pidfd), 0);
+ self->child_pidfd = -EBADF;
+ sleep(2);
+
+ self->child_pidfd = sys_pidfd_open(self->child_pid, 0);
+ ASSERT_GE(self->child_pidfd, 0);
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, strlen("persistent value"));
+ ASSERT_EQ(strcmp(buf, "persistent value"), 0);
+
+ ret = flistxattr(self->child_pidfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+ ASSERT_EQ(strcmp(list, "trusted.persistent"), 0);
+}
+
+TEST_HARNESS_MAIN
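The persistence test closes its pidfd, waits, and reacquires one for the same PID via sys_pidfd_open(), which I take to be a thin pidfd.h wrapper around pidfd_open(2) (an assumption about the helper). Combined with fgetxattr(), that is enough to observe that trusted.* attributes live on the pidfs inode rather than on any one open file description. A rough sketch of the same probe with raw syscalls:

#define _GNU_SOURCE
#include <sys/syscall.h>
#include <sys/xattr.h>
#include <unistd.h>

/* Hypothetical equivalent of the pidfd.h sys_pidfd_open() wrapper. */
static int pidfd_open_raw(pid_t pid, unsigned int flags)
{
	return syscall(SYS_pidfd_open, pid, flags);
}

static ssize_t read_persistent_xattr(pid_t pid, char *buf, size_t len)
{
	int pidfd = pidfd_open_raw(pid, 0);
	ssize_t ret;

	if (pidfd < 0)
		return -1;
	/* succeeds as long as the pid is still around, even after a close/reopen */
	ret = fgetxattr(pidfd, "trusted.persistent", buf, len);
	close(pidfd);
	return ret;
}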
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index edc08a4433fd..ed1e2886ba3c 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -120,6 +120,7 @@ static void usage(char *progname)
" -c query the ptp clock's capabilities\n"
" -d name device to open\n"
" -e val read 'val' external time stamp events\n"
+ " -E val enable rising (1), falling (2), or both (3) edges\n"
" -f val adjust the ptp clock frequency by 'val' ppb\n"
" -F chan Enable single channel mask and keep device open for debugfs verification.\n"
" -g get the ptp clock time\n"
@@ -178,6 +179,7 @@ int main(int argc, char *argv[])
int adjphase = 0;
int capabilities = 0;
int extts = 0;
+ int edge = 0;
int flagtest = 0;
int gettime = 0;
int index = 0;
@@ -202,7 +204,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:E:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -213,6 +215,11 @@ int main(int argc, char *argv[])
case 'e':
extts = atoi(optarg);
break;
+ case 'E':
+ edge = atoi(optarg);
+ edge = (edge & 1 ? PTP_RISING_EDGE : 0) |
+ (edge & 2 ? PTP_FALLING_EDGE : 0);
+ break;
case 'f':
adjfreq = atoi(optarg);
break;
@@ -444,7 +451,7 @@ int main(int argc, char *argv[])
if (!readonly) {
memset(&extts_request, 0, sizeof(extts_request));
extts_request.index = index;
- extts_request.flags = PTP_ENABLE_FEATURE;
+ extts_request.flags = PTP_ENABLE_FEATURE | edge;
if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
perror("PTP_EXTTS_REQUEST");
extts = 0;
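The new -E option simply folds PTP_RISING_EDGE and/or PTP_FALLING_EDGE into the PTP_EXTTS_REQUEST flags next to PTP_ENABLE_FEATURE: 1 selects rising, 2 falling, 3 both. A compact sketch of arming an external-timestamp channel with both edges (the device path is just an example, and whether a given PHC honours the edge flags depends on its driver):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/ptp_clock.h>

/* Returns an open clock fd with external timestamping armed, or -1. */
int enable_extts_both_edges(const char *dev /* e.g. "/dev/ptp0" */, unsigned int index)
{
	struct ptp_extts_request req = {
		.index = index,
		.flags = PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE,
	};
	int fd = open(dev, O_RDWR);

	if (fd < 0)
		return -1;
	if (ioctl(fd, PTP_EXTTS_REQUEST, &req)) {
		perror("PTP_EXTTS_REQUEST");
		close(fd);
		return -1;
	}
	/* read(fd, &ev, sizeof(struct ptp_extts_event)) then collects timestamps */
	return fd;
}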
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
index a6884f66dc01..2f345d11e4b8 100644
--- a/tools/testing/selftests/ptrace/peeksiginfo.c
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -199,7 +199,7 @@ int main(int argc, char *argv[])
/*
* Dump signal from the process-wide queue.
- * The number of signals is not multible to the buffer size
+	 * The number of signals is not a multiple of the buffer size
*/
if (check_direct_path(child, 1, 3))
goto out;
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 11f8d232b0ee..3edfd064ef81 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -44,7 +44,7 @@ fi
ncpus="`getconf _NPROCESSORS_ONLN`"
make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
-if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out
+if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|ERROR|Error|error:|warning:" || grep -E -q "Stop|ERROR|Error|error:" < $resdir/Make.out
then
echo Kernel build error
grep -E "Stop|Error|error:|warning:" < $resdir/Make.out
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 42e5e8597a1a..617cba339d28 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -199,7 +199,7 @@ do
fi
;;
--kconfig|--kconfigs)
- checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
+ checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)* *$' '^error$'
TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
shift
;;
@@ -442,18 +442,7 @@ echo $scriptname $args
touch $resdir/$ds/log
echo $scriptname $args >> $resdir/$ds/log
echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite
-echo Build directory: `pwd` > $resdir/$ds/testid.txt
-if test -d .git
-then
- echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt
- echo >> $resdir/$ds/testid.txt
- echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt
- git status >> $resdir/$ds/testid.txt
- echo >> $resdir/$ds/testid.txt
- echo >> $resdir/$ds/testid.txt
- echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt
- git diff HEAD >> $resdir/$ds/testid.txt
-fi
+mktestid.sh $resdir/$ds
___EOF___
kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk
diff --git a/tools/testing/selftests/rcutorture/bin/mktestid.sh b/tools/testing/selftests/rcutorture/bin/mktestid.sh
new file mode 100755
index 000000000000..16f9907a4dae
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/mktestid.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Create a testid.txt file in the specified directory.
+#
+# Usage: mktestid.sh dirpath
+#
+# Copyright (C) Meta Platforms, Inc. 2025
+#
+# Author: Paul E. McKenney <paulmck@kernel.org>
+
+resdir="$1"
+if test -z "${resdir}" || ! test -d "${resdir}" || ! test -w "${resdir}"
+then
+	echo Path '"'${resdir}'"' is not a writable directory, no ${resdir}/testid.txt.
+ exit 1
+fi
+echo Build directory: `pwd` > ${resdir}/testid.txt
+if test -d .git
+then
+ echo Current commit: `git rev-parse HEAD` >> ${resdir}/testid.txt
+ echo >> ${resdir}/testid.txt
+ echo ' ---' Output of "'"git status"'": >> ${resdir}/testid.txt
+ git status >> ${resdir}/testid.txt
+ echo >> ${resdir}/testid.txt
+ echo >> ${resdir}/testid.txt
+ echo ' ---' Output of "'"git diff HEAD"'": >> ${resdir}/testid.txt
+ git diff HEAD >> ${resdir}/testid.txt
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index e03fdaca89b3..611bc03a8dc7 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -30,6 +30,15 @@ then
VERBOSE_BATCH_CPUS=0
fi
+# Machine architecture? ("uname -p" is said to be less portable.)
+thisarch="`uname -m`"
+if test "${thisarch}" = aarch64
+then
+ ifnotaarch64=no
+else
+ ifnotaarch64=yes
+fi
+
# Configurations/scenarios.
configs_rcutorture=
configs_locktorture=
@@ -55,9 +64,9 @@ do_normal=yes
explicit_normal=no
do_kasan=yes
do_kcsan=no
-do_clocksourcewd=yes
+do_clocksourcewd="${ifnotaarch64}"
do_rt=yes
-do_rcutasksflavors=yes
+do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided
do_srcu_lockdep=yes
do_rcu_rust=no
@@ -124,7 +133,7 @@ do
;;
--do-all|--doall)
do_allmodconfig=yes
- do_rcutasksflavor=yes
+ do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided
do_rcutorture=yes
do_locktorture=yes
do_scftorture=yes
@@ -136,7 +145,7 @@ do
explicit_normal=no
do_kasan=yes
do_kcsan=yes
- do_clocksourcewd=yes
+ do_clocksourcewd="${ifnotaarch64}"
do_srcu_lockdep=yes
;;
--do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig)
@@ -274,7 +283,7 @@ then
configs_rcutorture=CFLIST
fi
duration_rcutorture=$((duration_base*duration_rcutorture_frac/10))
-if test "$duration_rcutorture" -eq 0
+if test "$duration_rcutorture" -eq 0 && test "$do_locktorture" = "yes"
then
echo " --- Zero time for rcutorture, disabling" | tee -a $T/log
do_rcutorture=no
@@ -286,7 +295,7 @@ then
configs_locktorture=CFLIST
fi
duration_locktorture=$((duration_base*duration_locktorture_frac/10))
-if test "$duration_locktorture" -eq 0
+if test "$duration_locktorture" -eq 0 && test "$do_locktorture" = "yes"
then
echo " --- Zero time for locktorture, disabling" | tee -a $T/log
do_locktorture=no
@@ -298,12 +307,19 @@ then
configs_scftorture=CFLIST
fi
duration_scftorture=$((duration_base*duration_scftorture_frac/10))
-if test "$duration_scftorture" -eq 0
+if test "$duration_scftorture" -eq 0 && test "$do_scftorture" = "yes"
then
echo " --- Zero time for scftorture, disabling" | tee -a $T/log
do_scftorture=no
fi
+# CONFIG_EXPERT=y is currently required for arm64 KCSAN runs.
+kcsan_expert=
+if test "${thisarch}" = aarch64
+then
+ kcsan_expert="CONFIG_EXPERT=y"
+fi
+
touch $T/failures
touch $T/successes
@@ -362,13 +378,19 @@ function torture_set {
then
curflavor=$flavor
torture_one "$@"
- mv $T/last-resdir $T/last-resdir-nodebug || :
+ if test -e $T/last-resdir
+ then
+ mv $T/last-resdir $T/last-resdir-nodebug || :
+ fi
fi
if test "$do_kasan" = "yes"
then
curflavor=${flavor}-kasan
torture_one "$@" --kasan
- mv $T/last-resdir $T/last-resdir-kasan || :
+ if test -e $T/last-resdir
+ then
+ mv $T/last-resdir $T/last-resdir-kasan || :
+ fi
fi
if test "$do_kcsan" = "yes"
then
@@ -378,8 +400,16 @@ function torture_set {
kcsan_kmake_tag="--kmake-args"
cur_kcsan_kmake_args="$kcsan_kmake_args"
fi
- torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
- mv $T/last-resdir $T/last-resdir-kcsan || :
+ chk_rdr_state=
+ if test "${flavor}" = rcutorture
+ then
+ chk_rdr_state="CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y"
+ fi
+ torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y ${kcsan_expert} ${chk_rdr_state}" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
+ if test -e $T/last-resdir
+ then
+ mv $T/last-resdir $T/last-resdir-kcsan || :
+ fi
fi
}
@@ -389,6 +419,7 @@ then
echo " --- allmodconfig:" Start `date` | tee -a $T/log
amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig"
mkdir -p "$amcdir"
+ mktestid.sh "$amcdir"
echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
retcode=$?
@@ -407,6 +438,10 @@ then
make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
retcode="$?"
echo $retcode > "$amcdir/Make.exitcode"
+ if grep -E -q "Stop|ERROR|Error|error:|warning:" < "$amcdir/Make.out"
+ then
+ retcode=99
+ fi
buildphase='"make"'
fi
if test "$retcode" -eq 0
@@ -495,6 +530,7 @@ then
echo " --- do-rcu-rust:" Start `date` | tee -a $T/log
rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust"
mkdir -p "$rrdir"
+ mktestid.sh "$rrdir"
echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out
make LLVM=1 rustavailable > $T/rustavailable.out 2>&1
retcode=$?
@@ -681,7 +717,14 @@ nfailures=0
echo FAILURES: | tee -a $T/log
if test -s "$T/failures"
then
- awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
+ awk < "$T/failures" -v sq="'" '
+ {
+ print "echo " sq $0 sq;
+ if ($2 != "")
+ print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq;
+ else
+ print "echo " sq " " sq "Run failed to produce results directory.";
+ }' | sh | tee -a $T/log | tee "$T/failuresum"
nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
grep "^ Summary: " "$T/failuresum" |
grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
@@ -691,15 +734,18 @@ then
fi
ret=2
fi
-if test "$do_kcsan" = "yes"
+if test "$do_kcsan" = "yes" && test -e tools/testing/selftests/rcutorture/res/$ds
then
TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum
fi
echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
-tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
-find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors
-find "$tdir" -name 'Make.out.diags' -print > $T/builderrors
+tdir="`cat $T/successes $T/failures | awk 'NF > 1 { print $NF }' | head -1 | sed -e 's,/[^/]\+/*$,,'`"
+if test -n "$tdir"
+then
+ find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors
+ find "$tdir" -name 'Make.out.diags' -print > $T/builderrors
+fi
if test -s "$T/configerrors"
then
echo " Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
index 48d8a245c7fa..7d75f4b94943 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
@@ -5,3 +5,6 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
+CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y
+CONFIG_RCU_TORTURE_TEST_LOG_CPU=y
+CONFIG_RCU_TORTURE_TEST_LOG_GP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index 45f572570a8c..98b6175e5aa0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -5,7 +5,6 @@ TREE04
TREE05
TREE07
TREE09
-SRCU-L
SRCU-N
SRCU-P
SRCU-T
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L
deleted file mode 100644
index 3b4fa8dbef8a..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L
+++ /dev/null
@@ -1,10 +0,0 @@
-CONFIG_RCU_TRACE=n
-CONFIG_SMP=y
-CONFIG_NR_CPUS=6
-CONFIG_HOTPLUG_CPU=y
-CONFIG_PREEMPT_NONE=y
-CONFIG_PREEMPT_VOLUNTARY=n
-CONFIG_PREEMPT=n
-#CHECK#CONFIG_RCU_EXPERT=n
-CONFIG_KPROBES=n
-CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot
deleted file mode 100644
index 0207b3138c5b..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot
+++ /dev/null
@@ -1,3 +0,0 @@
-rcutorture.torture_type=srcu
-rcutorture.reader_flavor=0x4
-rcutorture.fwd_progress=3
diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
index 9451782689de..ee25824b1cbe 100644
--- a/tools/testing/selftests/sched_ext/exit.c
+++ b/tools/testing/selftests/sched_ext/exit.c
@@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx)
struct bpf_link *link;
char buf[16];
+ /*
+ * On single-CPU systems, ops.select_cpu() is never
+ * invoked, so skip this test to avoid getting stuck
+ * indefinitely.
+ */
+ if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1)
+ continue;
+
skel = exit__open();
SCX_ENUM_INIT(skel);
skel->rodata->exit_point = tc;
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
index d975a6767329..2eb2c06303f2 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -10,6 +10,8 @@
#include <sys/sysinfo.h>
#include <sys/syscall.h>
#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
#include <asm/unistd.h>
#include "../kselftest_harness.h"
@@ -17,11 +19,15 @@
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
-# define PR_SYS_DISPATCH_ON 1
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif
+#ifndef PR_SYS_DISPATCH_EXCLUSIVE_ON
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
+# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
+#endif
+
#ifndef SYS_USER_DISPATCH
# define SYS_USER_DISPATCH 2
#endif
@@ -65,7 +71,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
ret = sysinfo(&info);
ASSERT_EQ(0, ret);
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -79,6 +85,21 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
}
}
+static void prctl_valid(struct __test_metadata *_metadata,
+ unsigned long op, unsigned long off,
+ unsigned long size, void *sel)
+{
+ EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel));
+}
+
+static void prctl_invalid(struct __test_metadata *_metadata,
+ unsigned long op, unsigned long off,
+ unsigned long size, void *sel, int err)
+{
+ EXPECT_EQ(-1, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel));
+ EXPECT_EQ(err, errno);
+}
+
TEST(bad_prctl_param)
{
char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
@@ -86,57 +107,54 @@ TEST(bad_prctl_param)
/* Invalid op */
op = -1;
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel);
- ASSERT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0, 0, &sel, EINVAL);
/* PR_SYS_DISPATCH_OFF */
op = PR_SYS_DISPATCH_OFF;
/* offset != 0 */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x1, 0x0, 0, EINVAL);
/* len != 0 */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x0, 0xff, 0, EINVAL);
/* sel != NULL */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x0, 0x0, &sel, EINVAL);
/* Valid parameter */
- errno = 0;
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0);
- EXPECT_EQ(0, errno);
+ prctl_valid(_metadata, op, 0x0, 0x0, 0x0);
- /* PR_SYS_DISPATCH_ON */
- op = PR_SYS_DISPATCH_ON;
+ /* PR_SYS_DISPATCH_EXCLUSIVE_ON */
+ op = PR_SYS_DISPATCH_EXCLUSIVE_ON;
/* Dispatcher region is bad (offset > 0 && len == 0) */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel);
- EXPECT_EQ(EINVAL, errno);
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel);
- EXPECT_EQ(EINVAL, errno);
+ prctl_invalid(_metadata, op, 0x1, 0x0, &sel, EINVAL);
+ prctl_invalid(_metadata, op, -1L, 0x0, &sel, EINVAL);
/* Invalid selector */
- prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1);
- ASSERT_EQ(EFAULT, errno);
+ prctl_invalid(_metadata, op, 0x0, 0x1, (void *) -1, EFAULT);
/*
* Dispatcher range overflows unsigned long
*/
- prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel);
- ASSERT_EQ(EINVAL, errno) {
- TH_LOG("Should reject bad syscall range");
- }
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 1, -1L, &sel, EINVAL);
/*
	 * Allowed range overflows unsigned long
*/
- prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel);
- ASSERT_EQ(EINVAL, errno) {
- TH_LOG("Should reject bad syscall range");
- }
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, -1L, 0x1, &sel, EINVAL);
+
+ /* 0 len should fail for PR_SYS_DISPATCH_INCLUSIVE_ON */
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 1, 0, 0, EINVAL);
+
+ /* Range wrap-around should fail */
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, -1L, 2, 0, EINVAL);
+
+ /* Normal range shouldn't fail */
+ prctl_valid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, 0);
+
+ /* Invalid selector */
+ prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, (void *) -1, EFAULT);
}
/*
@@ -147,11 +165,13 @@ char glob_sel;
int nr_syscalls_emulated;
int si_code;
int si_errno;
+unsigned long syscall_addr;
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
si_code = info->si_code;
si_errno = info->si_errno;
+ syscall_addr = (unsigned long)info->si_call_addr;
if (info->si_syscall == MAGIC_SYSCALL_1)
nr_syscalls_emulated++;
@@ -174,31 +194,34 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
#endif
}
-TEST(dispatch_and_return)
+int setup_sigsys_handler(void)
{
- long ret;
struct sigaction act;
sigset_t mask;
- glob_sel = 0;
- nr_syscalls_emulated = 0;
- si_code = 0;
- si_errno = 0;
-
memset(&act, 0, sizeof(act));
sigemptyset(&mask);
-
act.sa_sigaction = handle_sigsys;
act.sa_flags = SA_SIGINFO;
act.sa_mask = mask;
+ return sigaction(SIGSYS, &act, NULL);
+}
- ret = sigaction(SIGSYS, &act, NULL);
- ASSERT_EQ(0, ret);
+TEST(dispatch_and_return)
+{
+ long ret;
+
+ glob_sel = 0;
+ nr_syscalls_emulated = 0;
+ si_code = 0;
+ si_errno = 0;
+
+ ASSERT_EQ(0, setup_sigsys_handler());
/* Make sure selector is good prior to prctl. */
SYSCALL_DISPATCH_OFF(glob_sel);
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -254,7 +277,7 @@ TEST_SIGNAL(bad_selector, SIGSYS)
/* Make sure selector is good prior to prctl. */
SYSCALL_DISPATCH_OFF(glob_sel);
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -278,7 +301,7 @@ TEST(disable_dispatch)
struct sysinfo info;
char sel = 0;
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -310,7 +333,7 @@ TEST(direct_dispatch_range)
* Instead of calculating libc addresses; allow the entire
* memory map and lock the selector.
*/
- ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel);
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, -1L, &sel);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
}
@@ -323,4 +346,35 @@ TEST(direct_dispatch_range)
}
}
+static void test_range(struct __test_metadata *_metadata,
+ unsigned long op, unsigned long off,
+ unsigned long size, bool dispatch)
+{
+ nr_syscalls_emulated = 0;
+ SYSCALL_DISPATCH_OFF(glob_sel);
+ EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, &glob_sel));
+ SYSCALL_DISPATCH_ON(glob_sel);
+ if (dispatch) {
+ EXPECT_EQ(syscall(MAGIC_SYSCALL_1), MAGIC_SYSCALL_1);
+ EXPECT_EQ(nr_syscalls_emulated, 1);
+ } else {
+ EXPECT_EQ(syscall(MAGIC_SYSCALL_1), -1);
+ EXPECT_EQ(nr_syscalls_emulated, 0);
+ }
+}
+
+TEST(dispatch_range)
+{
+ ASSERT_EQ(0, setup_sigsys_handler());
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, true);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr, 1, false);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 200, false);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr+1, 100, true);
+ test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 100, true);
+ test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr, 1, true);
+ test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr-1, 1, false);
+ test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr+1, 1, false);
+ SYSCALL_DISPATCH_OFF(glob_sel);
+}
+
TEST_HARNESS_MAIN
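For context on the two operations exercised above: with PR_SYS_DISPATCH_EXCLUSIVE_ON the [off, off+len) region is the one area whose syscalls are *not* dispatched (libc's syscall trampoline usually lives there), while PR_SYS_DISPATCH_INCLUSIVE_ON inverts that, so once the selector byte is set to BLOCK only syscalls issued from inside the region raise SIGSYS. A minimal sketch of arming the inclusive mode around a caller-chosen code range (code_start/code_len are placeholders, and a SIGSYS handler must already be installed):

#define _GNU_SOURCE
#include <sys/prctl.h>

#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH	59
# define SYSCALL_DISPATCH_FILTER_ALLOW	0
# define SYSCALL_DISPATCH_FILTER_BLOCK	1
#endif
#ifndef PR_SYS_DISPATCH_EXCLUSIVE_ON
# define PR_SYS_DISPATCH_EXCLUSIVE_ON	1
# define PR_SYS_DISPATCH_INCLUSIVE_ON	2
#endif

static volatile char sud_selector = SYSCALL_DISPATCH_FILTER_ALLOW;

/*
 * Without a SIGSYS handler in place, the first intercepted syscall after
 * the selector flips to BLOCK is fatal to the process.
 */
int arm_inclusive_dispatch(unsigned long code_start, unsigned long code_len)
{
	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_INCLUSIVE_ON,
		  code_start, code_len, &sud_selector))
		return -1;

	sud_selector = SYSCALL_DISPATCH_FILTER_BLOCK;	/* start intercepting */
	return 0;
}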
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index a10350c8a46e..b2d8bd9026a7 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index db176fe7d0c3..c20aa16b1d63 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -21,6 +21,7 @@ CONFIG_NF_NAT=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NET_SCHED=y
+CONFIG_IP_SET=m
#
# Queueing/Scheduling
@@ -30,6 +31,7 @@ CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_DUALPI2=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_FQ_CODEL=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index c6db7fa94f55..23a61e5b99d0 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -504,7 +504,6 @@
"$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
"$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1",
"$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit",
- "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
"$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2",
"ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true",
"$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1",
@@ -517,8 +516,8 @@
{
"kind": "hfsc",
"handle": "1:",
- "bytes": 392,
- "packets": 4
+ "bytes": 294,
+ "packets": 3
}
],
"matchCount": "1",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
new file mode 100644
index 000000000000..cd1f2ee8f354
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
@@ -0,0 +1,254 @@
+[
+ {
+ "id": "a4c7",
+ "name": "Create DualPI2 with default setting",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* step_thresh 1ms min_qlen_step 0p coupling_factor 2 drop_on_overload drop_dequeue classic_protection 10% l4s_ect split_gso",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "1ea4",
+ "name": "Create DualPI2 with memlimit",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 memlimit 20000000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* memlimit 20000000B",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2130",
+ "name": "Create DualPI2 with typical_rtt and max_rtt",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 typical_rtt 20ms max_rtt 200ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 20ms tupdate 20ms alpha 0.042969 beta 1.496094",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "90c1",
+ "name": "Create DualPI2 with max_rtt",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 max_rtt 300ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 50ms tupdate 50ms alpha 0.050781 beta 0.996094",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7b3c",
+ "name": "Create DualPI2 with any_ect option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 any_ect",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* any_ect",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "49a3",
+ "name": "Create DualPI2 with overflow option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 overflow",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* overflow",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "d0a1",
+ "name": "Create DualPI2 with drop_enqueue option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 drop_enqueue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* drop_enqueue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "f051",
+ "name": "Create DualPI2 with no_split_gso option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 no_split_gso",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* no_split_gso",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "456b",
+ "name": "Create DualPI2 with packet step_thresh",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 step_thresh 3p",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* step_thresh 3p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "610c",
+ "name": "Create DualPI2 with packet min_qlen_step",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 min_qlen_step 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* min_qlen_step 1p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "b4fa",
+ "name": "Create DualPI2 with packet coupling_factor",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 coupling_factor 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* coupling_factor 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "37f1",
+ "name": "Create DualPI2 with packet classic_protection",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 classic_protection 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* classic_protection 0%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
index 3c4444961488..718d2df2aafa 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
@@ -336,5 +336,86 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "d34d",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "b33f",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "cafe",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "1337",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree across branches",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 28c6ce6da7db..531a2f6e4900 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -264,5 +264,41 @@
"matchPattern": "sfq",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "cdc1",
+ "name": "Check that a negative perturb timer is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb -10",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a9f0",
+ "name": "Check that a too big perturb timer is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 1000000000",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index 589b18ed758a..dae19687912d 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -4,8 +4,7 @@
# If a module is required and was not compiled
# the test that requires it will fail anyways
try_modprobe() {
- modprobe -q -R "$1"
- if [ $? -ne 0 ]; then
+ if ! modprobe -q -R "$1"; then
echo "Module $1 not found... skipping."
else
modprobe "$1"
@@ -67,4 +66,5 @@ try_modprobe sch_hfsc
try_modprobe sch_hhf
try_modprobe sch_htb
try_modprobe sch_teql
-./tdc.py -J`nproc`
+try_modprobe sch_dualpi2
+./tdc.py -J"$(nproc)"
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 6e60f7d97125..b227bd78b252 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -38,7 +38,8 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
return 0;
}
-static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
+static int ublk_fault_inject_queue_io(struct ublk_thread *t,
+ struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe;
@@ -46,25 +47,27 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
.tv_nsec = (long long)q->dev->private_data,
};
- ublk_io_alloc_sqes(ublk_get_io(q, tag), &sqe, 1);
+ ublk_io_alloc_sqes(t, &sqe, 1);
io_uring_prep_timeout(sqe, &ts, 1, 0);
sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1);
- ublk_queued_tgt_io(q, tag, 1);
+ ublk_queued_tgt_io(t, q, tag, 1);
return 0;
}
-static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag,
+static void ublk_fault_inject_tgt_io_done(struct ublk_thread *t,
+ struct ublk_queue *q,
const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
if (cqe->res != -ETIME)
ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res);
- if (ublk_completed_tgt_io(q, tag))
- ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ if (ublk_completed_tgt_io(t, q, tag))
+ ublk_complete_io(t, q, tag, iod->nr_sectors << 9);
else
ublk_err("%s: io not complete after 1 cqe\n", __func__);
}
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index cfa59b631693..2d93ac860bd5 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -13,12 +13,13 @@ static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int
assert(0);
}
-static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
struct io_uring_sqe *sqe[1];
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
@@ -26,7 +27,8 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des
return 1;
}
-static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
unsigned zc = ublk_queue_use_zc(q);
@@ -36,7 +38,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
if (!zc || auto_zc) {
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
if (!sqe[0])
return -ENOMEM;
@@ -52,7 +54,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
return 1;
}
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);
+ ublk_io_alloc_sqes(t, sqe, 3);
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
@@ -72,7 +74,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
return 2;
}
-static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
+static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned ublk_op = ublksrv_get_op(iod);
@@ -80,7 +82,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
switch (ublk_op) {
case UBLK_IO_OP_FLUSH:
- ret = loop_queue_flush_io(q, iod, tag);
+ ret = loop_queue_flush_io(t, q, iod, tag);
break;
case UBLK_IO_OP_WRITE_ZEROES:
case UBLK_IO_OP_DISCARD:
@@ -88,7 +90,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
break;
case UBLK_IO_OP_READ:
case UBLK_IO_OP_WRITE:
- ret = loop_queue_tgt_rw_io(q, iod, tag);
+ ret = loop_queue_tgt_rw_io(t, q, iod, tag);
break;
default:
ret = -EINVAL;
@@ -100,17 +102,19 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
return ret;
}
-static int ublk_loop_queue_io(struct ublk_queue *q, int tag)
+static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
- int queued = loop_queue_tgt_io(q, tag);
+ int queued = loop_queue_tgt_io(t, q, tag);
- ublk_queued_tgt_io(q, tag, queued);
+ ublk_queued_tgt_io(t, q, tag, queued);
return 0;
}
-static void ublk_loop_io_done(struct ublk_queue *q, int tag,
+static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q,
const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_io *io = ublk_get_io(q, tag);
@@ -126,8 +130,8 @@ static void ublk_loop_io_done(struct ublk_queue *q, int tag,
if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
io->tgt_ios += 1;
- if (ublk_completed_tgt_io(q, tag))
- ublk_complete_io(q, tag, io->result);
+ if (ublk_completed_tgt_io(t, q, tag))
+ ublk_complete_io(t, q, tag, io->result);
}
static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index e2d2042810d4..95188065b2e9 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -441,17 +441,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
unsigned long off;
q->tgt_ops = dev->tgt.ops;
- q->state = 0;
+ q->flags = 0;
q->q_depth = depth;
-
- if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
- q->state |= UBLKSRV_NO_BUF;
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
- q->state |= UBLKSRV_ZC;
- if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
- q->state |= UBLKSRV_AUTO_BUF_REG;
- }
- q->state |= extra_flags;
+ q->flags = dev->dev_info.flags;
+ q->flags |= extra_flags;
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
@@ -466,10 +459,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
io_buf_size = dev->dev_info.max_io_buf_bytes;
for (i = 0; i < q->q_depth; i++) {
q->ios[i].buf_addr = NULL;
- q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+ q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE;
q->ios[i].tag = i;
- if (q->state & UBLKSRV_NO_BUF)
+ if (ublk_queue_no_buf(q))
continue;
if (posix_memalign((void **)&q->ios[i].buf_addr,
@@ -583,15 +576,14 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
else
buf.index = q->ios[tag].buf_index;
- if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ if (ublk_queue_auto_zc_fallback(q))
buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}
-int ublk_queue_io_cmd(struct ublk_io *io)
+int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
{
- struct ublk_thread *t = io->t;
struct ublk_queue *q = ublk_io_to_queue(io);
struct ublksrv_io_cmd *cmd;
struct io_uring_sqe *sqe[1];
@@ -599,7 +591,7 @@ int ublk_queue_io_cmd(struct ublk_io *io)
__u64 user_data;
/* only freed io can be issued */
- if (!(io->flags & UBLKSRV_IO_FREE))
+ if (!(io->flags & UBLKS_IO_FREE))
return 0;
/*
@@ -607,20 +599,20 @@ int ublk_queue_io_cmd(struct ublk_io *io)
* getting data
*/
if (!(io->flags &
- (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
+ (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA)))
return 0;
- if (io->flags & UBLKSRV_NEED_GET_DATA)
+ if (io->flags & UBLKS_IO_NEED_GET_DATA)
cmd_op = UBLK_U_IO_NEED_GET_DATA;
- else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+ else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP)
cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
- else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
+ else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
cmd_op = UBLK_U_IO_FETCH_REQ;
if (io_uring_sq_space_left(&t->ring) < 1)
io_uring_submit(&t->ring);
- ublk_io_alloc_sqes(io, sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
if (!sqe[0]) {
ublk_err("%s: run out of sqe. thread %u, tag %d\n",
__func__, t->idx, io->tag);
@@ -640,12 +632,12 @@ int ublk_queue_io_cmd(struct ublk_io *io)
sqe[0]->rw_flags = 0;
cmd->tag = io->tag;
cmd->q_id = q->q_id;
- if (!(q->state & UBLKSRV_NO_BUF))
+ if (!ublk_queue_no_buf(q))
cmd->addr = (__u64) (uintptr_t) io->buf_addr;
else
cmd->addr = 0;
- if (q->state & UBLKSRV_AUTO_BUF_REG)
+ if (ublk_queue_use_auto_zc(q))
ublk_set_auto_buf_reg(q, sqe[0], io->tag);
user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
@@ -657,7 +649,7 @@ int ublk_queue_io_cmd(struct ublk_io *io)
ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n",
__func__, t->idx, q->q_id, io->tag, cmd_op,
- io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING));
+ io->flags, !!(t->state & UBLKS_T_STOPPING));
return 1;
}
@@ -685,9 +677,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
int tag = i % dinfo->queue_depth;
q = &t->dev->q[q_id];
io = &q->ios[tag];
- io->t = t;
io->buf_index = j++;
- ublk_queue_io_cmd(io);
+ ublk_queue_io_cmd(t, io);
}
} else {
/*
@@ -697,9 +688,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
struct ublk_queue *q = &t->dev->q[t->idx];
for (i = 0; i < q->q_depth; i++) {
io = &q->ios[i];
- io->t = t;
io->buf_index = i;
- ublk_queue_io_cmd(io);
+ ublk_queue_io_cmd(t, io);
}
}
}
@@ -711,14 +701,13 @@ static int ublk_thread_is_idle(struct ublk_thread *t)
static int ublk_thread_is_done(struct ublk_thread *t)
{
- return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t);
+ return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t);
}
-static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
- struct io_uring_cqe *cqe)
+static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t,
+ struct ublk_queue *q,
+ struct io_uring_cqe *cqe)
{
- unsigned tag = user_data_to_tag(cqe->user_data);
-
if (cqe->res < 0 && cqe->res != -EAGAIN)
ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
__func__, cqe->res, q->q_id,
@@ -726,7 +715,41 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
user_data_to_op(cqe->user_data));
if (q->tgt_ops->tgt_io_done)
- q->tgt_ops->tgt_io_done(q, tag, cqe);
+ q->tgt_ops->tgt_io_done(t, q, cqe);
+}
+
+static void ublk_handle_uring_cmd(struct ublk_thread *t,
+ struct ublk_queue *q,
+ const struct io_uring_cqe *cqe)
+{
+ int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
+ !(t->state & UBLKS_T_STOPPING);
+ unsigned tag = user_data_to_tag(cqe->user_data);
+ struct ublk_io *io = &q->ios[tag];
+
+ if (!fetch) {
+ t->state |= UBLKS_T_STOPPING;
+ io->flags &= ~UBLKS_IO_NEED_FETCH_RQ;
+ }
+
+ if (cqe->res == UBLK_IO_RES_OK) {
+ assert(tag < q->q_depth);
+ if (q->tgt_ops->queue_io)
+ q->tgt_ops->queue_io(t, q, tag);
+ } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
+ io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE;
+ ublk_queue_io_cmd(t, io);
+ } else {
+ /*
+ * COMMIT_REQ will be completed immediately since no fetching
+ * piggyback is required.
+ *
+ * Marking IO_FREE only, then this io won't be issued since
+ * we only issue io with (UBLKS_IO_FREE | UBLKS_IO_NEED_*)
+ *
+ */
+ io->flags = UBLKS_IO_FREE;
+ }
}
static void ublk_handle_cqe(struct ublk_thread *t,
@@ -735,54 +758,27 @@ static void ublk_handle_cqe(struct ublk_thread *t,
struct ublk_dev *dev = t->dev;
unsigned q_id = user_data_to_q_id(cqe->user_data);
struct ublk_queue *q = &dev->q[q_id];
- unsigned tag = user_data_to_tag(cqe->user_data);
unsigned cmd_op = user_data_to_op(cqe->user_data);
- int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
- !(t->state & UBLKSRV_THREAD_STOPPING);
- struct ublk_io *io;
if (cqe->res < 0 && cqe->res != -ENODEV)
ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
- cqe->res, cqe->user_data, q->state);
+ cqe->res, cqe->user_data, q->flags);
ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
- __func__, cqe->res, q->q_id, tag, cmd_op,
- is_target_io(cqe->user_data),
+ __func__, cqe->res, q->q_id, user_data_to_tag(cqe->user_data),
+ cmd_op, is_target_io(cqe->user_data),
user_data_to_tgt_data(cqe->user_data),
- (t->state & UBLKSRV_THREAD_STOPPING));
+ (t->state & UBLKS_T_STOPPING));
/* Don't retrieve io in case of target io */
if (is_target_io(cqe->user_data)) {
- ublksrv_handle_tgt_cqe(q, cqe);
+ ublksrv_handle_tgt_cqe(t, q, cqe);
return;
}
- io = &q->ios[tag];
t->cmd_inflight--;
- if (!fetch) {
- t->state |= UBLKSRV_THREAD_STOPPING;
- io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
- }
-
- if (cqe->res == UBLK_IO_RES_OK) {
- assert(tag < q->q_depth);
- if (q->tgt_ops->queue_io)
- q->tgt_ops->queue_io(q, tag);
- } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
- io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
- ublk_queue_io_cmd(io);
- } else {
- /*
- * COMMIT_REQ will be completed immediately since no fetching
- * piggyback is required.
- *
- * Marking IO_FREE only, then this io won't be issued since
- * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
- *
- * */
- io->flags = UBLKSRV_IO_FREE;
- }
+ ublk_handle_uring_cmd(t, q, cqe);
}
static int ublk_reap_events_uring(struct ublk_thread *t)
@@ -808,7 +804,7 @@ static int ublk_process_io(struct ublk_thread *t)
t->dev->dev_info.dev_id,
t->idx, io_uring_sq_ready(&t->ring),
t->cmd_inflight,
- (t->state & UBLKSRV_THREAD_STOPPING));
+ (t->state & UBLKS_T_STOPPING));
if (ublk_thread_is_done(t))
return -ENODEV;
@@ -817,8 +813,8 @@ static int ublk_process_io(struct ublk_thread *t)
reapped = ublk_reap_events_uring(t);
ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
- ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING),
- (t->state & UBLKSRV_THREAD_IDLE));
+ ret, reapped, (t->state & UBLKS_T_STOPPING),
+ (t->state & UBLKS_T_IDLE));
return reapped;
}
@@ -915,7 +911,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
struct ublk_thread_info *tinfo;
- unsigned extra_flags = 0;
+ unsigned long long extra_flags = 0;
cpu_set_t *affinity_buf;
void *thread_ret;
sem_t ready;
@@ -937,7 +933,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
return ret;
if (ctx->auto_zc_fallback)
- extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
+ extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK;
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 6be601536b3d..219233f8a053 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -29,13 +29,9 @@
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>
-#define __maybe_unused __attribute__((unused))
-#define MAX_BACK_FILES 4
-#ifndef min
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
+#include "utils.h"
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+#define MAX_BACK_FILES 4
/****************** part 1: libublk ********************/
@@ -45,9 +41,6 @@
#define UBLK_CTRL_RING_DEPTH 32
#define ERROR_EVTFD_DEVID -2
-/* queue idle timeout */
-#define UBLKSRV_IO_IDLE_SECS 20
-
#define UBLK_IO_MAX_BYTES (1 << 20)
#define UBLK_MAX_QUEUES_SHIFT 5
#define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT)
@@ -55,13 +48,6 @@
#define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH 1024
-#define UBLK_DBG_DEV (1U << 0)
-#define UBLK_DBG_THREAD (1U << 1)
-#define UBLK_DBG_IO_CMD (1U << 2)
-#define UBLK_DBG_IO (1U << 3)
-#define UBLK_DBG_CTRL_CMD (1U << 4)
-#define UBLK_LOG (1U << 5)
-
struct ublk_dev;
struct ublk_queue;
struct ublk_thread;
@@ -121,11 +107,11 @@ struct ublk_ctrl_cmd_data {
struct ublk_io {
char *buf_addr;
-#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
-#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
-#define UBLKSRV_IO_FREE (1UL << 2)
-#define UBLKSRV_NEED_GET_DATA (1UL << 3)
-#define UBLKSRV_NEED_REG_BUF (1UL << 4)
+#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0)
+#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1)
+#define UBLKS_IO_FREE (1UL << 2)
+#define UBLKS_IO_NEED_GET_DATA (1UL << 3)
+#define UBLKS_IO_NEED_REG_BUF (1UL << 4)
unsigned short flags;
unsigned short refs; /* used by target code only */
@@ -136,7 +122,6 @@ struct ublk_io {
unsigned short buf_index;
unsigned short tgt_ios;
void *private_data;
- struct ublk_thread *t;
};
struct ublk_tgt_ops {
@@ -144,9 +129,9 @@ struct ublk_tgt_ops {
int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
void (*deinit_tgt)(struct ublk_dev *);
- int (*queue_io)(struct ublk_queue *, int tag);
- void (*tgt_io_done)(struct ublk_queue *,
- int tag, const struct io_uring_cqe *);
+ int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
+ void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
+ const struct io_uring_cqe *);
/*
* Target specific command line handling
@@ -179,12 +164,10 @@ struct ublk_queue {
const struct ublk_tgt_ops *tgt_ops;
struct ublksrv_io_desc *io_cmd_buf;
+/* borrow one bit of ublk uapi flags, which may never be used */
+#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63)
+ __u64 flags;
struct ublk_io ios[UBLK_QUEUE_DEPTH];
-#define UBLKSRV_NO_BUF (1U << 2)
-#define UBLKSRV_ZC (1U << 3)
-#define UBLKSRV_AUTO_BUF_REG (1U << 4)
-#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5)
- unsigned state;
};
struct ublk_thread {
@@ -196,8 +179,8 @@ struct ublk_thread {
pthread_t thread;
unsigned idx;
-#define UBLKSRV_THREAD_STOPPING (1U << 0)
-#define UBLKSRV_THREAD_IDLE (1U << 1)
+#define UBLKS_T_STOPPING (1U << 0)
+#define UBLKS_T_IDLE (1U << 1)
unsigned state;
};
@@ -217,22 +200,7 @@ struct ublk_dev {
void *private_data;
};
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
-#endif
-
-#ifndef container_of
-#define container_of(ptr, type, member) ({ \
- unsigned long __mptr = (unsigned long)(ptr); \
- ((type *)(__mptr - offsetof(type, member))); })
-#endif
-
-#define round_up(val, rnd) \
- (((val) + ((rnd) - 1)) & ~((rnd) - 1))
-
-
-extern unsigned int ublk_dbg_mask;
-extern int ublk_queue_io_cmd(struct ublk_io *io);
+extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);
static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
@@ -281,43 +249,15 @@ static inline unsigned short ublk_cmd_op_nr(unsigned int op)
return _IOC_NR(op);
}
-static inline void ublk_err(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
-}
-
-static inline void ublk_log(const char *fmt, ...)
-{
- if (ublk_dbg_mask & UBLK_LOG) {
- va_list ap;
-
- va_start(ap, fmt);
- vfprintf(stdout, fmt, ap);
- }
-}
-
-static inline void ublk_dbg(int level, const char *fmt, ...)
-{
- if (level & ublk_dbg_mask) {
- va_list ap;
-
- va_start(ap, fmt);
- vfprintf(stdout, fmt, ap);
- }
-}
-
static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
return container_of(io, struct ublk_queue, ios[io->tag]);
}
-static inline int ublk_io_alloc_sqes(struct ublk_io *io,
+static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
struct io_uring_sqe *sqes[], int nr_sqes)
{
- struct io_uring *ring = &io->t->ring;
+ struct io_uring *ring = &t->ring;
unsigned left = io_uring_sq_space_left(ring);
int i;
@@ -380,7 +320,7 @@ static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
- io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
+ io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
io->result = res;
}
@@ -402,45 +342,58 @@ static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
return &q->ios[tag];
}
-static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res)
+static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
+ unsigned tag, int res)
{
struct ublk_io *io = &q->ios[tag];
ublk_mark_io_done(io, res);
- return ublk_queue_io_cmd(io);
+ return ublk_queue_io_cmd(t, io);
}
-static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued)
+static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
+ unsigned tag, int queued)
{
if (queued < 0)
- ublk_complete_io(q, tag, queued);
+ ublk_complete_io(t, q, tag, queued);
else {
struct ublk_io *io = ublk_get_io(q, tag);
- io->t->io_inflight += queued;
+ t->io_inflight += queued;
io->tgt_ios = queued;
io->result = 0;
}
}
-static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag)
+static inline int ublk_completed_tgt_io(struct ublk_thread *t,
+ struct ublk_queue *q, unsigned tag)
{
struct ublk_io *io = ublk_get_io(q, tag);
- io->t->io_inflight--;
+ t->io_inflight--;
return --io->tgt_ios == 0;
}
static inline int ublk_queue_use_zc(const struct ublk_queue *q)
{
- return q->state & UBLKSRV_ZC;
+ return q->flags & UBLK_F_SUPPORT_ZERO_COPY;
}
static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
- return q->state & UBLKSRV_AUTO_BUF_REG;
+ return q->flags & UBLK_F_AUTO_BUF_REG;
+}
+
+static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
+{
+ return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK;
+}
+
+static inline int ublk_queue_no_buf(const struct ublk_queue *q)
+{
+ return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}
extern const struct ublk_tgt_ops null_tgt_ops;
@@ -451,10 +404,4 @@ extern const struct ublk_tgt_ops fault_inject_tgt_ops;
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);
-static inline unsigned int ilog2(unsigned int x)
-{
- if (x == 0)
- return 0;
- return (sizeof(x) * 8 - 1) - __builtin_clz(x);
-}
#endif
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index afe0b99d77ee..f0e0003a4860 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -55,12 +55,13 @@ static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
sqe->user_data = build_user_data(tag, ublk_op, 0, q_id, 1);
}
-static int null_queue_zc_io(struct ublk_queue *q, int tag)
+static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe[3];
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);
+ ublk_io_alloc_sqes(t, sqe, 3);
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->user_data = build_user_data(tag,
@@ -77,19 +78,21 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
return 2;
}
-static int null_queue_auto_zc_io(struct ublk_queue *q, int tag)
+static int null_queue_auto_zc_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe[1];
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
+ ublk_io_alloc_sqes(t, sqe, 1);
__setup_nop_io(tag, iod, sqe[0], q->q_id);
return 1;
}
-static void ublk_null_io_done(struct ublk_queue *q, int tag,
- const struct io_uring_cqe *cqe)
+static void ublk_null_io_done(struct ublk_thread *t, struct ublk_queue *q,
+ const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_io *io = ublk_get_io(q, tag);
@@ -105,11 +108,12 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag,
if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
io->tgt_ios += 1;
- if (ublk_completed_tgt_io(q, tag))
- ublk_complete_io(q, tag, io->result);
+ if (ublk_completed_tgt_io(t, q, tag))
+ ublk_complete_io(t, q, tag, io->result);
}
-static int ublk_null_queue_io(struct ublk_queue *q, int tag)
+static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned auto_zc = ublk_queue_use_auto_zc(q);
@@ -117,14 +121,14 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
int queued;
if (auto_zc && !ublk_io_auto_zc_fallback(iod))
- queued = null_queue_auto_zc_io(q, tag);
+ queued = null_queue_auto_zc_io(t, q, tag);
else if (zc)
- queued = null_queue_zc_io(q, tag);
+ queued = null_queue_zc_io(t, q, tag);
else {
- ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ ublk_complete_io(t, q, tag, iod->nr_sectors << 9);
return 0;
}
- ublk_queued_tgt_io(q, tag, queued);
+ ublk_queued_tgt_io(t, q, tag, queued);
return 0;
}
@@ -134,7 +138,7 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
*/
static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
{
- if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ if (ublk_queue_auto_zc_fallback(q))
return (unsigned short)-1;
return q->ios[tag].buf_index;
}
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 37d50bbf5f5e..1fb9b7cc281b 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -123,7 +123,8 @@ static inline enum io_uring_op stripe_to_uring_op(
assert(0);
}
-static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
@@ -138,7 +139,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
io->private_data = s;
calculate_stripe_array(conf, iod, s, base);
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, s->nr + extra);
+ ublk_io_alloc_sqes(t, sqe, s->nr + extra);
if (zc) {
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index);
@@ -176,13 +177,14 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
return s->nr + zc;
}
-static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+static int handle_flush(struct ublk_thread *t, struct ublk_queue *q,
+ const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
struct io_uring_sqe *sqe[NR_STRIPE];
int i;
- ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, conf->nr_files);
+ ublk_io_alloc_sqes(t, sqe, conf->nr_files);
for (i = 0; i < conf->nr_files; i++) {
io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
@@ -191,7 +193,8 @@ static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod,
return conf->nr_files;
}
-static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
+static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned ublk_op = ublksrv_get_op(iod);
@@ -199,7 +202,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
switch (ublk_op) {
case UBLK_IO_OP_FLUSH:
- ret = handle_flush(q, iod, tag);
+ ret = handle_flush(t, q, iod, tag);
break;
case UBLK_IO_OP_WRITE_ZEROES:
case UBLK_IO_OP_DISCARD:
@@ -207,7 +210,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
break;
case UBLK_IO_OP_READ:
case UBLK_IO_OP_WRITE:
- ret = stripe_queue_tgt_rw_io(q, iod, tag);
+ ret = stripe_queue_tgt_rw_io(t, q, iod, tag);
break;
default:
ret = -EINVAL;
@@ -218,17 +221,19 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
return ret;
}
-static int ublk_stripe_queue_io(struct ublk_queue *q, int tag)
+static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q,
+ int tag)
{
- int queued = stripe_queue_tgt_io(q, tag);
+ int queued = stripe_queue_tgt_io(t, q, tag);
- ublk_queued_tgt_io(q, tag, queued);
+ ublk_queued_tgt_io(t, q, tag, queued);
return 0;
}
-static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
- const struct io_uring_cqe *cqe)
+static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q,
+ const struct io_uring_cqe *cqe)
{
+ unsigned tag = user_data_to_tag(cqe->user_data);
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_io *io = ublk_get_io(q, tag);
@@ -257,13 +262,13 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
}
}
- if (ublk_completed_tgt_io(q, tag)) {
+ if (ublk_completed_tgt_io(t, q, tag)) {
int res = io->result;
if (!res)
res = iod->nr_sectors << 9;
- ublk_complete_io(q, tag, res);
+ ublk_complete_io(t, q, tag, res);
free_stripe_array(io->private_data);
io->private_data = NULL;
diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h
new file mode 100644
index 000000000000..36545d1567f1
--- /dev/null
+++ b/tools/testing/selftests/ublk/utils.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef KUBLK_UTILS_H
+#define KUBLK_UTILS_H
+
+#define __maybe_unused __attribute__((unused))
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ unsigned long __mptr = (unsigned long)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); })
+#endif
+
+#define round_up(val, rnd) \
+ (((val) + ((rnd) - 1)) & ~((rnd) - 1))
+
+static inline unsigned int ilog2(unsigned int x)
+{
+ if (x == 0)
+ return 0;
+ return (sizeof(x) * 8 - 1) - __builtin_clz(x);
+}
+
+#define UBLK_DBG_DEV (1U << 0)
+#define UBLK_DBG_THREAD (1U << 1)
+#define UBLK_DBG_IO_CMD (1U << 2)
+#define UBLK_DBG_IO (1U << 3)
+#define UBLK_DBG_CTRL_CMD (1U << 4)
+#define UBLK_LOG (1U << 5)
+
+extern unsigned int ublk_dbg_mask;
+
+static inline void ublk_err(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+}
+
+static inline void ublk_log(const char *fmt, ...)
+{
+ if (ublk_dbg_mask & UBLK_LOG) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+}
+
+static inline void ublk_dbg(int level, const char *fmt, ...)
+{
+ if (level & ublk_dbg_mask) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+}
+
+#endif
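
Taken together, the ublk changes thread a struct ublk_thread * through every target callback (queue_io, tgt_io_done, ublk_io_alloc_sqes, ublk_complete_io and friends), recover the tag from cqe->user_data instead of passing it separately, replace the private UBLKSRV_* queue state bits with the uapi flags plus one borrowed high bit, and move the generic helpers into the new utils.h. A quick way to exercise the refactor is the usual kselftest flow (standard selftests layout assumed; building the ublk tests is assumed to require liburing development headers):

    make -C tools/testing/selftests TARGETS=ublk
    make -C tools/testing/selftests TARGETS=ublk run_tests
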
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 12a0614b9fd4..918a2caa070e 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -12,7 +12,7 @@ TEST_GEN_PROGS += vdso_test_correctness
TEST_GEN_PROGS += vdso_test_getrandom
TEST_GEN_PROGS += vdso_test_chacha
-CFLAGS := -std=gnu99 -O2
+CFLAGS := -std=gnu99 -O2 -Wall -Wstrict-prototypes
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 722260f97561..5fdd0f362337 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -58,6 +58,7 @@
#define VDSO_NAMES 1
#endif
+__attribute__((unused))
static const char *versions[7] = {
"LINUX_2.6",
"LINUX_2.6.15",
@@ -68,6 +69,7 @@ static const char *versions[7] = {
"LINUX_5.10"
};
+__attribute__((unused))
static const char *names[2][7] = {
{
"__kernel_gettimeofday",
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 9ce795b806f0..4d3d96f1e440 100644..120000
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -1,58 +1 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and
- * vDSO gettimeofday()
- * Copyright (c) 2014 Andy Lutomirski
- *
- * Compile with:
- * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c
- *
- * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
- */
-
-#include <stdio.h>
-#ifndef NOLIBC
-#include <sys/auxv.h>
-#include <sys/time.h>
-#endif
-
-#include "../kselftest.h"
-#include "parse_vdso.h"
-#include "vdso_config.h"
-#include "vdso_call.h"
-
-int main(int argc, char **argv)
-{
- const char *version = versions[VDSO_VERSION];
- const char **name = (const char **)&names[VDSO_NAMES];
-
- unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
- if (!sysinfo_ehdr) {
- printf("AT_SYSINFO_EHDR is not present!\n");
- return KSFT_SKIP;
- }
-
- vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
-
- /* Find gettimeofday. */
- typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
- gtod_t gtod = (gtod_t)vdso_sym(version, name[0]);
-
- if (!gtod) {
- printf("Could not find %s\n", name[0]);
- return KSFT_SKIP;
- }
-
- struct timeval tv;
- long ret = VDSO_CALL(gtod, 2, &tv, 0);
-
- if (ret == 0) {
- printf("The time is %lld.%06lld\n",
- (long long)tv.tv_sec, (long long)tv.tv_usec);
- } else {
- printf("%s failed\n", name[0]);
- return KSFT_FAIL;
- }
-
- return 0;
-}
+vdso_test_gettimeofday.c
\ No newline at end of file
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c
index 8757f738b0b1..0aad682b12c8 100644
--- a/tools/testing/selftests/vDSO/vdso_test_chacha.c
+++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c
@@ -76,7 +76,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u
void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks)
{
- ksft_exit_skip("Not implemented on architecture\n");
+ ksft_test_result_skip("Not implemented on architecture\n");
+ ksft_finished();
}
int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
index 38d46a8bf7cb..b5d5f59f725a 100644
--- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
+++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
@@ -13,7 +13,6 @@
#define _GNU_SOURCE
#include <elf.h>
-#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index 5fb97ad67eea..da651cf53c6c 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -108,7 +108,7 @@ static void *vsyscall_getcpu(void)
}
-static void fill_function_pointers()
+static void fill_function_pointers(void)
{
void *vdso = dlopen("linux-vdso.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
index 95057f7567db..dd1132508a0d 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
@@ -21,7 +21,6 @@
#include <sys/wait.h>
#include <sys/types.h>
#include <linux/random.h>
-#include <linux/compiler.h>
#include <linux/ptrace.h>
#include "../kselftest.h"
@@ -101,6 +100,7 @@ out:
return state;
}
+__attribute__((unused)) /* Example for libc implementors */
static void vgetrandom_put_state(void *state)
{
if (!state)
@@ -242,6 +242,7 @@ static void kselftest(void)
pid_t child;
ksft_print_header();
+ vgetrandom_init();
ksft_set_plan(2);
for (size_t i = 0; i < 1000; ++i) {
@@ -265,7 +266,7 @@ static void kselftest(void)
}
for (;;) {
struct ptrace_syscall_info info = { 0 };
- int status, ret;
+ int status;
ksft_assert(waitpid(child, &status, 0) >= 0);
if (WIFEXITED(status)) {
ksft_assert(WEXITSTATUS(status) == 0);
@@ -295,8 +296,6 @@ static void usage(const char *argv0)
int main(int argc, char *argv[])
{
- vgetrandom_init();
-
if (argc == 1) {
kselftest();
return 0;
@@ -306,6 +305,9 @@ int main(int argc, char *argv[])
usage(argv[0]);
return 1;
}
+
+ vgetrandom_init();
+
if (!strcmp(argv[1], "bench-single"))
bench_single();
else if (!strcmp(argv[1], "bench-multi"))
diff --git a/tools/testing/selftests/vsock/.gitignore b/tools/testing/selftests/vsock/.gitignore
new file mode 100644
index 000000000000..9c5bf379480f
--- /dev/null
+++ b/tools/testing/selftests/vsock/.gitignore
@@ -0,0 +1,2 @@
+vmtest.log
+vsock_test
diff --git a/tools/testing/selftests/vsock/Makefile b/tools/testing/selftests/vsock/Makefile
new file mode 100644
index 000000000000..c407c0afd938
--- /dev/null
+++ b/tools/testing/selftests/vsock/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CURDIR := $(abspath .)
+TOOLSDIR := $(abspath ../../..)
+VSOCK_TEST_DIR := $(TOOLSDIR)/testing/vsock
+VSOCK_TEST_SRCS := $(wildcard $(VSOCK_TEST_DIR)/*.c $(VSOCK_TEST_DIR)/*.h)
+
+$(OUTPUT)/vsock_test: $(VSOCK_TEST_DIR)/vsock_test
+ install -m 755 $< $@
+
+$(VSOCK_TEST_DIR)/vsock_test: $(VSOCK_TEST_SRCS)
+ $(MAKE) -C $(VSOCK_TEST_DIR) vsock_test
+TEST_PROGS += vmtest.sh
+TEST_GEN_FILES := vsock_test
+
+include ../lib.mk
+
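
The new Makefile does not compile vsock_test itself; it delegates the build to tools/testing/vsock and installs the resulting binary into the selftest output directory, while vmtest.sh is the entry point exported via TEST_PROGS. A hedged example of the expected flow through the common kselftests entry point:

    make -C tools/testing/selftests TARGETS=vsock
    make -C tools/testing/selftests TARGETS=vsock run_tests   # runs vmtest.sh
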
diff --git a/tools/testing/selftests/vsock/config b/tools/testing/selftests/vsock/config
new file mode 100644
index 000000000000..5f0a4f17dfc9
--- /dev/null
+++ b/tools/testing/selftests/vsock/config
@@ -0,0 +1,111 @@
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_BPF_EVENTS=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
+CONFIG_ARCH_SUPPORTS_UPROBES=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
+CONFIG_DEBUG_FS=y
+CONFIG_FW_CFG_SYSFS=y
+CONFIG_FW_CFG_SYSFS_CMDLINE=y
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM_VIRTIO_GPU_KMS=y
+CONFIG_DRM_BOCHS=y
+CONFIG_VIRTIO_IOMMU=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_PCI=y
+CONFIG_SND_INTEL8X0=y
+CONFIG_SND_HDA_CODEC_REALTEK=y
+CONFIG_SECURITYFS=y
+CONFIG_CGROUP_BPF=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_SQUASHFS_ZSTD=y
+CONFIG_FUSE_FS=y
+CONFIG_VIRTIO_FS=y
+CONFIG_SERIO=y
+CONFIG_PCI=y
+CONFIG_INPUT=y
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_X86_VERBOSE_BOOTUP=y
+CONFIG_VGA_CONSOLE=y
+CONFIG_FB=y
+CONFIG_FB_VESA=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_DIAG=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VMWARE_VMCI_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
+CONFIG_HYPERV_VSOCKETS=y
+CONFIG_VMWARE_VMCI=y
+CONFIG_VHOST_VSOCK=y
+CONFIG_HYPERV=y
+CONFIG_UEVENT_HELPER=n
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_9P_FS=y
+CONFIG_VIRTIO_NET=y
+CONFIG_CMDLINE_OVERRIDE=n
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_SHMEM=y
+CONFIG_TMPFS=y
+CONFIG_UNIX=y
+CONFIG_MODULE_SIG_FORCE=n
+CONFIG_DEVTMPFS=y
+CONFIG_TTY=y
+CONFIG_VT=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_INOTIFY_USER=y
+CONFIG_BLOCK=y
+CONFIG_SCSI_LOWLEVEL=y
+CONFIG_SCSI=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_CORE=y
+CONFIG_I6300ESB_WDT=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_OVERLAY_FS=y
+CONFIG_DAX=y
+CONFIG_DAX_DRIVER=y
+CONFIG_FS_DAX=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_ZONE_DEVICE=y
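
This fragment is not a standalone kernel config; vmtest.sh -b hands it to virtme-ng when generating the guest .config so the VM kernel gets vsock, virtio and related support. Roughly the same step can be run by hand from the kernel source tree, mirroring the vng invocation used later in vmtest.sh:

    vng --kconfig --config tools/testing/selftests/vsock/config
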
diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/vsock/settings
@@ -0,0 +1 @@
+timeout=300
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
new file mode 100755
index 000000000000..edacebfc1632
--- /dev/null
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -0,0 +1,487 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Meta Platforms, Inc. and affiliates
+#
+# Dependencies:
+# * virtme-ng
+# * busybox-static (used by virtme-ng)
+# * qemu (used by virtme-ng)
+
+readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
+
+source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh
+
+readonly VSOCK_TEST="${SCRIPT_DIR}"/vsock_test
+readonly TEST_GUEST_PORT=51000
+readonly TEST_HOST_PORT=50000
+readonly TEST_HOST_PORT_LISTENER=50001
+readonly SSH_GUEST_PORT=22
+readonly SSH_HOST_PORT=2222
+readonly VSOCK_CID=1234
+readonly WAIT_PERIOD=3
+readonly WAIT_PERIOD_MAX=60
+readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
+readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid)
+
+# virtme-ng offers a netdev for ssh when using "--ssh", but we also need a
+# control port forwarded for vsock_test. Because virtme-ng doesn't support
+# adding an additional port to forward to the device created from "--ssh" and
+# virtme-init mistakenly assigns identical IPs to the ssh device and additional
+# devices, we instead opt out of using --ssh, add the device manually, and also
+# add the kernel cmdline options that virtme-init uses to set up the interface.
+readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}"
+readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}"
+readonly QEMU_OPTS="\
+ -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \
+ -device virtio-net-pci,netdev=n0 \
+ -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \
+ --pidfile ${QEMU_PIDFILE} \
+"
+readonly KERNEL_CMDLINE="\
+ virtme.dhcp net.ifnames=0 biosdevname=0 \
+ virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \
+"
+readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log)
+readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback)
+readonly TEST_DESCS=(
+ "Run vsock_test in server mode on the VM and in client mode on the host."
+ "Run vsock_test in client mode on the VM and in server mode on the host."
+ "Run vsock_test using the loopback transport in the VM."
+)
+
+VERBOSE=0
+
+usage() {
+ local name
+ local desc
+ local i
+
+ echo
+ echo "$0 [OPTIONS] [TEST]..."
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " -b: build the kernel from the current source tree and use it for guest VMs"
+ echo " -q: set the path to or name of qemu binary"
+ echo " -v: verbose output"
+ echo
+ echo "Available tests"
+
+ for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
+ name=${TEST_NAMES[${i}]}
+ desc=${TEST_DESCS[${i}]}
+ printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ done
+ echo
+
+ exit 1
+}
+
+die() {
+ echo "$*" >&2
+ exit "${KSFT_FAIL}"
+}
+
+vm_ssh() {
+ ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@"
+ return $?
+}
+
+cleanup() {
+ if [[ -s "${QEMU_PIDFILE}" ]]; then
+ pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
+ fi
+
+ # If failure occurred during or before qemu start up, then we need
+ # to clean this up ourselves.
+ if [[ -e "${QEMU_PIDFILE}" ]]; then
+ rm "${QEMU_PIDFILE}"
+ fi
+}
+
+check_args() {
+ local found
+
+ for arg in "$@"; do
+ found=0
+ for name in "${TEST_NAMES[@]}"; do
+ if [[ "${name}" = "${arg}" ]]; then
+ found=1
+ break
+ fi
+ done
+
+ if [[ "${found}" -eq 0 ]]; then
+ echo "${arg} is not an available test" >&2
+ usage
+ fi
+ done
+
+ for arg in "$@"; do
+ if ! command -v > /dev/null "test_${arg}"; then
+ echo "Test ${arg} not found" >&2
+ usage
+ fi
+ done
+}
+
+check_deps() {
+ for dep in vng ${QEMU} busybox pkill ssh; do
+ if [[ ! -x $(command -v "${dep}") ]]; then
+ echo -e "skip: dependency ${dep} not found!\n"
+ exit "${KSFT_SKIP}"
+ fi
+ done
+
+ if [[ ! -x $(command -v "${VSOCK_TEST}") ]]; then
+ printf "skip: %s not found!" "${VSOCK_TEST}"
+ printf " Please build the kselftest vsock target.\n"
+ exit "${KSFT_SKIP}"
+ fi
+}
+
+check_vng() {
+ local tested_versions
+ local version
+ local ok
+
+ tested_versions=("1.33" "1.36")
+ version="$(vng --version)"
+
+ ok=0
+ for tv in "${tested_versions[@]}"; do
+ if [[ "${version}" == *"${tv}"* ]]; then
+ ok=1
+ break
+ fi
+ done
+
+ if [[ ! "${ok}" -eq 1 ]]; then
+ printf "warning: vng version '%s' has not been tested and may " "${version}" >&2
+ printf "not function properly.\n\tThe following versions have been tested: " >&2
+ echo "${tested_versions[@]}" >&2
+ fi
+}
+
+handle_build() {
+ if [[ ! "${BUILD}" -eq 1 ]]; then
+ return
+ fi
+
+ if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then
+ echo "-b requires vmtest.sh called from the kernel source tree" >&2
+ exit 1
+ fi
+
+ pushd "${KERNEL_CHECKOUT}" &>/dev/null
+
+ if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then
+ die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ if ! make -j$(nproc); then
+ die "failed to build kernel from source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ popd &>/dev/null
+}
+
+vm_start() {
+ local logfile=/dev/null
+ local verbose_opt=""
+ local kernel_opt=""
+ local qemu
+
+ qemu=$(command -v "${QEMU}")
+
+ if [[ "${VERBOSE}" -eq 1 ]]; then
+ verbose_opt="--verbose"
+ logfile=/dev/stdout
+ fi
+
+ if [[ "${BUILD}" -eq 1 ]]; then
+ kernel_opt="${KERNEL_CHECKOUT}"
+ fi
+
+ vng \
+ --run \
+ ${kernel_opt} \
+ ${verbose_opt} \
+ --qemu-opts="${QEMU_OPTS}" \
+ --qemu="${qemu}" \
+ --user root \
+ --append "${KERNEL_CMDLINE}" \
+ --rw &> ${logfile} &
+
+ if ! timeout ${WAIT_TOTAL} \
+ bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then
+ die "failed to boot VM"
+ fi
+}
+
+vm_wait_for_ssh() {
+ local i
+
+ i=0
+ while true; do
+ if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
+ die "Timed out waiting for guest ssh"
+ fi
+ if vm_ssh -- true; then
+ break
+ fi
+ i=$(( i + 1 ))
+ sleep ${WAIT_PERIOD}
+ done
+}
+
+# derived from selftests/net/net_helper.sh
+wait_for_listener()
+{
+ local port=$1
+ local interval=$2
+ local max_intervals=$3
+ local protocol=tcp
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq "${max_intervals}"); do
+ if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \
+ grep -q "${pattern}"; then
+ break
+ fi
+ sleep "${interval}"
+ done
+}
+
+vm_wait_for_listener() {
+ local port=$1
+
+ vm_ssh <<EOF
+$(declare -f wait_for_listener)
+wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX}
+EOF
+}
+
+host_wait_for_listener() {
+ wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
+}
+
+__log_stdin() {
+ cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+__log_args() {
+ echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+log() {
+ local prefix="$1"
+
+ shift
+ local redirect=
+ if [[ ${VERBOSE} -eq 0 ]]; then
+ redirect=/dev/null
+ else
+ redirect=/dev/stdout
+ fi
+
+ if [[ "$#" -eq 0 ]]; then
+ __log_stdin | tee -a "${LOG}" > ${redirect}
+ else
+ __log_args "$@" | tee -a "${LOG}" > ${redirect}
+ fi
+}
+
+log_setup() {
+ log "setup" "$@"
+}
+
+log_host() {
+ local testname=$1
+
+ shift
+ log "test:${testname}:host" "$@"
+}
+
+log_guest() {
+ local testname=$1
+
+ shift
+ log "test:${testname}:guest" "$@"
+}
+
+test_vm_server_host_client() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=server \
+ --control-port="${TEST_GUEST_PORT}" \
+ --peer-cid=2 \
+ 2>&1 | log_guest "${testname}" &
+
+ vm_wait_for_listener "${TEST_GUEST_PORT}"
+
+ ${VSOCK_TEST} \
+ --mode=client \
+ --control-host=127.0.0.1 \
+ --peer-cid="${VSOCK_CID}" \
+ --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}"
+
+ return "${PIPESTATUS[0]}"
+}
+
+test_vm_client_host_server() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ ${VSOCK_TEST} \
+ --mode "server" \
+ --control-port "${TEST_HOST_PORT_LISTENER}" \
+ --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" &
+
+ host_wait_for_listener
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=client \
+ --control-host=10.0.2.2 \
+ --peer-cid=2 \
+ --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}"
+
+ return "${PIPESTATUS[0]}"
+}
+
+test_vm_loopback() {
+ local testname="${FUNCNAME[0]#test_}"
+ local port=60000 # non-forwarded local port
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=server \
+ --control-port="${port}" \
+ --peer-cid=1 2>&1 | log_guest "${testname}" &
+
+ vm_wait_for_listener "${port}"
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=client \
+ --control-host="127.0.0.1" \
+ --control-port="${port}" \
+ --peer-cid=1 2>&1 | log_guest "${testname}"
+
+ return "${PIPESTATUS[0]}"
+}
+
+run_test() {
+ local host_oops_cnt_before
+ local host_warn_cnt_before
+ local vm_oops_cnt_before
+ local vm_warn_cnt_before
+ local host_oops_cnt_after
+ local host_warn_cnt_after
+ local vm_oops_cnt_after
+ local vm_warn_cnt_after
+ local name
+ local rc
+
+ host_oops_cnt_before=$(dmesg | grep -c -i 'Oops')
+ host_warn_cnt_before=$(dmesg --level=warn | wc -l)
+ vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
+ vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l)
+
+ name=$(echo "${1}" | awk '{ print $1 }')
+ eval test_"${name}"
+ rc=$?
+
+ host_oops_cnt_after=$(dmesg | grep -c -i 'Oops')
+ if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on host" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ host_warn_cnt_after=$(dmesg --level=warn | wc -l)
+ if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then
+ echo "FAIL: kernel warning detected on host" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -c -i 'Oops')
+ if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l)
+ if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
+ echo "FAIL: kernel warning detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ return "${rc}"
+}
+
+QEMU="qemu-system-$(uname -m)"
+
+while getopts :hvsq:b o
+do
+ case $o in
+ v) VERBOSE=1;;
+ b) BUILD=1;;
+ q) QEMU=$OPTARG;;
+ h|*) usage;;
+ esac
+done
+shift $((OPTIND-1))
+
+trap cleanup EXIT
+
+if [[ ${#} -eq 0 ]]; then
+ ARGS=("${TEST_NAMES[@]}")
+else
+ ARGS=("$@")
+fi
+
+check_args "${ARGS[@]}"
+check_deps
+check_vng
+handle_build
+
+echo "1..${#ARGS[@]}"
+
+log_setup "Booting up VM"
+vm_start
+vm_wait_for_ssh
+log_setup "VM booted up"
+
+cnt_pass=0
+cnt_fail=0
+cnt_skip=0
+cnt_total=0
+for arg in "${ARGS[@]}"; do
+ run_test "${arg}"
+ rc=$?
+ cnt_total=$(( cnt_total + 1 ))
+ if [[ ${rc} -eq $KSFT_PASS ]]; then
+ cnt_pass=$(( cnt_pass + 1 ))
+ echo "ok ${cnt_total} ${arg}"
+ elif [[ ${rc} -eq $KSFT_SKIP ]]; then
+ cnt_skip=$(( cnt_skip + 1 ))
+ echo "ok ${cnt_total} ${arg} # SKIP"
+ elif [[ ${rc} -eq $KSFT_FAIL ]]; then
+ cnt_fail=$(( cnt_fail + 1 ))
+ echo "not ok ${cnt_total} ${arg} # exit=$rc"
+ fi
+done
+
+echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
+echo "Log: ${LOG}"
+
+if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then
+ exit "$KSFT_PASS"
+else
+ exit "$KSFT_FAIL"
+fi
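
For reference, typical invocations of the script, matching its own usage() text (the qemu binary name and test selection below are examples, not requirements):

    ./vmtest.sh                                   # run all three tests
    ./vmtest.sh -b vm_loopback                    # rebuild the current tree and run one test
    ./vmtest.sh -v -q qemu-system-x86_64 vm_client_host_server
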
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index f314d3789f17..0a5381717e9f 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -16,9 +16,13 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_NAT=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_MANGLE=y